In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_file_paths = (
    os.path.join(root_dir, file_name)
    for root_dir, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/project-shadow-data/secret_info_manual.pdf
/kaggle/input/project-shadow-data/response_framework.pdf


In [2]:
!pip install -q langchain chromadb transformers sentence-transformers pymupdf gradio

In [3]:
!pip install -U langchain-community



In [4]:
import fitz  # PyMuPDF
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import gradio as gr
import re


2025-04-14 17:12:49.735617: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744650769.775797     212 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744650769.788558     212 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [5]:
def load_pdf_text(path: str) -> str:
    """Extract the text of every page of a PDF and join the pages with newlines.

    Args:
        path: Location of the PDF, passed straight to ``fitz.open``.

    Returns:
        The text of all pages, in page order, separated by "\\n".
    """
    # The context manager closes the document even when extraction raises,
    # so the underlying file handle is never left open.
    with fitz.open(path) as doc:
        return "\n".join(page.get_text() for page in doc)

# Load your Kaggle dataset PDFs
# pdf1_text is the text that the chunking/indexing cell turns into documents.
pdf1_text = load_pdf_text("/kaggle/input/project-shadow-data/secret_info_manual.pdf")
# NOTE(review): pdf2_text is not referenced anywhere else in the visible notebook —
# confirm whether it is meant to be indexed too or can be dropped.
pdf2_text = load_pdf_text("/kaggle/input/project-shadow-data/response_framework.pdf")

In [6]:
# Split the manual's text into chunks (chunk_size=1000, chunk_overlap=20).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
manual_texts = [pdf1_text]
doc_chunks = text_splitter.create_documents(manual_texts)

# Embedding model for the chunks; placed on CUDA when torch reports it available, else on CPU.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"device": embedding_device},
)

# Index the chunks in a Chroma vector store and expose it as a retriever.
vectorstore = Chroma.from_documents(
    documents=doc_chunks,
    embedding=embedding_model,
)
retriever = vectorstore.as_retriever()

  embedding_model = HuggingFaceEmbeddings(


In [7]:
MODEL_ID = "tiiuae/falcon-rw-1b"

# Load the causal language model and its tokenizer from the same checkpoint.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# pad_token_id is passed explicitly: without it every generation call logs
# "Setting `pad_token_id` to `eos_token_id`" before falling back to that same value.
# NOTE(review): no generation length is configured here, so answer length depends on
# the installed transformers defaults — consider passing max_new_tokens explicitly.
hf_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    pad_token_id=tokenizer.eos_token_id,
)

# Wrap the transformers pipeline so LangChain chains can use it as their LLM.
llm = HuggingFacePipeline(pipeline=hf_pipeline)


Device set to use cpu
  llm = HuggingFacePipeline(pipeline=hf_pipeline)


In [8]:
# Access rules, one per clearance level. Each rule carries the topic keywords that
# unlock it and says how to answer: "semantic" rules carry a "style" label,
# "static" rules carry a fixed "response" text.
_SEMANTIC_RULES = (
    # (level, topic keyword, style)
    ("Level 1", "emergency extraction protocol", "step-by-step guide with explanations"),
    ("Level 2", "silent exit strategies", "direct tactical steps (omit unnecessary details)"),
    ("Level 3", "evading thermal surveillance", "suggest multiple escape techniques with pros and cons"),
    ("Level 4", "abort mission fallback", "use codewords and indirect phrasing"),
)

rules = [
    {
        "level": rule_level,
        "topic_keywords": [rule_keyword],
        "response_type": "semantic",
        "style": rule_style,
    }
    for rule_level, rule_keyword, rule_style in _SEMANTIC_RULES
]

# The Level 5 rule returns a canned answer instead of a styled one.
rules.append(
    {
        "level": "Level 5",
        "topic_keywords": ["project eclipse"],
        "response_type": "static",
        "response": "The unseen hand once cast a shadow on a forgotten empire. You know what that means.",
    }
)
# Presentation per clearance level: level label -> (greeting, tone description).
level_styles = dict(
    [
        ("Level 1", ("Salute, Shadow Cadet.", "Basic and instructional.")),
        ("Level 2", ("Bonjour, Sentinel.", "Tactical and direct.")),
        ("Level 3", ("Eyes open, Phantom.", "Analytical and strategic.")),
        ("Level 4", ("In the wind, Commander.", "Coded and essential only.")),
        ("Level 5", ("The unseen hand moves, Whisper.", "Layered and vague.")),
    ]
)

In [9]:
def match_rule(level, query):
    """Return the first rule for ``level`` whose topic keyword occurs in ``query``.

    Rules are read from the module-level ``rules`` list and scanned in order.
    Each keyword is matched case-insensitively as a literal phrase anywhere
    in the query.

    Args:
        level: Clearance level label compared against each rule's "level".
        query: Free-text query to search; an empty or missing query never matches.

    Returns:
        The matching rule dict, or None when no rule applies.
    """
    if not query:
        return None

    for rule in rules:
        if rule["level"] != level:
            continue
        for keyword in rule["topic_keywords"]:
            # Keywords are plain phrases, not patterns: escape them so regex
            # metacharacters are matched literally instead of being interpreted
            # (or raising re.error).
            if re.search(re.escape(keyword), query, re.IGNORECASE):
                return rule
    return None

In [10]:
def handle_query(level, query):
    """Build the full reply for a query made at a given clearance level.

    The reply always opens with the greeting and tone registered for ``level``
    in ``level_styles`` (falling back to "Agent." and an empty tone). What
    follows depends on the rule returned by ``match_rule``:

    * no rule      -> a fixed "not authorized" message;
    * static rule  -> the rule's canned "response" text;
    * otherwise    -> the answer of a RetrievalQA chain built from ``llm`` and
      ``retriever``, followed by a note naming the rule's "style".

    Args:
        level: Clearance level label, e.g. "Level 1".
        query: The user's free-text question.

    Returns:
        The formatted reply string.
    """
    greeting, tone = level_styles.get(level, ("Agent.", ""))
    rule = match_rule(level, query)

    if not rule:
        return f"{greeting}\n{tone}\n\n🕵️ Response:\nYour query is not authorized at this level."

    if rule["response_type"] == "static":
        response = rule["response"]
    else:
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            verbose=False
        )
        # Chain.run is deprecated (the notebook output shows the warning).
        # invoke takes the chain's input dict and returns a dict whose
        # "result" entry is the answer text.
        retrieved = qa_chain.invoke({"query": query})["result"]
        response = f"{retrieved}\n\n(Response styled as: {rule['style']})"

    return f"{greeting}\n{tone}\n\n🕵️ Response:\n{response}"

In [None]:
def gradio_interface(level, query):
    """Gradio callback: pass the chosen level and typed query to ``handle_query``.

    Returns whatever ``handle_query`` returns, unchanged.
    """
    reply = handle_query(level, query)
    return reply

# Clearance levels offered in the dropdown.
level_options = [
    "Level 1",
    "Level 2",
    "Level 3",
    "Level 4",
    "Level 5",
]

# Widgets: a level selector and a three-line query box in, a single text box out.
clearance_input = gr.Dropdown(choices=level_options, label="Clearance Level")
query_input = gr.Textbox(
    lines=3,
    placeholder="Enter your query here...",
    label="Mission Query",
)
response_output = gr.Textbox(label="Response")

interface = gr.Interface(
    fn=gradio_interface,
    inputs=[clearance_input, query_input],
    outputs=response_output,
    title="🕵️ Project Shadow – Secure Intelligence Assistant",
    description="Enter a classified mission query. The assistant will respond based on your clearance level and the current knowledge base.",
)

# NOTE(review): with debug=True this cell presumably keeps running while the app is
# served (the saved cell has no execution count) — confirm that is intended.
interface.launch(debug=True)


* Running on local URL:  http://127.0.0.1:7860
Kaggle notebooks require sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://667ca6d206f34d4858.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


  retrieved = qa_chain.run(query)
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
