In [5]:
!pip install -q -U torch transformers embed_content

[0m[31mERROR: Could not find a version that satisfies the requirement embed_content (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for embed_content[0m[31m
[0m

In [6]:
!pip install chromadb google-generativeai langchain-text-splitters bitsandbytes accelerate


Collecting torch<3,>=2.3 (from bitsandbytes)
  Using cached torch-2.9.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (30 kB)
Using cached torch-2.9.1-cp312-cp312-manylinux_2_28_x86_64.whl (899.7 MB)
[0mInstalling collected packages: torch
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchvision 0.24.0+cu126 requires torch==2.9.0, but you have torch 2.9.1 which is incompatible.
torchaudio 2.9.0+cu126 requires torch==2.9.0, but you have torch 2.9.1 which is incompatible.[0m[31m
[0mSuccessfully installed torch-2.9.1


In [7]:
from chromadb import PersistentClient


In [1]:
# ==========================================
# üèóÔ∏è CELL 1: SETUP & MODEL LOADING (FIXED)
# (Run this ONCE. It takes ~2-3 minutes)
# ==========================================

# 1. Install Dependencies
# We force a re-install of bitsandbytes to fix the 'functional' attribute error
!pip install -q -U bitsandbytes transformers accelerate torch chromadb google-generativeai langchain-text-splitters

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
from huggingface_hub import login

# 2. Login to Hugging Face
# Replace with your actual token
HF_TOKEN = "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"

try:
    login(token=HF_TOKEN)
    print("‚úÖ Hugging Face Login Successful!")
except Exception as e:
    print(f"‚ùå Login Failed: {e}")

# 3. Load Llama 3 Model (GPU)
print("‚è≥ Loading Llama 3 (This takes a few minutes)...")
LLM_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# Config for 4-bit quantization (Fits in Colab T4 GPU)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

try:
    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        LLM_MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        token=HF_TOKEN
    )

    # Create the pipeline (Global variable 'text_pipe' will be used in Cell 2)
    text_pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.1,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )
    print("‚úÖ Llama 3 Model Loaded Successfully! You can now run the next cell.")

except Exception as e:
    print(f"‚ùå Model Load Error: {e}")
    print("Tip: If the error persists, try 'Runtime > Restart Session' again.")

‚úÖ Hugging Face Login Successful!
‚è≥ Loading Llama 3 (This takes a few minutes)...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


‚úÖ Llama 3 Model Loaded Successfully! You can now run the next cell.


In [13]:
# ==========================================
# üß† CELL 2: HYBRID RAG LOGIC (FIXED DB PATH)
# ==========================================

import os
import shutil
import gc
import google.generativeai as genai
from chromadb import PersistentClient
from langchain_text_splitters import RecursiveCharacterTextSplitter

# --- 1. CONFIGURATION ---

# üîë PASTE YOUR GOOGLE API KEY HERE
os.environ["GEMINI_API_KEY"] = "AIzaSyDQ6wHtu6Ss67a8bjlxnlxedV1mEnERauI"

genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# UPDATE: Changed path to bypass the "Readonly" lock error
DB_PATH = "./chroma_db_fresh_v2" # Changed to a new fresh path

# --- 2. HELPER FUNCTIONS ---

def get_embedding(text):
    return genai.embed_content(model="models/text-embedding-004", content=text)["embedding"]

def extract_metadata(filename):
    if "employee_handbook" in filename:
        return {"effective_date": "2024-01-15", "role_scope": "all_employees", "doc_type": "handbook"}
    if "manager_updates" in filename:
        return {"effective_date": "2024-06-01", "role_scope": "all_employees", "doc_type": "policy_update"}
    if "intern_onboarding" in filename:
        return {"effective_date": "2024-06-01", "role_scope": "interns", "doc_type": "role_specific"}
    return {"effective_date": "2024-01-01", "role_scope": "all_employees", "doc_type": "general"}

def date_to_int(date_str):
    return int(date_str.replace("-", ""))

# --- 3. INGESTION LOGIC ---

def run_ingestion():
    # Force cleanup of the specific DB path
    # Ensure any lingering file handles are released before deletion
    gc.collect()
    if os.path.exists(DB_PATH):
        try:
            shutil.rmtree(DB_PATH)
            print(f"üßπ Removed old database at {DB_PATH}")
        except Exception as e:
            print(f"‚ö†Ô∏è Could not delete old DB (might be locked). Error: {e}. Using new path anyway.")

    client = PersistentClient(path=DB_PATH)
    collection = client.get_or_create_collection(name="nebula_policies")

    filenames = [
        "employee_handbook_v1.txt",
        "manager_updates_2024.txt",
        "intern_onboarding_faq.txt"
    ]

    ids, docs, metas, embs = [], [], [], []
    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100)

    print("üìñ Reading files from disk...")

    for filename in filenames:
        if not os.path.exists(filename):
            print(f"‚ùå ERROR: File '{filename}' not found! Please upload it to the Colab 'Files' tab.")
            continue

        with open(filename, "r", encoding="utf-8") as f:
            text = f.read()

        chunks = splitter.split_text(text)
        base_meta = extract_metadata(filename)

        for i, chunk in enumerate(chunks):
            ids.append(f"{filename}_{i}")
            docs.append(chunk)
            metas.append({**base_meta, "filename": filename})
            embs.append(get_embedding(chunk))

    if len(ids) > 0:
        collection.add(ids=ids, documents=docs, metadatas=metas, embeddings=embs)
        print(f"‚úÖ Ingestion Complete. {len(ids)} chunks stored in {DB_PATH}.")
    else:
        print("‚ùå No data ingested.")

    # Explicitly delete client to release file lock
    del client
    gc.collect()

# --- 4. RETRIEVAL LOGIC ---

def retrieve_documents(query, user_role):
    client = PersistentClient(path=DB_PATH)
    collection = client.get_collection("nebula_policies")

    q_emb = get_embedding(query)
    results = collection.query(query_embeddings=[q_emb], n_results=5)

    chunks = []
    if results['documents']:
        for doc, meta, dist in zip(results['documents'][0], results['metadatas'][0], results['distances'][0]):
            chunks.append({"text": doc, "meta": meta, "dist": dist})

    ranked = []
    target_scope = f"{user_role}s"
    for ch in chunks:
        meta = ch["meta"]
        role_match = 1 if meta["role_scope"] == target_scope else 0
        date_val = date_to_int(meta["effective_date"])

        ranked.append({
            "chunk": ch,
            "score": (-role_match, -date_val, ch["dist"])
        })

    ranked = sorted(ranked, key=lambda x: x["score"])

    # Explicitly delete client to release file lock
    del client
    gc.collect()

    return [r["chunk"] for r in ranked[:3]]

# --- 5. LLM QUERY LOGIC (Llama 3 - CRISP MODE) ---

def query_llama(prompt_text):
    messages = [
        {"role": "system", "content": "You are a concise, strict, and authoritative policy assistant."},
        {"role": "user", "content": prompt_text},
    ]

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    outputs = text_pipe(prompt)
    generated_text = outputs[0]["generated_text"][len(prompt):]
    return generated_text.strip()

def detect_role_local(query):
    prompt = f"Extract role from query (intern, employee, manager). Return ONLY the word. Query: '{query}'"
    role = query_llama(prompt).lower()
    if "intern" in role: return "intern"
    if "manager" in role: return "manager"
    return "employee"

# def ask_nebula_llama(query):
#     print("  ...Detecting Role")
#     user_role = detect_role_local(query)

#     print("  ...Retrieving Documents")
#     chunks = retrieve_documents(query, user_role)

#     context_text = ""
#     for d in chunks:
#         context_text += f"\n[DOCUMENT]\nSource: {d['meta']['filename']}\nEffective Date: {d['meta']['effective_date']}\nRole Scope: {d['meta']['role_scope']}\nContent: {d['text']}\n-------------------\n"

#     print("  ...Generating Answer")
#     prompt = f"""
#     You are NebulaGears' policy assistant.
#     CURRENT USER ROLE: {user_role}

#     CONFLICT RULES:
#     1. INTERN RULE: If user is 'intern', ONLY 'interns' scope applies.
#     2. RECENCY RULE: Newer dates override older ones.
#     3. CITATION: Must cite the Source filename.

#     STYLE GUIDELINES (STRICT):
#     - Answer directly (Yes/No).
#     - Be extremely concise (max 2 sentences).
#     - Do NOT use filler phrases like "Based on the documents".
#     - End with "Source: filename.txt" on a new line.

#     CONTEXT:
#     {context_text}

#     QUESTION: "{query}"
#     """
#     return query_llama(prompt)

# --- EXECUTION ---
run_ingestion()

# print("\n" + "="*50)
# question = "I just joined as an intern. Can I work from home?"
# print(f"‚ùì User: {question}")
# response = ask_nebula_llama(question)
# print(f"ü¶ô Assistant:\n{response}")

üìñ Reading files from disk...
‚úÖ Ingestion Complete. 71 chunks stored in ./chroma_db_fresh_v2.

‚ùì User: I just joined as an intern. Can I work from home?
  ...Detecting Role
  ...Retrieving Documents
  ...Generating Answer
ü¶ô Assistant:
No. Interns are required to be in the office five days a week, unless a specific role-based exception is specified in an offer letter or documented by People & Culture.


In [19]:
# ==========================================
# ‚ö° CELL 3: CLEAN OUTPUT VERSION (No "Thinking" logs)
# ==========================================

def ask_nebula_llama_clean(query):
    # 1. Detect Role (No print statements)
    user_role = detect_role_local(query)

    # 2. Retrieve Documents (No print statements)
    chunks = retrieve_documents(query, user_role)

    context_text = ""
    for d in chunks:
        context_text += f"\n[DOCUMENT]\nSource: {d['meta']['filename']}\nEffective Date: {d['meta']['effective_date']}\nRole Scope: {d['meta']['role_scope']}\nContent: {d['text']}\n-------------------\n"

    # 3. Generate Answer
    prompt = f"""
    You are NebulaGears' policy assistant.
    User Role: {user_role}

    CONFLICT RULES:
    1. Interns = Intern rules override everything.
    2. Employees = Newer dates override older dates.

    CONTEXT:
    {context_text}

    QUESTION: "{query}"

    REQUIRED OUTPUT FORMAT:
    [Direct Answer]
    Source: [Filename]

    YOUR ANSWER:
    """
    return query_llama(prompt)

# --- EXECUTION ---
question = "I just joined as an intern. Can I work from home?"

# Print exactly as requested: Q, Answer, Citation
print(f"Q: {question}")
print(ask_nebula_llama_clean(question))

Q: I just joined as an intern. Can I work from home?
**No**, you cannot work from home as an intern. According to the Core Policy ‚Äî Office Presence, interns are required to be in the office 5 days a week for the duration of their internship to maximize mentorship. No remote work is permitted for interns. (Source: intern_onboarding_faq.txt)


In [24]:
question = "Do I need approval to work from home?"

# Print exactly as requested: Q, Answer, Citation
print(f"Q: {question}")
print(ask_nebula_llama_clean(question))

Q: Do I need approval to work from home?
**Answer:** No, you do not need approval to work from home. However, please note that:

* As an intern, you are subject to the intern rules, which override all other rules. According to the intern_onboarding_faq.txt, you should notify People & Culture and your manager if you have a medical need or other compelling personal situation requiring periodic remote work.
* As a manager, you should consult the manager_updates_2024.txt, which clarifies that temporary remote working for short windows (e.g., travel or emergencies) requires People & Culture consent.
* For employees, the employee_handbook_v1.txt FAQ states that the Work From Anywhere program does not require prior approval for remote days, but managers should be informed of extended travel or timezone changes impacting team collaboration.

**Source:** intern_onboarding_faq.txt, manager_updates_2024.txt, and employee_handbook_v1.txt


In [25]:
# ==========================================
# üß™ FINAL TEST SUITE (15 QUESTIONS)
# ==========================================

# 1. Define the 15 Questions
test_data = {
    "GROUP 1: INTERN QUESTIONS (Strict Override)": [
        "I just joined as an intern. Can I work from home?",
        "As a new intern, can I work remotely 3 days a week like employees?",
        "Do interns need manager approval for remote work?"
    ],
    "GROUP 2: EMPLOYEE QUESTIONS (Manager Update Override)": [
        "Do employees still follow the Work From Anywhere policy?",
        "Do I need approval to work from home?",
        "Can employees work remotely 100% of the time?"
    ],
    "GROUP 3: MANAGER QUESTIONS (Policy Enforcement)": [
        "As a manager, can I approve 5 remote days per week?",
        "Which days must employees be in the HQ office?",
        "Can I allow my entire team to work remotely all week?"
    ],
    "GROUP 4: NEGATION & LOGIC (Tricky phrasing)": [
        "Do I NOT need approval to work remotely?",
        "Is it NOT mandatory to come to office on certain days?",
        "Is it true that interns do NOT have to come every day?"
    ],
    "GROUP 5: CONFLICT RESOLUTION META-QUESTIONS": [
        "Which document do I follow if handbook says no approval is needed but manager update says approval is required?",
        "Does the manager update apply to interns?",
        "If two policies conflict, which one wins?"
    ]
}

# 2. Select the active RAG function (Gemini or Llama)
# We check which function you ran last
if 'ask_nebula' in globals():
    runner_fn = ask_nebula
elif 'ask_nebula_llama_clean' in globals():
    runner_fn = ask_nebula_llama_clean
elif 'ask_nebula_llama' in globals():
    runner_fn = ask_nebula_llama
else:
    print("‚ùå ERROR: No RAG function found! Please run your RAG Logic cell first.")
    runner_fn = None

# 3. Run the Loop
if runner_fn:
    print(f"üöÄ STARTING TEST RUN [Using: {runner_fn.__name__}]")

    for group_name, questions in test_data.items():
        print(f"\n{'='*70}")
        print(f"üìÇ {group_name}")
        print(f"{'='*70}")

        for q in questions:
            print(f"\n‚ùì Q: {q}")

            try:
                # Call the AI
                ans = runner_fn(q)
                print(f"ü§ñ A:\n{ans.strip()}")
            except Exception as e:
                print(f"‚ùå Error: {e}")

            print("-" * 40)

    print("\n‚úÖ TEST SUITE COMPLETE.")

üöÄ STARTING TEST RUN [Using: ask_nebula_llama_clean]

üìÇ GROUP 1: INTERN QUESTIONS (Strict Override)

‚ùì Q: I just joined as an intern. Can I work from home?
ü§ñ A:
**No**, you cannot work from home as an intern. According to the Core Policy ‚Äî Office Presence, interns are required to be in the office 5 days a week for the duration of their internship to maximize mentorship. No remote work is permitted for interns. (Source: intern_onboarding_faq.txt)
----------------------------------------

‚ùì Q: As a new intern, can I work remotely 3 days a week like employees?
ü§ñ A:
**Direct Answer:** No, as a new intern, you are not allowed to work remotely 3 days a week like employees. According to the Core Policy ‚Äî Office Presence, interns are required to be in the office 5 days a week for the duration of their internship to maximize mentorship. No remote work is permitted for interns.
----------------------------------------

‚ùì Q: Do interns need manager approval for remote work?
