In [None]:
# === Colab-friendly RAG Week1-3 demo (paste & run in one cell) ===

# 1) Install dependencies (tries to avoid common Colab conflicts)
import sys, subprocess, os, json, textwrap

def pip_install(packages):
    """Install `packages` with pip for the interpreter running this notebook.

    The full command line is echoed first. Raises subprocess.CalledProcessError
    when pip exits with a non-zero status.
    """
    pip_prefix = [sys.executable, "-m", "pip", "install", "--no-warn-script-location"]
    command = pip_prefix + packages
    print(f"Running: {' '.join(command)}")
    subprocess.check_call(command)

# Try lightweight installs; sentence-transformers will bring torch automatically.
BASE_PACKAGES = [
    "pdfplumber",
    "python-docx",
    "beautifulsoup4",
    "sentence-transformers",
    "faiss-cpu",
    "openai",
]
try:
    # faiss-cpu sometimes fails with latest builds; pin a compatible release if needed.
    pip_install(BASE_PACKAGES)
except subprocess.CalledProcessError as e:
    print("Primary install failed:", e)
    print("Attempting fallback install for faiss-cpu...")
    # pip aborts the whole batch when one requirement cannot be resolved or built,
    # so retry every package on its own (with faiss-cpu pinned) instead of only
    # re-installing faiss-cpu and leaving the rest missing.
    for pkg in BASE_PACKAGES:
        spec = "faiss-cpu==1.7.4" if pkg == "faiss-cpu" else pkg
        try:
            pip_install([spec])
        except subprocess.CalledProcessError as pkg_error:
            if pkg == "faiss-cpu":
                print("faiss-cpu fallback failed; you may need to install faiss manually.")
                # continue — index.py will raise helpful error if faiss missing
            else:
                print(f"Could not install {pkg}:", pkg_error)

# 2) Write helper files (utils.py, ingest.py, index.py, query_demo.py)
utils_py = r'''
import re, json
from typing import List, Dict, Any

def save_jsonl(path: str, records: List[Dict[str, Any]]):
    """Write `records` to `path` as JSON Lines: one UTF-8 JSON object per line.

    Non-ASCII characters are written verbatim (ensure_ascii=False). An existing
    file at `path` is overwritten.
    """
    with open(path, "w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in records)

def load_jsonl(path: str):
    """Read a JSON Lines file and return its records as a list.

    Blank or whitespace-only lines (e.g. a trailing newline added by an editor)
    are skipped instead of raising json.JSONDecodeError.
    """
    out = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            out.append(json.loads(line))
    return out

def simple_chunk_text(text: str, chunk_size_chars: int = 2000, overlap_chars: int = 200):
    """Split `text` into overlapping chunks, preferring blank-line paragraph boundaries.

    Paragraphs are packed greedily into chunks of at most `chunk_size_chars`.
    A paragraph longer than that is cut with a sliding window that already
    overlaps by `overlap_chars`. Every other chunk (after the first) is prefixed
    with the last `overlap_chars` characters of the chunk before it, so such a
    chunk may exceed `chunk_size_chars` by `overlap_chars + 1`.

    Raises:
        ValueError: if `chunk_size_chars` is not positive.
    """
    if chunk_size_chars <= 0:
        raise ValueError("chunk_size_chars must be positive")
    # Clamp so the sliding window below always advances by at least one character.
    overlap_chars = max(0, min(overlap_chars, chunk_size_chars - 1))

    # This module is written from a raw string, so escapes must use a single
    # backslash here to become real CR/LF characters in the generated file.
    text = text.replace('\r\n', '\n')

    pieces = []  # (chunk_text, already_overlaps_previous)
    buffer = ""
    for p in text.split('\n\n'):
        if len(buffer) + len(p) + 2 <= chunk_size_chars:
            buffer = (buffer + "\n\n" + p).strip()
            continue
        if buffer:
            pieces.append((buffer.strip(), False))
        if len(p) > chunk_size_chars:
            step = chunk_size_chars - overlap_chars
            start = 0
            while start < len(p):
                part = p[start:start + chunk_size_chars].strip()
                if part:
                    pieces.append((part, start > 0))
                if start + chunk_size_chars >= len(p):
                    # The window reached the end; another one would only repeat text.
                    break
                start += step
            buffer = ""
        else:
            buffer = p
    if buffer.strip():
        pieces.append((buffer.strip(), False))

    stitched = []
    prev = None
    for chunk, overlapped in pieces:
        # Window pieces already carry their overlap; with overlap 0 nothing is
        # prepended (a bare prev[-0:] would copy the whole previous chunk).
        if prev is None or overlapped or overlap_chars == 0:
            stitched.append(chunk)
        else:
            stitched.append((prev[-overlap_chars:] + "\n" + chunk).strip())
        prev = chunk
    return stitched

def normalize_text(text: str) -> str:
    """Collapse every run of whitespace (spaces, tabs, newlines) into one space and trim the ends."""
    # Single backslash on purpose: this module is emitted from a raw string, so
    # a doubled backslash would match a literal backslash followed by "s".
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def build_prompt(question: str, chunks: List[Dict[str, Any]], instructions: str = None) -> str:
    """Assemble the grounded-answer prompt sent to the generator.

    Args:
        question: the user's question.
        chunks: retrieved chunks; each needs a 'text' key and may carry
            'metadata' with 'title' and 'source'.
        instructions: optional replacement for the default system instructions.

    Returns:
        One string with the instructions, the question, each chunk labelled
        [DOC1], [DOC2], ... (text truncated to 3000 characters) and an answer cue.
    """
    system = instructions or (
        "You are a helpful domain expert. Answer using ONLY the provided document excerpts. "
        "Cite sources inline using [DOC<n>]. If not supported, say 'I don't know'."
    )
    # Escapes use a single backslash: this module is emitted from a raw string,
    # and a doubled backslash would put a literal backslash-n into the prompt.
    parts = ["System: " + system, "\nUser question: " + question, "\nProvided documents:"]
    for i, ch in enumerate(chunks, start=1):
        metadata = ch.get('metadata', {})
        title = metadata.get('title') or f"doc_{i}"
        meta = f"Title: {title} | Source: {metadata.get('source', '')}"
        parts.append(f"[DOC{i}] {meta}\n" + ch['text'][:3000])
    parts.append("\nAnswer (include citations like [DOC1]):")
    return "\n\n".join(parts)
'''

ingest_py = r'''
import os, uuid
from utils import simple_chunk_text, normalize_text, save_jsonl

def read_text_from_path(path: str) -> str:
    """Return the text content of `path`, dispatching on the file extension.

    .pdf files are read page by page with pdfplumber, .docx/.doc files with
    python-docx, and everything else is read as UTF-8 text. Pages or paragraphs
    are joined with a blank line.

    Raises:
        RuntimeError: if the optional parser for the extension is not installed.
    """
    ext = os.path.splitext(path)[1].lower()
    if ext == '.pdf':
        try:
            import pdfplumber
        except ImportError as exc:
            # Catch only the import failure and keep it as the cause for debugging.
            raise RuntimeError("pdfplumber not installed") from exc
        pages = []
        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    pages.append(page_text)
        return "\n\n".join(pages)
    elif ext in ['.docx', '.doc']:
        try:
            import docx
        except ImportError as exc:
            raise RuntimeError("python-docx not installed") from exc
        # NOTE(review): python-docx presumably only parses .docx packages; a
        # legacy binary .doc will likely fail here — confirm before relying on it.
        doc = docx.Document(path)
        paras = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
        return "\n\n".join(paras)
    else:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()

def ingest_file(path: str, title: str=None, chunk_size:int=2000, overlap:int=200):
    """Read one document and turn it into chunk records ready for indexing.

    Args:
        path: file to ingest (see read_text_from_path for supported types).
        title: display title stored in the metadata; defaults to the file name.
        chunk_size: forwarded to simple_chunk_text as chunk_size_chars.
        overlap: forwarded to simple_chunk_text as overlap_chars.

    Returns:
        A list of dicts with 'id' ("<doc_id>_<n>"), 'doc_id' (a fresh UUID4 per
        call), 'text' and 'metadata' (title, absolute source path, 1-based
        chunk_index).
    """
    text = read_text_from_path(path)
    # Chunk first, normalize second: normalize_text is meant to collapse all
    # whitespace, which would erase the blank-line paragraph boundaries the
    # chunker splits on.
    raw_chunks = simple_chunk_text(text, chunk_size_chars=chunk_size, overlap_chars=overlap)
    chunks = [c for c in (normalize_text(rc) for rc in raw_chunks) if c]

    doc_id = str(uuid.uuid4())
    doc_title = title or os.path.basename(path)
    source = os.path.abspath(path)
    return [
        {
            'id': f"{doc_id}_{i}",
            'doc_id': doc_id,
            'text': c,
            'metadata': {'title': doc_title, 'source': source, 'chunk_index': i},
        }
        for i, c in enumerate(chunks, start=1)
    ]

if __name__=='__main__':
    # CLI entry point: chunk one document and store the records as JSON Lines.
    import argparse
    cli = argparse.ArgumentParser()
    cli.add_argument('--input', '-i', required=True)
    cli.add_argument('--output', '-o', default='chunks.jsonl')
    opts = cli.parse_args()
    records = ingest_file(opts.input)
    save_jsonl(opts.output, records)
    print(f"Saved {len(records)} chunks to {opts.output}")
'''

index_py = r'''
import os, pickle, numpy as np
from utils import load_jsonl
try:
    import faiss
except Exception:
    faiss=None

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    SentenceTransformer=None

try:
    import openai
except:
    openai=None

class Embedder:
    """Turn texts into float32 embedding matrices.

    Uses a local sentence-transformers model by default. With use_openai=True
    (and the openai package importable) it calls the OpenAI embeddings API
    instead; `model_name` only applies to the local model. If the openai
    package is missing, `use_openai` silently falls back to False, so callers
    should read `self.use_openai` for the effective mode.
    """

    # Model used for the OpenAI path, and how many texts go into one request.
    OPENAI_EMBED_MODEL = 'text-embedding-3-small'
    OPENAI_BATCH_SIZE = 64

    def __init__(self, model_name='all-MiniLM-L6-v2', use_openai=False):
        self.use_openai = bool(use_openai) and (openai is not None)
        self.model_name = model_name
        if not self.use_openai:
            if SentenceTransformer is None:
                raise RuntimeError("sentence-transformers not installed")
            self.model = SentenceTransformer(model_name)

    def embed_texts(self, texts):
        """Embed `texts` and return a float32 array with one row per text."""
        texts = list(texts)
        if not self.use_openai:
            arr = self.model.encode(texts, show_progress_bar=True, convert_to_numpy=True)
            if arr.dtype != np.float32:
                arr = arr.astype('float32')
            return arr

        vectors = []
        for start in range(0, len(texts), self.OPENAI_BATCH_SIZE):
            batch = texts[start:start + self.OPENAI_BATCH_SIZE]
            if hasattr(openai, 'OpenAI'):
                # openai>=1.0: the Embedding class was removed; use the module-level client.
                resp = openai.embeddings.create(model=self.OPENAI_EMBED_MODEL, input=batch)
                ordered = sorted(resp.data, key=lambda item: item.index)
                vectors.extend(item.embedding for item in ordered)
            else:
                # Legacy openai<1.0 SDK.
                resp = openai.Embedding.create(model=self.OPENAI_EMBED_MODEL, input=batch)
                ordered = sorted(resp['data'], key=lambda item: item['index'])
                vectors.extend(item['embedding'] for item in ordered)
        return np.asarray(vectors, dtype='float32')

class FaissIndexWrapper:
    """Inner-product FAISS index over L2-normalized vectors, plus per-vector metadata.

    `metadata[i]` describes the vector stored at FAISS position i. The index
    goes to `index_path` and the metadata list is pickled to `meta_path`.
    """

    def __init__(self, dim, index_path='faiss.index', meta_path='meta.pkl'):
        if faiss is None:
            raise RuntimeError("faiss not installed")
        self.dim = dim
        self.index = faiss.IndexFlatIP(dim)
        self.index_path = index_path
        self.meta_path = meta_path
        self.metadata = []

    @staticmethod
    def _unit_rows(vectors):
        """Return a normalized float32, C-contiguous copy; the caller's array is left untouched."""
        arr = np.array(vectors, dtype='float32', order='C', copy=True)
        if arr.ndim == 1 and arr.size:
            arr = arr.reshape(1, -1)
        faiss.normalize_L2(arr)
        return arr

    def add(self, vectors, metas):
        """Append `vectors` (one row each) together with their `metas`.

        Raises:
            ValueError: if the number of rows and metadata entries differ, since
                that would silently misalign search results.
        """
        metas = list(metas)
        if np.size(vectors) == 0 and not metas:
            return  # nothing to index
        unit = self._unit_rows(vectors)
        if unit.shape[0] != len(metas):
            raise ValueError(f"got {unit.shape[0]} vectors but {len(metas)} metadata entries")
        self.index.add(unit)
        self.metadata.extend(metas)

    def save(self):
        """Persist the index and the metadata list to their configured paths."""
        faiss.write_index(self.index, self.index_path)
        with open(self.meta_path, 'wb') as f:
            pickle.dump(self.metadata, f)

    def load(self):
        """Load index and metadata from disk when the files exist.

        Missing files are skipped without error, as before. Returns True when an
        index file was read and False otherwise, so callers can tell a restored
        index from an empty one.
        """
        loaded = False
        if os.path.exists(self.index_path):
            self.index = faiss.read_index(self.index_path)
            # Keep dim in sync with what is actually on disk.
            self.dim = self.index.d
            loaded = True
        if os.path.exists(self.meta_path):
            # SECURITY: pickle.load can execute arbitrary code; only load
            # metadata files this application wrote itself.
            with open(self.meta_path, 'rb') as f:
                self.metadata = pickle.load(f)
        return loaded

    def search(self, qvect, top_k=5):
        """Return, per query row, up to `top_k` metadata copies with a '_score' key added."""
        query = self._unit_rows(qvect)
        D, I = self.index.search(query, top_k)
        results = []
        for i_list, d_list in zip(I.tolist(), D.tolist()):
            row = []
            for idx, score in zip(i_list, d_list):
                # Skip ids outside the metadata list (e.g. negative padding ids).
                if idx < 0 or idx >= len(self.metadata):
                    continue
                meta = self.metadata[idx].copy()
                meta['_score'] = float(score)
                row.append(meta)
            results.append(row)
        return results

if __name__=='__main__':
    # CLI entry point: embed every chunk of a JSONL file and persist a fresh
    # FAISS index together with its metadata.
    import argparse
    cli = argparse.ArgumentParser()
    cli.add_argument('--chunks', required=True)
    cli.add_argument('--index_path', default='faiss.index')
    cli.add_argument('--meta_path', default='meta.pkl')
    cli.add_argument('--use_openai', action='store_true')
    cli.add_argument('--model', default='all-MiniLM-L6-v2')
    opts = cli.parse_args()

    records = load_jsonl(opts.chunks)
    texts = [rec['text'] for rec in records]
    metas = []
    for rec in records:
        metas.append({'id': rec['id'], 'doc_id': rec['doc_id'], 'metadata': rec.get('metadata', {}), 'text': rec['text']})
    vectors = Embedder(model_name=opts.model, use_openai=opts.use_openai).embed_texts(texts)
    store = FaissIndexWrapper(dim=vectors.shape[1], index_path=opts.index_path, meta_path=opts.meta_path)
    store.add(vectors, metas)
    store.save()
    print(f"Indexed {len(texts)} chunks")
'''

query_demo_py = r'''
import os, json
from utils import build_prompt, load_jsonl
from index import Embedder, FaissIndexWrapper

def run_query(question, top_k=5, use_openai_emb=False, openai_key=None, gen_with_openai=False, gen_model='gpt-4o-mini'):
    """Retrieve the `top_k` closest chunks for `question` and answer from them.

    With gen_with_openai=True and an `openai_key`, the assembled prompt is sent
    to `gen_model`; otherwise the prompt itself is returned as the answer.

    Returns:
        {'answer': str, 'sources': [{'id', 'title', '_score'}, ...]}, or None
        when loading the index raised.
    """
    if openai_key:
        import openai
        openai.api_key = openai_key
    embedder = Embedder(use_openai=use_openai_emb)
    # Embedder falls back to the local model when the openai package is missing,
    # so size the index from its effective mode rather than the requested one.
    dim = 1536 if embedder.use_openai else embedder.model.get_sentence_embedding_dimension()
    wrapper = FaissIndexWrapper(dim=dim)
    try:
        wrapper.load()
    except Exception as e:
        print("Failed to load index:", e)
        return None
    q_emb = embedder.embed_texts([question])
    results = wrapper.search(q_emb, top_k=top_k)[0]
    chunks = [{'id': h['id'], 'text': h['text'], 'metadata': h.get('metadata', {})} for h in results]
    prompt = build_prompt(question, chunks)
    if gen_with_openai and openai_key:
        import openai
        messages = [{"role": "system", "content": prompt}]
        if hasattr(openai, 'OpenAI'):
            # openai>=1.0 removed ChatCompletion; use the module-level client.
            resp = openai.chat.completions.create(
                model=gen_model, messages=messages, temperature=0.0, max_tokens=512
            )
            ans = (resp.choices[0].message.content or "").strip()
        else:
            resp = openai.ChatCompletion.create(
                model=gen_model, messages=messages, temperature=0.0, max_tokens=512
            )
            ans = resp['choices'][0]['message']['content'].strip()
    else:
        # Single backslashes: this module is emitted from a raw string.
        ans = "[No generation configured] Prompt below:\n\n" + prompt
    sources = [{'id': h['id'], 'title': h.get('metadata', {}).get('title'), '_score': h.get('_score')} for h in results]
    return {'answer': ans, 'sources': sources}

if __name__=='__main__':
    # Smoke test against the index files in the working directory.
    demo_question = "What is the main topic of the document?"
    print(json.dumps(run_query(demo_question, top_k=3), indent=2))
'''

# write files
generated_modules = {
    'utils.py': utils_py,
    'ingest.py': ingest_py,
    'index.py': index_py,
    'query_demo.py': query_demo_py,
}
for module_name, module_source in generated_modules.items():
    # The context manager guarantees each file is flushed and closed before
    # later steps run or import it.
    with open(module_name, 'w', encoding='utf-8') as module_file:
        module_file.write(module_source)
print("Wrote utils.py, ingest.py, index.py, query_demo.py")

# 3) Create a sample document
sample_text = textwrap.dedent("""
Title: Sample Policy Document

Section 1: Introduction
This document explains the sample policy for demonstration purposes. The policy covers acceptable usage, privacy, and responsibilities.

Section 2: Acceptable Usage
Users must follow the rules. Do not attempt to circumvent restrictions. Use the platform for lawful purposes only.

Section 3: Privacy
Personal data must be handled carefully. Sensitive data should be redacted and stored securely.

Section 4: Responsibilities
Administrators manage users and content. Users are responsible for their own actions. Contact admin@example.com for support.
""").strip()

with open('sample_doc.txt', 'w', encoding='utf-8') as f:
    f.write(sample_text)
print("Wrote sample_doc.txt")

# 4) Ingest the sample doc to create chunks.jsonl
# Runs the generated ingest.py as a separate process via the notebook shell escape.
print("\n=== Running ingestion ===")
!python ingest.py --input sample_doc.txt --output chunks.jsonl

# show chunks
# Prints at most the first 200 lines; `|| true` keeps the shell step from failing if the file is missing.
print("\n--- chunks.jsonl preview ---")
!sed -n '1,200p' chunks.jsonl || true

# 5) Build embeddings and FAISS index (local sentence-transformers by default)
# Writes faiss.index and meta.pkl into the working directory.
print("\n=== Building index (this may download models; allow a few minutes) ===")
!python index.py --chunks chunks.jsonl --index_path faiss.index --meta_path meta.pkl --model all-MiniLM-L6-v2

# 6) Optional: set OPENAI_API_KEY if you want to use OpenAI for embeddings/generation
print("\nIf you want OpenAI usage, set environment variable OPENAI_API_KEY before running the query step.")
print("You can do: os.environ['OPENAI_API_KEY'] = 'sk-...' in a new cell.")

# 7) Run a demo query (no OpenAI by default -> prints the prompt)
# The import happens here, not at the top of the cell, because query_demo.py is only written earlier in this same cell.
print("\n=== Running demo query ===")
from query_demo import run_query
res = run_query("What does the document say about privacy?", top_k=3,
                use_openai_emb=False,
                openai_key=os.environ.get('OPENAI_API_KEY'),
                gen_with_openai=False,
                gen_model='gpt-4o-mini')
print(json.dumps(res, indent=2))


In [None]:
# === Colab: Week4-5 Extension (Reranker + Extractive QA + Eval) ===
# Paste & run in the same Colab session where Week1-3 files exist.

import sys, subprocess, os, json, textwrap
from pathlib import Path

# ---------- 1) Install any missing packages ----------
def pip_install(packages):
    """Run `pip install` for `packages` with the notebook's own interpreter.

    Prints the command before running it and lets subprocess.CalledProcessError
    propagate when pip fails.
    """
    command = [sys.executable, "-m", "pip", "install", "--no-warn-script-location"]
    command = command + packages
    print(f"Running: {' '.join(command)}")
    subprocess.check_call(command)

try:
    # CrossEncoder is imported from sentence_transformers in rerank_and_qa.py,
    # so that is the package the reranker actually needs.
    pip_install(["sentence-transformers", "transformers", "nltk"])
except subprocess.CalledProcessError as e:
    print("Install failed:", e)
    # proceed; some packages might already exist

# Download punkt and punkt_tab if needed
import nltk
for tokenizer_resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{tokenizer_resource}")
    except LookupError:
        nltk.download(tokenizer_resource)


# ---------- 2) Write reranker + qa utility module (rerank_and_qa.py) ----------
rerank_and_qa = r'''
import math
import nltk
from typing import List, Dict, Any, Tuple

# Cross-encoder reranker (sentence-transformers cross-encoder)
try:
    from sentence_transformers import CrossEncoder
except Exception as e:
    CrossEncoder = None
    print("CrossEncoder import failed:", e)

_cross_encoder_cache = {}

def get_cross_encoder(model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
    """Return the CrossEncoder for `model_name`, building it once and caching it per name.

    Raises:
        RuntimeError: if the CrossEncoder import failed and the model is not cached.
    """
    try:
        return _cross_encoder_cache[model_name]
    except KeyError:
        pass
    if CrossEncoder is None:
        raise RuntimeError("CrossEncoder not available. Install sentence-transformers.")
    _cross_encoder_cache[model_name] = CrossEncoder(model_name)
    return _cross_encoder_cache[model_name]

def rerank(question: str, candidates: List[Dict[str, Any]], top_n: int = 5, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2") -> List[Dict[str, Any]]:
    """
    Score every candidate against `question` with a cross-encoder and keep the best.

    candidates: list of dicts that each have a 'text' key (other keys are carried over).
    Returns copies of the top_n candidates, highest score first, each with an
    added '_rerank_score' float. The input dicts are not modified.
    """
    if not candidates:
        return []
    encoder = get_cross_encoder(model_name)
    pairs = [[question, cand['text']] for cand in candidates]
    scored = []
    for cand, score in zip(candidates, encoder.predict(pairs)):
        annotated = cand.copy()
        annotated['_rerank_score'] = float(score)
        scored.append(annotated)
    ranked = sorted(scored, key=lambda item: item['_rerank_score'], reverse=True)
    return ranked[:top_n]

# -------- Extractive QA fallback (simple sentence-level selection) --------
def extractive_answer(question: str, top_chunks: List[Dict[str, Any]], top_k_sentences:int = 2) -> Tuple[str, List[Dict[str,Any]]]:
    """
    Simple extractive approach:
      - Split each chunk into sentences
      - Score sentences by word overlap with the question (very simple)
      - Return top sentences concatenated as the extractive answer
    Returns: (answer_text, list_of_evidence_dicts)
    evidence_dict: {'doc_id','chunk_id','sentence','score','metadata','citation'}
    """
    import re
    from collections import Counter

    def tokenize(s):
        # Single backslash: this module is emitted from a raw string, and a
        # doubled one would match a literal backslash and yield no tokens.
        return [t.lower() for t in re.findall(r"\w+", s)]

    q_counter = Counter(tokenize(question))

    evidences = []
    for chunk in top_chunks:
        for sent in nltk.sent_tokenize(chunk['text']):
            tokens = tokenize(sent)
            # score: for each sentence token, add how often it occurs in the question
            score = sum(q_counter.get(t, 0) for t in tokens)
            # tie-breaker: slightly prefer longer sentences when overlap is equal
            score = score + 0.001 * len(tokens)
            evidences.append({
                'doc_id': chunk.get('doc_id'),
                'chunk_id': chunk.get('id'),
                'sentence': sent,
                'score': float(score),
                'metadata': chunk.get('metadata', {})
            })
    evidences.sort(key=lambda x: x['score'], reverse=True)
    if not evidences:
        return ("I couldn't find relevant sentences in the provided documents.", [])
    top = evidences[:top_k_sentences]
    answer = " ".join(t['sentence'] for t in top)
    # citation label is the document title in brackets (doc_id when no title is known)
    for t in top:
        title = t.get('metadata', {}).get('title') or t.get('doc_id')
        t['citation'] = f"[{title}]"
    return (answer, top)
'''

# write_text closes the file itself, so the module is fully on disk before it is imported.
Path('rerank_and_qa.py').write_text(rerank_and_qa, encoding='utf-8')
print("Wrote rerank_and_qa.py")

# ---------- 3) Update/Write an improved query runner (query_demo_v2.py) ----------
query_demo_v2 = r'''
import os, json
from utils import build_prompt, load_jsonl
from index import Embedder, FaissIndexWrapper
from rerank_and_qa import rerank, extractive_answer

def run_query_v2(question, top_k=10, rerank_top_n=5, use_openai_emb=False, openai_key=None, gen_with_openai=False, gen_model='gpt-4o-mini'):
    """
    Steps:
    1. Embed question
    2. Retrieve top_k from FAISS
    3. Rerank top_k using cross-encoder -> top rerank_top_n
    4. If gen_with_openai True, a key is provided and the openai package imports
       -> build prompt and call OpenAI
       Else -> run extractive_answer on top reranked chunks and return extractive result with citations

    Returns a dict with 'answer', 'sources' and 'reranked' (plus 'evidence' on
    the extractive path). Raises RuntimeError if loading the index raised.
    """
    openai_mod = None
    if openai_key:
        try:
            import openai as openai_mod
            openai_mod.api_key = openai_key
        except Exception as e:
            # Remember the failure so step 4 falls back to extractive QA instead
            # of re-importing the package and crashing.
            print("OpenAI import failed:", e)
            openai_mod = None

    # 1. embed + 2. retrieve
    embedder = Embedder(use_openai=use_openai_emb)
    # Embedder falls back to the local model when openai is missing; trust its effective flag.
    dim = 1536 if embedder.use_openai else embedder.model.get_sentence_embedding_dimension()
    wrapper = FaissIndexWrapper(dim=dim)
    try:
        wrapper.load()
    except Exception as e:
        raise RuntimeError(f"Failed to load index: {e}") from e
    q_emb = embedder.embed_texts([question])
    raw_results = wrapper.search(q_emb, top_k)
    hits = raw_results[0] if raw_results else []
    # convert to candidate format expected by reranker
    candidates = [{'id': h['id'], 'text': h['text'], 'metadata': h.get('metadata', {}), 'doc_id': h.get('doc_id')} for h in hits]
    if not candidates:
        return {'answer': "No retrieved candidates.", 'sources': [], 'reranked': []}

    # 3. rerank
    try:
        reranked = rerank(question, candidates, top_n=rerank_top_n)
    except Exception as e:
        # if cross-encoder unavailable, fallback to original order
        print("Rerank failed, falling back to initial order:", e)
        reranked = candidates[:rerank_top_n]

    sources = [{'id': c['id'], 'title': c.get('metadata', {}).get('title'), '_rerank_score': c.get('_rerank_score')} for c in reranked]

    # 4. generate or extractive fallback
    if gen_with_openai and openai_mod is not None:
        prompt_chunks = [{'id': c['id'], 'text': c['text'], 'metadata': c.get('metadata', {})} for c in reranked]
        prompt = build_prompt(question, prompt_chunks)
        messages = [{"role": "system", "content": prompt}]
        if hasattr(openai_mod, 'OpenAI'):
            # openai>=1.0 removed ChatCompletion; use the module-level client.
            resp = openai_mod.chat.completions.create(
                model=gen_model, messages=messages, max_tokens=512, temperature=0.0
            )
            answer = (resp.choices[0].message.content or "").strip()
        else:
            resp = openai_mod.ChatCompletion.create(
                model=gen_model, messages=messages, max_tokens=512, temperature=0.0
            )
            answer = resp['choices'][0]['message']['content'].strip()
        return {'answer': answer, 'sources': sources, 'reranked': reranked}

    # extractive fallback
    answer_text, evidence = extractive_answer(question, reranked, top_k_sentences=2)
    return {'answer': answer_text, 'evidence': evidence, 'sources': sources, 'reranked': reranked}
'''

# write_text closes the file itself, so the module is fully on disk before it is imported.
Path('query_demo_v2.py').write_text(query_demo_v2, encoding='utf-8')
print("Wrote query_demo_v2.py")

# ---------- 4) Write a small evaluation script (eval_recall.py) ----------
eval_recall = r'''
"""
Simple evaluation script:
- Maintains a tiny QA testset (list of dicts with keys: question, gold_title)
- Computes Recall@k: whether gold doc title appears in top-k retrieved docs
"""

import json
from index import Embedder, FaissIndexWrapper
from utils import load_jsonl

def recall_at_k(testset, k=5, use_openai_emb=False):
    """Fraction of test cases whose gold title appears among the top-k retrieved chunks.

    Each test case is a dict with 'question' and 'gold_title'. Returns 0.0 for
    an empty testset.
    """
    embedder = Embedder(use_openai=use_openai_emb)
    if use_openai_emb:
        dim = 1536
    else:
        dim = embedder.model.get_sentence_embedding_dimension()
    wrapper = FaissIndexWrapper(dim=dim)
    wrapper.load()

    def is_hit(case):
        question, gold = case['question'], case['gold_title']
        retrieved = wrapper.search(embedder.embed_texts([question]), k)[0]
        return gold in [r.get('metadata', {}).get('title') for r in retrieved]

    hit_count = sum(1 for case in testset if is_hit(case))
    total = len(testset)
    return hit_count / total if total > 0 else 0.0

if __name__=='__main__':
    # example small testset; extend with your own mappings to real documents
    sample_questions = (
        "What does the document say about privacy?",
        "Who manages users and content?",
        "What are acceptable uses?",
    )
    testset = [{"question": q, "gold_title": "sample_doc.txt"} for q in sample_questions]
    for k in (1, 3, 5):
        print(f"Recall@{k}: {recall_at_k(testset, k=k):.3f}")
'''

# write_text closes the file itself, so the script is fully on disk before it is run.
Path('eval_recall.py').write_text(eval_recall, encoding='utf-8')
print("Wrote eval_recall.py")

# ---------- 5) Demo run: use query_demo_v2 to show rerank + extractive QA ----------
# gen_with_openai=False, so the answer comes from the extractive fallback even when OPENAI_API_KEY is set.
print("\n=== Demo: run query_demo_v2 (rerank + extractive QA fallback) ===")
from query_demo_v2 import run_query_v2
res = run_query_v2("What does the document say about privacy?", top_k=5, rerank_top_n=3, use_openai_emb=False, openai_key=os.environ.get('OPENAI_API_KEY'), gen_with_openai=False)
print(json.dumps(res, indent=2, ensure_ascii=False))

# ---------- 6) Run small recall evaluation ----------
# Runs the generated script in a separate process; it reads the FAISS index and metadata files from the working directory.
print("\n=== Running small Recall@k evaluation (eval_recall.py) ===")
!python eval_recall.py

print("\nDone. You now have:")
print("- rerank_and_qa.py : cross-encoder reranker + extractive QA")
print("- query_demo_v2.py : end-to-end query runner with rerank + optional OpenAI generation")
print("- eval_recall.py   : small Recall@k evaluator\n")
print("Next suggestions:")
print("- If you have an OpenAI key and want model generation, set os.environ['OPENAI_API_KEY'] and call run_query_v2 with gen_with_openai=True.")
print("- Expand the small testset in eval_recall.py with real questions + gold doc titles to measure retrieval quality.")

In [None]:
# === Colab cell: Week 6–8 (FastAPI v2, eval_full, Dockerfile, docker-compose) ===
import os, textwrap, json, subprocess, sys
from pathlib import Path

ROOT = Path.cwd()

# ---------- 1) Ensure required packages ----------
def pip_install(packages):
    """Best-effort pip install of `packages` for the current interpreter.

    Unlike a plain check_call, a failing pip run is reported on stdout and
    swallowed so the rest of the cell keeps running.
    """
    command = [sys.executable, "-m", "pip", "install", "--no-warn-script-location"]
    command = command + packages
    print(f"Installing: {' '.join(packages)}")
    try:
        subprocess.check_call(command)
    except subprocess.CalledProcessError as e:
        print("Install failed for some packages:", e)

# Web-service dependencies for the FastAPI app written below.
service_packages = [
    "fastapi",
    "uvicorn[standard]",
    "python-multipart",
    "pydantic",
    "aiofiles",
    "cryptography",
    "pyjwt",
    "requests",
]
pip_install(service_packages)

# ---------- 2) rag_service_v2.py ----------
rag_service_v2 = r'''
# rag_service_v2.py
import os, threading, tempfile, uuid
from typing import List
from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel
from pathlib import Path

from ingest import ingest_file
from index import Embedder, FaissIndexWrapper
from utils import build_prompt
from rerank_and_qa import rerank, extractive_answer

# --- Service configuration and shared state (evaluated once at import time) ---
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
try:
    import openai
    if OPENAI_API_KEY:
        openai.api_key = OPENAI_API_KEY
except Exception:
    # Generation is optional; the /query endpoint checks for None.
    openai = None

INDEX_PATH = os.environ.get("INDEX_PATH", "faiss.index")
META_PATH = os.environ.get("META_PATH", "meta.pkl")
EMBED_MODEL = os.environ.get("EMBED_MODEL", "all-MiniLM-L6-v2")
USE_OPENAI_EMB = os.environ.get("USE_OPENAI_EMB", "false").lower() in ("1","true","yes")

app = FastAPI(title="RAG Assistant v2")

embedder = Embedder(model_name=EMBED_MODEL, use_openai=USE_OPENAI_EMB)
# Embedder falls back to the local model when the openai package is missing,
# so size the index from its effective mode rather than the requested one.
dim = 1536 if embedder.use_openai else embedder.model.get_sentence_embedding_dimension()
wrapper = FaissIndexWrapper(dim=dim, index_path=INDEX_PATH, meta_path=META_PATH)
try:
    wrapper.load()
    # load() skips missing files without raising, so check the file to report honestly.
    if os.path.exists(INDEX_PATH):
        print("Loaded existing FAISS index.")
    else:
        print("No existing index loaded; starting empty.")
except Exception:
    print("No existing index loaded; starting empty.")

# Guards every read/write of `wrapper` shared between request handlers and the
# background indexing thread.
_index_lock = threading.Lock()

def index_chunks(records):
    """Embed `records`, add them to the shared index and persist it; returns len(records).

    Embedding happens outside the lock; only the index mutation and the save
    are serialized through _index_lock.
    """
    texts = []
    metas = []
    for rec in records:
        texts.append(rec['text'])
        metas.append({
            'id': rec['id'],
            'doc_id': rec.get('doc_id'),
            'metadata': rec.get('metadata', {}),
            'text': rec['text'],
        })
    vectors = embedder.embed_texts(texts)
    with _index_lock:
        wrapper.add(vectors, metas)
        wrapper.save()
    return len(records)

@app.post("/upload")
async def upload(files: List[UploadFile] = File(...)):
    """Ingest the uploaded files and index their chunks in a background thread.

    Each upload is spooled to a temporary file (keeping its extension so
    ingest_file picks the right parser), chunked, then deleted. Responds with
    400 when no file is given or no chunk could be produced.

    The returned job_id is only an opaque identifier; nothing in this module
    tracks the background job's progress.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided")
    all_records = []
    for up in files:
        # filename may be missing on some clients; fall back to plain text.
        suffix = Path(up.filename or "").suffix or ".txt"
        content = await up.read()
        # Write and CLOSE the temp file before ingesting so the parser reads
        # fully flushed data and can reopen the path on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(content)
            tmp_path = tmp.name
        try:
            recs = ingest_file(tmp_path, title=up.filename)
            all_records.extend(recs)
        finally:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best effort cleanup; a leftover temp file must not fail the upload.
                pass
    if not all_records:
        raise HTTPException(status_code=400, detail="No chunks created")
    job_id = uuid.uuid4().hex
    thread = threading.Thread(target=index_chunks, args=(all_records,), daemon=True)
    thread.start()
    return {"status": "accepted", "job_id": job_id, "chunks": len(all_records)}

# Request body for POST /query.
class QueryIn(BaseModel):
    # The user's question; the handler rejects it when blank after stripping.
    question: str
    # Number of hits requested from the FAISS index before reranking.
    top_k: int = 10
    # Number of candidates kept after reranking.
    rerank_top_n: int = 5
    # True requests OpenAI generation; the handler falls back to extractive QA
    # when the openai package or OPENAI_API_KEY is missing.
    gen: bool = False

@app.post("/query")
def query(q: QueryIn):
    """Answer a question from the indexed chunks.

    Retrieves q.top_k hits, reranks them down to q.rerank_top_n, then either
    asks OpenAI for a generated answer (q.gen with SDK and key available) or
    returns an extractive answer with its evidence sentences.
    Responds with 400 for a blank question or a non-positive top_k.
    """
    q_text = q.question.strip()
    if not q_text:
        raise HTTPException(status_code=400, detail="question required")
    if q.top_k < 1:
        # Reject here rather than handing a non-positive k to the index search.
        raise HTTPException(status_code=400, detail="top_k must be >= 1")
    q_emb = embedder.embed_texts([q_text])
    with _index_lock:
        results = wrapper.search(q_emb, top_k=q.top_k)
    hits = results[0] if results else []
    candidates = [{'id': h['id'], 'text': h['text'],
                   'metadata': h.get('metadata', {}), 'doc_id': h.get('doc_id')} for h in hits]
    if not candidates:
        return {"answer": "No documents indexed.", "sources": []}
    try:
        reranked = rerank(q_text, candidates, top_n=q.rerank_top_n)
    except Exception as e:
        # Keep serving with retrieval order, but leave a trace of the failure.
        print("Rerank failed, falling back to initial order:", e)
        reranked = candidates[:q.rerank_top_n]
    sources = [{'id': c['id'], 'title': c.get('metadata', {}).get('title'),
                '_rerank_score': c.get('_rerank_score')} for c in reranked]
    if q.gen and openai is not None and OPENAI_API_KEY:
        prompt_chunks = [{'id': c['id'], 'text': c['text'], 'metadata': c.get('metadata', {})} for c in reranked]
        prompt = build_prompt(q_text, prompt_chunks)
        gen_model = os.environ.get("GEN_MODEL", "gpt-4o-mini")
        messages = [{"role": "system", "content": prompt}]
        try:
            if hasattr(openai, 'OpenAI'):
                # openai>=1.0 removed ChatCompletion; use the module-level client.
                resp = openai.chat.completions.create(
                    model=gen_model, messages=messages, max_tokens=512, temperature=0.0
                )
                answer = (resp.choices[0].message.content or "").strip()
            else:
                resp = openai.ChatCompletion.create(
                    model=gen_model, messages=messages, max_tokens=512, temperature=0.0
                )
                answer = resp['choices'][0]['message']['content'].strip()
        except Exception as e:
            answer = f"[Generation failed: {e}]"
        return {"answer": answer, "sources": sources}
    answer_text, evidence = extractive_answer(q_text, reranked, top_k_sentences=2)
    return {"answer": answer_text, "evidence": evidence, "sources": sources}

@app.post("/delete-index")
def delete_index():
    """Remove the persisted index and metadata files and reset the in-memory index.

    Responds with 500 carrying the error text if anything fails.
    """
    try:
        import faiss
        # Take the same lock as index_chunks and /query so a reset cannot
        # interleave with a background add/save or a running search.
        with _index_lock:
            if os.path.exists(wrapper.index_path):
                os.remove(wrapper.index_path)
            if os.path.exists(wrapper.meta_path):
                os.remove(wrapper.meta_path)
            wrapper.metadata = []
            wrapper.index = faiss.IndexFlatIP(wrapper.dim)
        return {"status": "deleted"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
def health():
    """Liveness probe; `indexed_docs` is the number of metadata entries held in memory."""
    entry_count = len(wrapper.metadata)
    return {"status": "ok", "indexed_docs": entry_count}
'''
# write_text closes the file itself; the encoding is explicit to match the other generated modules.
Path("rag_service_v2.py").write_text(rag_service_v2, encoding="utf-8")

# ---------- 3) eval_full.py (corrected) ----------
eval_full = r'''
# eval_full.py (corrected)
import json
from query_demo_v2 import run_query_v2

def main():
    """Run three fixed sanity-check questions through run_query_v2 and print the results.

    Uses local embeddings and the extractive path (no OpenAI). A failing
    question is reported and the loop moves on to the next one.
    """
    questions = (
        "What does the document say about privacy?",
        "Who manages users and content?",
        "What are acceptable uses?",
    )
    separator = "-" * 60
    print("Running sanity-check queries:\n")
    for q in questions:
        try:
            res = run_query_v2(
                q,
                top_k=5,
                rerank_top_n=3,
                use_openai_emb=False,
                openai_key=None,
                gen_with_openai=False,
            )
            print("Q:", q)
            print("Answer:", res.get("answer"))
            evidence = res.get("evidence")
            if evidence:
                print("Evidence:")
                for item in evidence:
                    label = item.get("metadata", {}).get("title") or item.get("doc_id")
                    print(f" - {label}: {item.get('sentence')[:120]} (score={item.get('score')})")
            source_titles = [s.get("title") for s in res.get("sources", [])]
            print("Sources:", source_titles)
            print(separator)
        except Exception as ex:
            print("Error on query:", q, ex)

if __name__=="__main__":
    main()
'''
# write_text closes the file itself, so the script is fully on disk before it is run below.
Path("eval_full.py").write_text(eval_full, encoding="utf-8")

# ---------- 4) Dockerfile ----------
dockerfile = r'''
FROM python:3.11-slim
WORKDIR /app
ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1
RUN apt-get update && apt-get install -y build-essential git curl && rm -rf /var/lib/apt/lists/*
COPY requirements-runtime.txt /app/requirements-runtime.txt
RUN pip install --no-cache-dir -r requirements-runtime.txt
# rerank_and_qa.py calls nltk.sent_tokenize, which needs tokenizer data at runtime
RUN python -m nltk.downloader punkt punkt_tab
COPY . /app
EXPOSE 8000
CMD ["uvicorn","rag_service_v2:app","--host","0.0.0.0","--port","8000"]
'''
Path("Dockerfile").write_text(dockerfile, encoding="utf-8")

# ---------- 5) requirements-runtime.txt ----------
requirements_runtime = "\n".join([
    "fastapi",
    "uvicorn[standard]",
    "python-multipart",
    "sentence-transformers",
    "faiss-cpu",
    "pdfplumber",
    "python-docx",
    "beautifulsoup4",
    "openai",
    "pydantic",
    "aiofiles",
    # rag_service_v2 imports rerank_and_qa, which imports nltk at module level
    "nltk"
])
Path("requirements-runtime.txt").write_text(requirements_runtime, encoding="utf-8")

# ---------- 6) docker-compose.yml ----------
docker_compose = r'''
version: '3.8'
services:
  rag:
    build: .
    ports:
      - "8000:8000"
    volumes:
      - .:/app
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
      - EMBED_MODEL=all-MiniLM-L6-v2
      - INDEX_PATH=faiss.index
      - META_PATH=meta.pkl
'''
Path("docker-compose.yml").write_text(docker_compose, encoding="utf-8")

print("✅ Wrote rag_service_v2.py, eval_full.py, Dockerfile, requirements-runtime.txt, docker-compose.yml")

# ---------- 7) Run sanity check ----------
# eval_full.py runs in a child interpreter; a non-zero exit is reported, not raised.
print("\n=== Running eval_full.py ===")
sanity_command = [sys.executable, "eval_full.py"]
try:
    subprocess.check_call(sanity_command)
except subprocess.CalledProcessError as e:
    print("Sanity-check failed:", e)

next_steps = (
    "\nAll Week 6–8 files are ready. Next steps:",
    "- Run `uvicorn rag_service_v2:app --reload --port 8000` locally to start the API.",
    "- Or build with Docker: `docker build -t rag-assistant .` then `docker run -p 8000:8000 rag-assistant`.",
    "- Or use docker-compose: `docker-compose up --build`.",
)
for step_line in next_steps:
    print(step_line)


In [None]:
# Run the FastAPI server
# Launches uvicorn through the notebook's shell escape, serving rag_service_v2:app on port 8000.
# NOTE(review): uvicorn serves in the foreground, so this cell presumably keeps
# running until it is interrupted — confirm that is intended for this notebook.
get_ipython().system('uvicorn rag_service_v2:app --reload --port 8000')