<a href="https://colab.research.google.com/github/reychely/food-rag-web/blob/main/IR_Project_RAG_APP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import json, textwrap
from pathlib import Path

nb = {"cells": [], "metadata": {"kernelspec": {"display_name":"Python 3","language":"python","name":"python3"},
                               "language_info": {"name":"python","version":"3.x"}},
      "nbformat": 4, "nbformat_minor": 5}

def md(s): nb["cells"].append({"cell_type":"markdown","metadata":{},"source":s})
def code(s): nb["cells"].append({"cell_type":"code","metadata":{},"execution_count":None,"outputs":[], "source":s})

md("# Food / Diet / Nutrition RAG\n"
"### TXT Ingestion → Hybrid Retrieval → Cross‑Encoder Reranking → Self‑Correction → Evaluation → Notebook UI\n\n"
"This notebook is written for **readers** (course staff / reviewers). Each step explains **why** it exists and **what** it does.\n\n"
"**What’s improved vs. a basic RAG notebook**\n"
"- Hybrid retrieval (BM25 + embeddings)\n"
"- Multi-query expansion + fusion\n"
"- Cross‑encoder reranking (precision boost)\n"
"- Self‑correction verifier pass (reduces hallucinations)\n"
"- Persistence with a DB API (Chroma) so you don’t re-index every run\n"
"- Better notebook UI (filters + citations + expandable chunks)\n"
"- Retrieval evaluation (Hit@K, MRR) + optional generation faithfulness checks\n")

md("## STEP 0 — Install & Imports\n"
"Install only what you need. The notebook runs even if some optional packages are missing, but best results require them.")

code(textwrap.dedent("""
# If you are on Google Colab, uncomment:
# !pip -q install -U sentence-transformers rank-bm25 chromadb pyspellchecker rapidfuzz ipywidgets nltk

import os, re, json, math, time, hashlib
from dataclasses import dataclass
from pathlib import Path
from typing import List, Dict, Tuple, Optional

import numpy as np

# Retrieval / NLP
try:
    from sentence_transformers import SentenceTransformer, CrossEncoder
except Exception:
    SentenceTransformer = None
    CrossEncoder = None

try:
    from rank_bm25 import BM25Okapi
except Exception:
    BM25Okapi = None

try:
    from spellchecker import SpellChecker
except Exception:
    SpellChecker = None

try:
    from rapidfuzz import fuzz
except Exception:
    fuzz = None

# Optional synonyms
try:
    import nltk
    from nltk.corpus import wordnet as wn
except Exception:
    nltk = None
    wn = None

# DB API (persistence)
try:
    import chromadb
    from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
except Exception:
    chromadb = None
    SentenceTransformerEmbeddingFunction = None

# Notebook UI
try:
    import ipywidgets as widgets
    from IPython.display import display, Markdown, clear_output
except Exception:
    widgets = None

print("✅ Imports loaded")
print("sentence-transformers:", bool(SentenceTransformer))
print("CrossEncoder:", bool(CrossEncoder))
print("BM25:", bool(BM25Okapi))
print("Chroma:", bool(chromadb))
print("SpellChecker:", bool(SpellChecker))
print("ipywidgets:", bool(widgets))
print("WordNet:", bool(wn))
"""))

md("## STEP 1 — Data Sources (API → Document DB)\n"
"**Goal:** make your RAG pipeline work like a real application:\n"
"- Documents live in a \"DB\" (folder, Drive, S3, etc.)\n"
"- Ingestion code talks to an **API interface**\n\n"
"In this project we implement a simple **Local Folder API** and keep a placeholder for Google Drive.\n"
"If you already use Google Drive in your existing notebook, you can swap the `LocalFolderSource` with your Drive implementation.")

code(textwrap.dedent("""
DATA_DIR = Path("data")
TXT_DIR = DATA_DIR / "txt"      # recommended
FALLBACK_TXT_DIR = DATA_DIR     # fallback if you keep txt directly under ./data

@dataclass
class DocFile:
    source_type: str      # "txt" now, later "pdf"/"web"
    path: str             # string path or remote id
    name: str
    bytes_data: bytes     # raw bytes (API style)

class DocumentSourceAPI:
    \"\"\"Minimal interface for any future source: local folder, Google Drive, S3, etc.\"\"\"
    def list_files(self) -> List[DocFile]:
        raise NotImplementedError

class LocalFolderSource(DocumentSourceAPI):
    def __init__(self, folder: Path):
        self.folder = folder

    def list_files(self) -> List[DocFile]:
        files = []
        if not self.folder.exists():
            return files
        for fp in sorted(self.folder.rglob("*.txt")):
            files.append(DocFile(
                source_type="txt",
                path=str(fp),
                name=fp.name,
                bytes_data=fp.read_bytes()
            ))
        return files

# Choose which folder you use
source = LocalFolderSource(TXT_DIR if TXT_DIR.exists() else FALLBACK_TXT_DIR)
doc_files = source.list_files()

print("Found files:", len(doc_files))
for f in doc_files[:10]:
    print("-", f.name, "|", f.path)
"""))

md("## STEP 2 — Ingestion (TXT)\n"
"**Why:** raw text is messy. A consistent ingestion pipeline improves retrieval quality.\n\n"
"We do:\n"
"1) UTF‑8 decoding (safe for mixed languages)\n"
"2) Cleaning (whitespace, punctuation normalization)\n"
"3) Structure detection: paragraphs by double newline; long paragraphs → sentence packing\n"
"4) Metadata per chunk: `source_file, topic, language, reliability, ...`\n")

code(textwrap.dedent("""
def detect_language_light(text: str) -> str:
    # very light heuristic: Hebrew => he, else en
    if re.search(r"[\\u0590-\\u05FF]", text):
        return "he"
    return "en"

def clean_text(s: str) -> str:
    s = s.replace("\\r\\n", "\\n").replace("\\r", "\\n")
    s = re.sub(r"[ \\t]+", " ", s)
    s = re.sub(r"\\n{3,}", "\\n\\n", s)
    s = re.sub(r"[“”]", '"', s)
    s = re.sub(r"[‘’]", "'", s)
    return s.strip()

def split_paragraphs(s: str) -> List[str]:
    return [p.strip() for p in s.split("\\n\\n") if p.strip()]

def split_sentences_best_effort(s: str) -> List[str]:
    # practical sentence split: .!? + whitespace
    parts = re.split(r"(?<=[.!?])\\s+", s.strip())
    return [p.strip() for p in parts if p.strip()]

def guess_topic(text: str, filename: str) -> str:
    # simple rules (extend if you want)
    name = filename.lower()
    t = text.lower()
    if any(k in name for k in ["nutrition", "diet", "health"]) or any(k in t for k in ["vitamin", "calorie", "protein", "fiber"]):
        return "nutrition"
    if any(k in name for k in ["recipe", "cook", "meal"]) or any(k in t for k in ["ingredients", "bake", "boil"]):
        return "food"
    return "unknown"

def estimate_reliability(source_type: str, filename: str) -> str:
    # For curated course projects, you can set this manually.
    # Example heuristic:
    if "wiki" in filename.lower():
        return "medium"
    return "unknown"
"""))

md("## STEP 3 — Chunking (recursive + overlap)\n"
"**Why overlap matters:** it prevents important details from being split across chunk boundaries.\n\n"
"Design choices:\n"
"- Not too small (context loss)\n"
"- Not too big (dilution)\n"
"- Chunk IDs are stable and used as citations")

code(textwrap.dedent("""
CHUNK_MAX_CHARS = 1200
CHUNK_OVERLAP_CHARS = 200

@dataclass
class Chunk:
    chunk_id: str
    doc_id: str
    source_type: str
    source_file: str
    text: str
    meta: Dict

def recursive_chunk(paragraph: str, max_chars: int, overlap: int) -> List[str]:
    if len(paragraph) <= max_chars:
        return [paragraph]

    sents = split_sentences_best_effort(paragraph)
    chunks, cur = [], ""
    for sent in sents:
        if len(cur) + len(sent) + 1 <= max_chars:
            cur = (cur + " " + sent).strip()
        else:
            if cur:
                chunks.append(cur)
            cur = sent
    if cur:
        chunks.append(cur)

    if overlap > 0 and len(chunks) > 1:
        out, prev = [], ""
        for c in chunks:
            out.append((prev[-overlap:] + " " + c).strip() if prev else c)
            prev = c
        return out
    return chunks

def stable_id(s: str) -> str:
    return hashlib.sha1(s.encode("utf-8", errors="ignore")).hexdigest()[:10]

def ingest_to_chunks(files: List[DocFile]) -> List[Chunk]:
    out: List[Chunk] = []
    for f in files:
        # UTF-8 decode
        raw = f.bytes_data.decode("utf-8", errors="replace")
        cleaned = clean_text(raw)

        doc_id = Path(f.name).stem
        lang = detect_language_light(cleaned)
        topic = guess_topic(cleaned, f.name)
        reliability = estimate_reliability(f.source_type, f.name)

        paras = split_paragraphs(cleaned)
        for pi, p in enumerate(paras):
            pieces = recursive_chunk(p, CHUNK_MAX_CHARS, CHUNK_OVERLAP_CHARS)
            for ci, piece in enumerate(pieces):
                cid = f"{doc_id}::p{pi}::c{ci}::{stable_id(piece)}"
                out.append(Chunk(
                    chunk_id=cid,
                    doc_id=doc_id,
                    source_type=f.source_type,
                    source_file=f.path,
                    text=piece,
                    meta={
                        "source_file": f.path,
                        "source_type": f.source_type,
                        "topic": topic,
                        "language": lang,
                        "reliability": reliability,
                        "section": None,
                        "date": None
                    }
                ))
    return out

chunks = ingest_to_chunks(doc_files)
print("Chunks:", len(chunks))
print("Sample:", chunks[0].chunk_id if chunks else "—")
"""))

md("## STEP 4 — Indexing with a DB API (Chroma persistence)\n"
"**Why:** If your corpus grows, rebuilding indexes every run becomes slow.\n\n"
"We use **Chroma** as a local persistent vector DB:\n"
"- stores embeddings + documents + metadata\n"
"- supports metadata filters (topic/language)\n\n"
"If Chroma isn’t installed, the notebook falls back to in‑memory embeddings.")

code(textwrap.dedent("""
EMB_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
CHROMA_DIR = "chroma_food_rag"

def get_embedder():
    if SentenceTransformer is None:
        raise RuntimeError("Install sentence-transformers in STEP 0.")
    return SentenceTransformer(EMB_MODEL_NAME)

emb_model = get_embedder() if chunks else None

def build_chroma_collection(chunks: List[Chunk]):
    if chromadb is None or SentenceTransformerEmbeddingFunction is None:
        return None

    client = chromadb.PersistentClient(path=CHROMA_DIR)
    emb_fn = SentenceTransformerEmbeddingFunction(model_name=EMB_MODEL_NAME)

    col = client.get_or_create_collection(
        name="food_rag_chunks",
        embedding_function=emb_fn,
        metadata={"hnsw:space": "cosine"},
    )

    # Upsert only missing ids (fast incremental)
    existing = set()
    try:
        existing = set(col.get(include=[]).get("ids", []))
    except Exception:
        existing = set()

    new_ids, new_docs, new_metas = [], [], []
    for c in chunks:
        if c.chunk_id not in existing:
            new_ids.append(c.chunk_id)
            new_docs.append(c.text)
            new_metas.append(c.meta)

    if new_ids:
        col.add(ids=new_ids, documents=new_docs, metadatas=new_metas)
        print(f"✅ Added {len(new_ids)} new chunks to Chroma")
    else:
        print("✅ Chroma already up to date (no new chunks)")

    return col

chroma_col = build_chroma_collection(chunks)
print("Chroma ready:", bool(chroma_col))
"""))

md("## STEP 5 — Hybrid Retrieval (BM25 + embeddings)\n"
"**Why hybrid:**\n"
"- BM25 helps exact terms (nutrient names, measurements)\n"
"- embeddings help semantic matches (\"foods for BP\" → \"hypertension\")\n\n"
"We then combine them with **fusion** across multiple query variants.")

code(textwrap.dedent("""
# --- BM25 setup (in-memory) ---
def tokenize(text: str) -> List[str]:
    return re.findall(r"\\w+", text.lower())

bm25 = None
if BM25Okapi is not None and chunks:
    bm25 = BM25Okapi([tokenize(c.text) for c in chunks])

# --- Embedding matrix (fallback when Chroma not available) ---
vecs = None
if chromadb is None and emb_model is not None and chunks:
    vecs = emb_model.encode([c.text for c in chunks], normalize_embeddings=True, show_progress_bar=True).astype(np.float32)

def minmax_norm(vals: List[float]) -> List[float]:
    if not vals:
        return []
    mn, mx = min(vals), max(vals)
    if mx - mn < 1e-9:
        return [1.0 for _ in vals]
    return [(v - mn) / (mx - mn) for v in vals]

def retrieve_bm25(q: str, top_k: int = 10) -> List[Tuple[int, float]]:
    if bm25 is None:
        return []
    scores = bm25.get_scores(tokenize(q))
    idx = np.argsort(-scores)[:top_k]
    return [(int(i), float(scores[i])) for i in idx]

def retrieve_embed_fallback(q: str, top_k: int = 10) -> List[Tuple[int, float]]:
    if vecs is None or emb_model is None:
        return []
    qv = emb_model.encode([q], normalize_embeddings=True, show_progress_bar=False).astype(np.float32)[0]
    sims = vecs @ qv
    idx = np.argsort(-sims)[:top_k]
    return [(int(i), float(sims[i])) for i in idx]

def retrieve_chroma(q: str, top_k: int = 10, where: Optional[Dict]=None):
    if chroma_col is None:
        return []
    res = chroma_col.query(query_texts=[q], n_results=top_k, where=where, include=["documents","metadatas","distances","ids"])
    out = []
    for cid, doc, meta, dist in zip(res["ids"][0], res["documents"][0], res["metadatas"][0], res["distances"][0]):
        # Chroma returns distance; for cosine, lower is better. Convert to similarity-ish score:
        score = 1.0 - float(dist)
        out.append({"chunk_id": cid, "text": doc, "meta": meta, "score": score})
    return out
"""))

md("## STEP 6 — Query Improving (nutrition-focused)\n"
"To improve recall, we generate **2–4 alternative queries**:\n"
"- normalization (units)\n"
"- lightweight spell correction\n"
"- nutrition synonym expansion (domain dictionary)\n"
"- optional WordNet + fuzzy matching\n\n"
"Important: avoid aggressive rewrites that change meaning.")

code(textwrap.dedent("""
SYNONYMS = {
    # conditions
    "hypertension": ["high blood pressure", "bp", "blood pressure"],
    "high blood pressure": ["hypertension", "bp"],
    "cholesterol": ["ldl", "hdl", "blood lipids"],
    "diabetes": ["dm", "diabetes mellitus", "blood sugar", "glucose"],
    "obesity": ["overweight", "high bmi", "bmi"],

    # nutrients
    "vitamin c": ["ascorbic acid", "vit c"],
    "vitamin d": ["d3", "cholecalciferol", "calciferol"],
    "omega 3": ["epa", "dha", "fish oil"],
    "fiber": ["dietary fiber", "roughage"],
    "protein": ["amino acids", "lean protein"],
    "carbohydrates": ["carbs", "sugars", "starch"],
    "salt": ["sodium", "na"],

    # diet patterns
    "mediterranean diet": ["olive oil diet", "med diet"],
    "low carb": ["ketogenic", "keto"],
    "weight loss": ["calorie deficit", "fat loss"],
}

def normalize_query(q: str) -> str:
    q = q.strip().lower()
    q = re.sub(r"\\s+", " ", q)
    q = q.replace("kilogram", "kg").replace("kilograms", "kg")
    q = q.replace("milligram", "mg").replace("milligrams", "mg")
    q = q.replace("gram", "g").replace("grams", "g")
    return q

def light_spell_fix(q: str) -> str:
    if SpellChecker is None:
        return q
    sp = SpellChecker()
    toks = q.split()
    fixed = []
    for t in toks:
        fixed.append(t if len(t) <= 2 else (sp.correction(t) or t))
    return " ".join(fixed)

def wordnet_synonyms(term: str, max_syn: int = 2) -> List[str]:
    if wn is None:
        return []
    out = set()
    for syn in wn.synsets(term):
        for lemma in syn.lemmas():
            s = lemma.name().replace("_", " ").lower()
            if s != term.lower():
                out.add(s)
            if len(out) >= max_syn:
                return list(out)
    return list(out)

def expand_query(q: str, max_alts: int = 4) -> List[str]:
    qn = light_spell_fix(normalize_query(q))
    alts = [qn]

    # dictionary expansions
    for key, syns in SYNONYMS.items():
        if key in qn:
            for s in syns[:2]:
                alts.append(qn.replace(key, s))

    # WordNet expansion only for short queries
    if wn is not None and len(qn.split()) <= 3:
        for w in qn.split():
            for s in wordnet_synonyms(w, max_syn=1):
                alts.append(qn.replace(w, s))

    # fuzzy match if user typed close to a key
    if fuzz is not None:
        best_key, best_score = None, 0
        for key in SYNONYMS.keys():
            score = fuzz.partial_ratio(qn, key)
            if score > best_score:
                best_score, best_key = score, key
        if best_key and best_score >= 90 and best_key not in qn:
            alts.append(qn + " " + best_key)

    # short focused alternative
    short = " ".join([t for t in qn.split() if len(t) > 2][:6]).strip()
    if short and short != qn:
        alts.append(short)

    # dedupe + cap
    uniq, seen = [], set()
    for a in alts:
        a = a.strip()
        if a and a not in seen:
            uniq.append(a)
            seen.add(a)
    return uniq[:max_alts]

print(expand_query("Hypertensoin vitamin C foods"))
"""))

md("## STEP 7 — Retrieval Fusion + Cross‑Encoder Reranking\n"
"**Fusion** combines results from multiple query variants.\n"
"**Cross‑encoder reranking** improves precision by directly scoring (question, chunk) pairs.\n\n"
"This is usually the biggest retrieval quality boost for RAG.")

code(textwrap.dedent("""
RERANK_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
reranker = CrossEncoder(RERANK_MODEL) if CrossEncoder is not None else None

def hybrid_retrieve_one_query(q: str, top_k: int = 12, where: Optional[Dict]=None) -> Dict[str, float]:
    # returns chunk_id -> score
    scores = {}

    # embeddings (Chroma if possible)
    if chroma_col is not None:
        emb_res = retrieve_chroma(q, top_k=top_k, where=where)
        for r in emb_res:
            scores[r["chunk_id"]] = max(scores.get(r["chunk_id"], 0.0), float(r["score"]))
    else:
        emb = retrieve_embed_fallback(q, top_k=top_k)
        emb_scores = minmax_norm([s for _, s in emb])
        for (i, _), sc in zip(emb, emb_scores):
            scores[chunks[i].chunk_id] = max(scores.get(chunks[i].chunk_id, 0.0), 0.6 * sc)

    # BM25 (in-memory)
    bm = retrieve_bm25(q, top_k=top_k)
    bm_scores = minmax_norm([s for _, s in bm])
    for (i, _), sc in zip(bm, bm_scores):
        scores[chunks[i].chunk_id] = max(scores.get(chunks[i].chunk_id, 0.0), 0.4 * sc)

    return scores

def fusion_retrieve(question: str, fused_topk: int = 20, where: Optional[Dict]=None):
    variants = expand_query(question)
    fused: Dict[str, float] = {}

    for q in variants:
        s = hybrid_retrieve_one_query(q, top_k=max(12, fused_topk), where=where)
        for cid, sc in s.items():
            fused[cid] = max(fused.get(cid, 0.0), sc)

    ranked = sorted(fused.items(), key=lambda x: x[1], reverse=True)[:fused_topk]

    # Resolve chunk text + metadata
    by_id = {c.chunk_id: c for c in chunks}
    results = []
    for cid, sc in ranked:
        c = by_id.get(cid)
        if c is None:
            continue
        results.append({
            "chunk_id": c.chunk_id,
            "text": c.text,
            "meta": c.meta,
            "score": float(sc),
            "source_file": c.source_file
        })
    return results, variants

def cross_encoder_rerank(question: str, retrieved: List[Dict], top_n: int = 10) -> List[Dict]:
    if reranker is None:
        # fallback: no reranking available
        return retrieved[:top_n]
    pairs = [(question, r["text"]) for r in retrieved]
    ce_scores = reranker.predict(pairs)
    out = []
    for r, s in zip(retrieved, ce_scores):
        r2 = dict(r)
        r2["rerank_score"] = float(s)
        out.append(r2)
    out.sort(key=lambda x: x.get("rerank_score", -1e9), reverse=True)
    return out[:top_n]

# Demo
question = "What foods help with high blood pressure?"
retrieved, variants = fusion_retrieve(question, fused_topk=25, where={"topic": {"$in": ["nutrition","food","unknown"]}} if chroma_col else None)
reranked = cross_encoder_rerank(question, retrieved, top_n=10)

print("Query variants:", variants)
print("Top reranked:")
for r in reranked[:3]:
    print("-", r["chunk_id"], "| rerank:", round(r.get("rerank_score", r["score"]), 3), "|", Path(r["source_file"]).name)
"""))

md("## STEP 8 — Augmentation (Prompt with citations)\n"
"We build a strict prompt:\n"
"- Answer only from context\n"
"- If missing → say \"I don't know\"\n"
"- Always return citations (chunk IDs)")

code(textwrap.dedent("""
def build_context(chunks_list: List[Dict], max_chunks: int = 6, max_chars_each: int = 900) -> str:
    ctx = []
    for r in chunks_list[:max_chunks]:
        ctx.append(f"[{r['chunk_id']}] (source={Path(r['source_file']).name})\\n{r['text'][:max_chars_each]}")
    return "\\n\\n".join(ctx)

def build_prompt(question: str, chunks_list: List[Dict]) -> str:
    context = build_context(chunks_list)
    return f\"\"\"You are a helpful assistant.
Answer the question ONLY using the provided context.
If the context does not contain the answer, say: "I don't know from the provided documents."

Question:
{question}

Context:
{context}

Rules:
- Use ONLY context facts.
- End with: Citations: [chunk_id1, chunk_id2, ...]
\"\"\"

prompt = build_prompt(question, reranked)
print(prompt[:900], "...")
"""))

md("## STEP 9 — Generation + Self‑Correction Verifier\n"
"**Generation** produces the initial answer.\n"
"**Verifier pass** removes unsupported claims and enforces citations.\n\n"
"In this notebook we include a safe **stub generator** so it runs everywhere.\n"
"Replace `generator()` with your real model/API call when you are ready.")

code(textwrap.dedent("""
def generator_stub(prompt: str) -> str:
    # Replace this with OpenAI / local model call.
    # Keep it deterministic for reproducible evaluation.
    return (
        "I don't know from the provided documents.\\n"
        "Citations: []"
    )

def generate_answer(question: str, chunks_list: List[Dict]) -> Tuple[str, str]:
    prompt = build_prompt(question, chunks_list)
    draft = generator_stub(prompt)
    return draft, prompt

def verifier_pass(question: str, draft: str, chunks_list: List[Dict]) -> str:
    # If you have a real LLM, use it here.
    # This stub enforces a rule: if draft has no citations, force 'I don't know'.
    if "Citations" not in draft:
        return 'I don\\'t know from the provided documents.\\nCitations: []'
    return draft

draft_answer, used_prompt = generate_answer(question, reranked)
final_answer = verifier_pass(question, draft_answer, reranked)

print("Draft:", draft_answer)
print("\\nFinal:", final_answer)
"""))

md("## STEP 10 — Evaluation (Retrieval quality)\n"
"We evaluate retrieval using:\n"
"- **Hit@K** (did we retrieve at least one relevant chunk?)\n"
"- **MRR@K** (how early did the first relevant chunk appear?)\n\n"
"Create `eval_questions.jsonl` lines like:\n"
"`{\"query\": \"...\", \"relevant_chunk_ids\": [\"...\"]}`\n")

code(textwrap.dedent("""
def hit_at_k(ranked_ids: List[str], relevant: set, k: int) -> float:
    return 1.0 if any(cid in relevant for cid in ranked_ids[:k]) else 0.0

def mrr_at_k(ranked_ids: List[str], relevant: set, k: int) -> float:
    for i, cid in enumerate(ranked_ids[:k], start=1):
        if cid in relevant:
            return 1.0 / i
    return 0.0

def evaluate_retrieval(eval_path: str = "eval_questions.jsonl", ks=(1,3,5,10)):
    p = Path(eval_path)
    if not p.exists():
        print(f"❌ Missing {eval_path}. Create it first.")
        return

    rows = [json.loads(line) for line in p.read_text(encoding="utf-8").splitlines() if line.strip()]
    if not rows:
        print("❌ No evaluation rows found.")
        return

    stats = {k: {"hit": [], "mrr": []} for k in ks}

    for r in rows:
        q = r["query"]
        relevant = set(r["relevant_chunk_ids"])

        retrieved, _ = fusion_retrieve(q, fused_topk=max(ks))
        reranked = cross_encoder_rerank(q, retrieved, top_n=max(ks))
        ranked_ids = [x["chunk_id"] for x in reranked]

        for k in ks:
            stats[k]["hit"].append(hit_at_k(ranked_ids, relevant, k))
            stats[k]["mrr"].append(mrr_at_k(ranked_ids, relevant, k))

    print("=== Retrieval Evaluation ===")
    for k in ks:
        print(f"Hit@{k}: {float(np.mean(stats[k]['hit'])):.3f} | MRR@{k}: {float(np.mean(stats[k]['mrr'])):.3f}")

# evaluate_retrieval("eval_questions.jsonl")
"""))

md("## STEP 11 — Improved Notebook UI (filters + citations + expandable chunks)\n"
"This UI is designed for a demo:\n"
"- Query box\n"
"- Topic/language filters (metadata)\n"
"- TopK control\n"
"- Retrieved chunks shown in expandable accordions\n"
"- Prompt preview\n"
"- Answer + citations area")

code(textwrap.dedent("""
if widgets is None:
    print("❌ ipywidgets not installed. Install it in STEP 0.")
else:
    q_box = widgets.Text(value="foods for high blood pressure", description="Query:", layout=widgets.Layout(width="55%"))
    topk = widgets.IntSlider(value=10, min=5, max=20, step=1, description="TopK:", continuous_update=False)

    topic_dd = widgets.Dropdown(
        options=["any", "nutrition", "food", "unknown"],
        value="any",
        description="Topic:"
    )
    lang_dd = widgets.Dropdown(
        options=["any", "en", "he"],
        value="any",
        description="Lang:"
    )

    btn = widgets.Button(description="Search", button_style="primary")
    out = widgets.Output()

    def make_where(topic_val: str, lang_val: str):
        if chroma_col is None:
            return None
        where = {}
        if topic_val != "any":
            where["topic"] = topic_val
        if lang_val != "any":
            where["language"] = lang_val
        return where if where else None

    def on_search(_):
        with out:
            clear_output()
            question = q_box.value.strip()
            if not question:
                display(Markdown("**Please enter a query.**"))
                return

            where = make_where(topic_dd.value, lang_dd.value)
            retrieved, variants = fusion_retrieve(question, fused_topk=max(25, topk.value), where=where)
            reranked = cross_encoder_rerank(question, retrieved, top_n=topk.value)

            display(Markdown(f"### Query variants\n`{variants}`"))
            display(Markdown("### Top reranked chunks (click to expand)"))

            # Accordion of chunks
            items = []
            titles = []
            for i, r in enumerate(reranked, start=1):
                header = f"#{i}  rerank={r.get('rerank_score', r['score']):.3f}  |  {Path(r['source_file']).name}  |  {r['chunk_id']}"
                titles.append(header)
                items.append(widgets.Textarea(value=r["text"], layout=widgets.Layout(width="100%", height="130px")))

            acc = widgets.Accordion(children=items)
            for i, t in enumerate(titles):
                acc.set_title(i, t)
            display(acc)

            prompt = build_prompt(question, reranked)
            display(Markdown("### Prompt preview"))
            display(widgets.Textarea(value=prompt[:2500], layout=widgets.Layout(width="100%", height="220px")))

            draft, _ = generate_answer(question, reranked)
            final = verifier_pass(question, draft, reranked)

            display(Markdown("### Final answer (after verifier)"))
            display(widgets.Textarea(value=final, layout=widgets.Layout(width="100%", height="120px")))

    btn.on_click(on_search)
    display(widgets.VBox([widgets.HBox([q_box, topk, topic_dd, lang_dd, btn]), out]))
"""))

out_path = Path("/mnt/data/Rag_Ver3_FoodRAG_Rerank_Verify_Chroma_UI.ipynb")
out_path.write_text(json.dumps(nb, ensure_ascii=False, indent=2), encoding="utf-8")
str(out_path)



FileNotFoundError: [Errno 2] No such file or directory: '\\mnt\\data\\Rag_Ver3_FoodRAG_Rerank_Verify_Chroma_UI.ipynb'