In [None]:
# Install the project's dependencies into the kernel's own environment.
# `%pip` (rather than `!pip`) targets the interpreter this notebook runs on;
# `-q` keeps the installer output short.
%pip install -q -r requirements.txt 

In [None]:
# 🔧 Config | run once per session
import os, pathlib, textwrap, pprint

# The settings this notebook manages; used for the .env file and the summary below.
CONFIG_KEYS = ("MODEL_ID", "INDEX_PATH", "MAP_PATH")

# (A)  set / override env vars _for this Colab kernel only_
os.environ["MODEL_ID"]   = "intfloat/multilingual-e5-base"          # or any HF model id
os.environ["INDEX_PATH"] = "data/faiss_index"
os.environ["MAP_PATH"]   = "data/doc_map.pkl"

# (B)  optional: write them into a .env file so downstream scripts pick them up too
# NOTE: this overwrites any existing .env in the working directory.
env_path = pathlib.Path(".env")
env_path.write_text(
    "".join(f"{key}={os.environ[key]}\n" for key in CONFIG_KEYS),
    encoding="utf-8",
)

print("✅  Environment configured")
# Show ONLY the managed keys. Printing all of os.environ would dump every
# variable of the process (API tokens included) into the saved notebook output.
pprint.pprint({key: os.environ[key] for key in CONFIG_KEYS})

In [None]:
%python scripts/ingest_protocol.py --dir data/raw_pdfs --recursive

In [None]:
%python src/indexing/build_index.py --hf-model intfloat/multilingual-e5-base

In [None]:
import faiss, pickle, numpy as np
from sentence_transformers import SentenceTransformer
# Load the embedding model, the FAISS index and the id -> snippet mapping from
# the locations configured through the environment variables.
model  = SentenceTransformer(os.environ["MODEL_ID"])
index  = faiss.read_index(os.environ["INDEX_PATH"])
# NOTE(security): pickle.load can execute arbitrary code; only load a MAP_PATH
# file that this project produced itself.
# The context manager closes the file handle deterministically.
with open(os.environ["MAP_PATH"], "rb") as map_file:
    snip = pickle.load(map_file)

def search(q: str, k: int = 3) -> None:
    """Print the top-``k`` indexed snippets for the query ``q``.

    The query is embedded with the module-level ``model`` (normalised
    embeddings, cast to float32), looked up in the module-level FAISS
    ``index``, and each hit is printed as ``rank. sim=<score> → <snippet>``
    using the first 120 characters of ``snip[id]``.

    Args:
        q: Query text.
        k: Maximum number of hits to print.

    Returns:
        None. Results are written to stdout.
    """
    # NOTE(review): E5-family models are usually queried with a "query: "
    # prefix; confirm how the index was built before adding one here.
    # [None] adds the leading batch axis that index.search expects.
    vec = model.encode(q, normalize_embeddings=True).astype("float32")[None]
    scores, ids = index.search(vec, k)
    rank = 0
    for score, idx in zip(scores[0], ids[0]):
        # FAISS pads missing results with id -1 when fewer than k vectors
        # match; indexing snip with -1 would silently show the LAST snippet.
        if idx < 0:
            continue
        rank += 1
        # "sim" is the raw value returned by the index; whether larger means
        # closer depends on the index metric — presumably inner product.
        print(f"{rank}. sim={score:.3f} → {snip[int(idx)][:120]}…\n")
# Smoke test with a Ukrainian query ("headache in the temporal region"),
# using the default k=3.
search("головний біль в скроневій ділянці")