In [5]:
from pathlib import Path
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss

In [7]:
import os

# === User-configurable paths & params ===
FAISS_INDEX_PATH = Path("./faiss_index.bin")        # your .bin file
META_PKL_PATH    = Path("./faiss_meta.pkl")        # your metadata pickle
HF_MODEL         = "sentence-transformers/paraphrase-mpnet-base-v2"  # same model used to index
TOP_K            = 5                                      # number of results to return
USE_GPU          = False                                  # set True if using faiss-gpu
NORMALIZE_EMBS   = True                                   # True if embeddings were normalized before indexing (common)

# Force CPU mode to avoid GPU memory issues.
# Only hide the CUDA devices when the GPU path is off: with an empty
# CUDA_VISIBLE_DEVICES the USE_GPU branch below would have no device 0 to move
# the index to. NOTE(review): the variable is only honoured if it is set before
# CUDA is initialised in this process — confirm nothing above touched the GPU.
if not USE_GPU:
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

# === Load FAISS index ===
print("Loading FAISS index from", FAISS_INDEX_PATH)
index = faiss.read_index(str(FAISS_INDEX_PATH))

# (optional) move to GPU if you have faiss-gpu installed and want to use GPU
if USE_GPU:
    # Dedicated name: `res` is rebound to the search results in the example
    # cell further down, so it must not be the handle for the GPU resources.
    gpu_resources = faiss.StandardGpuResources()
    index = faiss.index_cpu_to_gpu(gpu_resources, 0, index)
    print("Moved FAISS index to GPU")

d = index.d  # dimension
print(f"Index loaded. n_items={index.ntotal}, dim={d}")

# === Load metadata list (a list of dicts) ===
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only point META_PKL_PATH at a pickle you produced yourself or fully trust.
print("Loading metadata from", META_PKL_PATH)
with META_PKL_PATH.open("rb") as f:
    metas = pickle.load(f)
print(f"Loaded {len(metas)} metadata entries")

# Quick safety check: row i of the index is looked up as metas[i] at query
# time, so a length mismatch means some hits cannot be resolved to metadata.
if index.ntotal != len(metas):
    print("Warning: index.ntotal != len(metas) — they should match. Proceeding anyway.")

# === Load model to encode query (CPU-only) ===
# The encoder stays on CPU regardless of USE_GPU; that flag only controls FAISS.
print("Loading embedding model:", HF_MODEL, "(CPU mode)")
model = SentenceTransformer(HF_MODEL, device='cpu')
print("✅ Model loaded on CPU")

# === Helper function to query ===
def query_faiss(text_query: str, top_k: int = TOP_K):
    """Embed ``text_query`` and return its nearest neighbours from the FAISS index.

    Args:
        text_query: Free-text query to encode with the module-level ``model``.
        top_k: Number of neighbours requested from ``index.search``.

    Returns:
        A list with one dict per returned slot, in the order FAISS returned
        them. Every dict has the same keys: ``id``, ``score``, ``title``,
        ``text`` and ``meta``. Slots whose index cannot be resolved against
        ``metas`` (negative or out of range) keep their score but carry
        ``None`` for every other field.

    Raises:
        ValueError: If the encoded query dimension differs from ``index.d``.
    """
    # encode() is told not to normalise so NORMALIZE_EMBS is the single switch.
    q_vec = np.asarray(
        model.encode([text_query], normalize_embeddings=False), dtype=np.float32
    )[0]

    # Normalize if needed (for cosine similarity with IndexFlatIP)
    if NORMALIZE_EMBS:
        norm = np.linalg.norm(q_vec)
        if norm > 0:  # leave an all-zero vector untouched instead of dividing by zero
            q_vec = q_vec / norm

    # Ensure dimension matches
    if q_vec.shape[0] != index.d:
        raise ValueError(f"Dimension mismatch: query vector dim={q_vec.shape[0]} vs index dim={index.d}")

    # faiss expects a float32 array of shape (n, d)
    q_mat = np.ascontiguousarray(q_vec, dtype=np.float32).reshape(1, -1)

    # Search: both outputs have shape (1, k); take the single query row.
    distances, indices = index.search(q_mat, top_k)

    # Format results with metadata
    results = []
    for score, idx in zip(distances[0], indices[0]):
        idx = int(idx)  # plain int, so a fallback id is not a numpy scalar
        if not 0 <= idx < len(metas):
            # Unresolvable slot: emit the same keys as a real hit so consumers
            # can index r["title"] / r["text"] without a KeyError.
            results.append({
                "id": None,
                "score": float(score),
                "title": None,
                "text": None,
                "meta": None,
            })
            continue
        meta = metas[idx]
        # If index uses inner product with normalized vectors, score ~= cosine similarity
        results.append({
            "id": meta.get("id", idx),
            "score": float(score),
            "title": meta.get("title"),
            "text": meta.get("concatenated_text") or meta.get("text") or meta.get("raw_markdown"),
            "meta": meta
        })
    return results

# === Example usage: run one sample query and print the hits ===
q = "what are finance courses names ?"
res = query_faiss(q, top_k=TOP_K)

banner = f"\nTop {TOP_K} results for query: {q}\n" + "-"*60
print(banner)
for i, r in enumerate(res, start=1):
    # Cap the text at 400 characters and flatten newlines so each hit
    # prints as a single preview line.
    body = r.get("text") or ""
    snippet = body[:400].replace("\n", " ")
    print(f"{i}. id={r['id']}  score={r['score']:.4f}  title={r.get('title')}")
    print("   ", snippet)
    # To inspect the full metadata of a hit, uncomment:
    # print("   meta:", r["meta"])
    print()

Loading FAISS index from faiss_index.bin
Index loaded. n_items=88, dim=768
Loading metadata from faiss_meta.pkl
Loaded 88 metadata entries
Loading embedding model: sentence-transformers/paraphrase-mpnet-base-v2 (CPU mode)
✅ Model loaded on CPU

Top 5 results for query: what are finance courses names ?
------------------------------------------------------------
1. id=36  score=0.5810  title=FINANCE Career Objectives
    FINANCE Career Objectives  FINANCE Career Objectives   The field of finance deals with the acquisition and efficient allocation of financial resources by business firms, governments, and individuals. The Bachelor of Science in Finance is designed to develop an understanding of financial markets and institutions and to provide students with both the theoretical background in finance and the analyti

2. id=40  score=0.5631  title=Minor
    Minor  Minor   To minor in Finance, the student must successfully take a total of 5 courses (15 credits). These courses are as follows

### Reranker

In [8]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Cross-encoder checkpoint used to re-score the retrieved candidates.
RERANK_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"

# The reranker is pinned to the CPU; rerank_results moves its inputs to the
# same device.
device = "cpu"

print("Loading reranker model:", RERANK_MODEL, "(CPU mode)")
tokenizer = AutoTokenizer.from_pretrained(RERANK_MODEL)
# Module.to() returns the module itself, so loading and placement can be chained.
reranker_model = AutoModelForSequenceClassification.from_pretrained(RERANK_MODEL).to(device)
print(f"✅ Reranker loaded on {device}")

# === Helper function to rerank retrieved results ===
def rerank_results(query: str, results: list):
    """Re-score retrieved candidates with the cross-encoder and sort them.

    Args:
        query: The user query; it is paired with every candidate text.
        results: Candidate dicts as produced by ``query_faiss``. A candidate
            whose ``"text"`` key is missing or ``None`` is scored against an
            empty string instead of raising.

    Returns:
        A new list of shallow copies of ``results``, each with an added
        ``"rerank_score"`` float, sorted by that score in descending order
        (ties keep their incoming order). The input dicts are not modified.
        An empty ``results`` yields ``[]``.
    """
    if not results:
        return []

    # .get(...) or "" covers both a missing key (placeholder hits without
    # metadata) and an explicit None, neither of which the tokenizer accepts.
    texts = [r.get("text") or "" for r in results]

    # Tokenize query + candidate pairs
    inputs = tokenizer(
        [query] * len(texts), texts, padding=True, truncation=True, return_tensors="pt"
    ).to(device)

    with torch.no_grad():  # inference only, no gradients needed
        scores = reranker_model(**inputs).logits.squeeze(-1)  # shape: (n_candidates,)

    scores = scores.cpu().numpy()

    # Attach scores to copies so re-running the cell never alters the
    # retrieval results that were passed in.
    scored = [{**r, "rerank_score": float(s)} for r, s in zip(results, scores)]

    # Sort by reranker score descending
    return sorted(scored, key=lambda x: x["rerank_score"], reverse=True)

# === Example usage: rerank the hits retrieved above and print them ===
reranked_res = rerank_results(q, res)

rerank_banner = f"\nTop {TOP_K} reranked results for query: {q}\n" + "-"*60
print(rerank_banner)
for i, r in enumerate(reranked_res, start=1):
    # Same single-line preview as for the raw hits: first 400 characters,
    # newlines replaced by spaces.
    body = r.get("text") or ""
    snippet = body[:400].replace("\n", " ")
    print(f"{i}. id={r['id']}  score={r['score']:.4f}  rerank_score={r['rerank_score']:.4f}  title={r.get('title')}")
    print("   ", snippet)
    print()

Loading reranker model: cross-encoder/ms-marco-MiniLM-L-6-v2 (CPU mode)
✅ Reranker loaded on cpu

Top 5 reranked results for query: what are finance courses names ?
------------------------------------------------------------
1. id=37  score=0.5376  rerank_score=0.7419  title=Major
    Major  Major   To major in Finance, the student must successfully complete 12 courses (36 credits). These courses are finance courses and subject related courses. In addition, the student has two options for his/her senior project, in the last semester of his/her senior (graduation) year. These options are:   Option I: He/she can take a full time Internship Project course (12 credits - FIN 499). 

2. id=36  score=0.5810  rerank_score=0.1658  title=FINANCE Career Objectives
    FINANCE Career Objectives  FINANCE Career Objectives   The field of finance deals with the acquisition and efficient allocation of financial resources by business firms, governments, and individuals. The Bachelor of Science in Fin