# 05 Semantic Search (Sentence Transformers + FAISS)

Builds an embedding index so that tweets similar to a query can be retrieved during error analysis.

**Optional:** requires `sentence-transformers` and `faiss-cpu`. If they are not installed, uncomment the `%pip install` line at the top of the code cell below and re-run it.

In [None]:

%run ./00_shared_utils.ipynb
# %pip install -q sentence-transformers faiss-cpu

import pandas as pd, numpy as np, os, json
from pathlib import Path

# Load the tweets CSV into a DataFrame.
DATA_PATH = Path('/mnt/data/Tweets.csv')
df = pd.read_csv(DATA_PATH)

# Tweet bodies as a plain list; missing values become empty strings.
texts = df['text'].fillna('').tolist()

# Use the dataset's own identifiers when the column exists; otherwise fall
# back to positional row numbers so every text still has an id.
if 'textID' in df.columns:
    ids = df['textID'].fillna('').tolist()
else:
    ids = list(range(len(texts)))

# Build a FAISS inner-product index over sentence embeddings of `texts`, save
# it alongside an id lookup table, then run one sample query as a smoke test.
# This step is optional, so failures are reported rather than raised -- but a
# missing dependency and a runtime failure get different messages.
try:
    from sentence_transformers import SentenceTransformer
    import faiss

    # An empty corpus gives no embedding matrix to take a dimension from.
    if not texts:
        raise ValueError('no tweets to index')

    model = SentenceTransformer('all-MiniLM-L6-v2')
    # Embeddings are normalized, so the inner product used by IndexFlatIP
    # ranks neighbours by cosine similarity.
    embs = model.encode(texts, batch_size=64, show_progress_bar=True, normalize_embeddings=True)
    dim = embs.shape[1]
    index = faiss.IndexFlatIP(dim)
    index.add(embs.astype('float32'))

    # Create the output directory first; writing into a missing directory
    # would otherwise fail on a fresh checkout.
    registry_dir = Path('model_registry')
    registry_dir.mkdir(parents=True, exist_ok=True)
    faiss.write_index(index, str(registry_dir / 'faiss.index'))
    # Row order of this CSV matches the vector order in the index.
    pd.DataFrame({'id': ids}).to_csv(registry_dir / 'faiss_ids.csv', index=False)
    print('FAISS index built and saved.')

    q = "I love this!"
    qv = model.encode([q], normalize_embeddings=True)
    # Never ask for more neighbours than there are vectors: FAISS pads missing
    # results with label -1, which would index the last tweet by accident.
    k = min(5, len(texts))
    D, I = index.search(qv.astype('float32'), k)
    print('Query:', q)
    print(f'Top-{k} matches:')
    for rank, i in enumerate(I[0], start=1):
        print(rank, ids[i], texts[i][:120])
except ImportError as e:
    print("Semantic search unavailable — install sentence-transformers and faiss-cpu to enable.\n", e)
except Exception as e:
    # Best-effort cell: keep the notebook running, but say what actually failed
    # instead of blaming a missing install.
    print("Semantic search failed:\n", repr(e))