# Index Construction

In [7]:
import pandas
import torch
import faiss

In [12]:
def build_faiss_index(vector_file="qa_tensors_bert.pt", output_index="qa_faiss_index_bert.index"):
    """
    Build a FAISS index from saved question embeddings and save it to disk.

    Args:
        vector_file: Path to a file written with ``torch.save`` that holds a
            single 2-D tensor of embeddings (one row per vector).
        output_index: Path the serialized FAISS index is written to.

    Returns:
        The populated ``faiss.IndexFlatL2`` instance.

    Raises:
        TypeError: If the loaded object is not a ``torch.Tensor``.
        ValueError: If the loaded tensor is not 2-D.
    """
    # NOTE(security): torch.load unpickles the file, so only point this at
    # files you trust. map_location="cpu" lets a tensor that was saved from a
    # GPU be loaded on a machine without one.
    loaded = torch.load(vector_file, map_location="cpu")

    if not torch.is_tensor(loaded):
        raise TypeError(
            f"Expected a torch.Tensor in {vector_file}, got {type(loaded).__name__}"
        )
    if loaded.dim() != 2:
        raise ValueError(
            f"Expected a 2-D tensor (num_vectors, dim) in {vector_file}, "
            f"got shape {tuple(loaded.shape)}"
        )

    # detach(): numpy() refuses tensors that still require grad.
    # cpu(): no-op after map_location, kept so the conversion is safe on its own.
    # float32 + contiguous(): FAISS expects C-contiguous float32 rows.
    vectors = loaded.detach().cpu().to(torch.float32).contiguous().numpy()

    # Get the embedding dimension
    dim = vectors.shape[1]

    # Create FAISS index (exact search, L2 distance metric)
    index = faiss.IndexFlatL2(dim)

    # Add all vectors to the index
    index.add(vectors)
    print(f"Indexed {vectors.shape[0]} vectors with dimension {dim}")

    # Save the index to disk
    faiss.write_index(index, output_index)
    print(f"FAISS index saved to {output_index}")

    return index


In [13]:
if __name__ == "__main__":
    # Index the BERT question embeddings and persist the result alongside them.
    index = build_faiss_index(
        vector_file="../retriever/qa_tensors_bert.pt",
        output_index="../retriever/qa_faiss_index_bert.index",
    )

    # Smoke test: pull stored vector 0 back out of the index, shape it as a
    # single-row query, and look up its 5 nearest neighbours. The vector itself
    # should come back first with distance 0.
    query_vector = index.reconstruct(0)[None, :]
    distances, indices = index.search(query_vector, 5)
    print("Top 5 results:", indices, distances)

Indexed 221 vectors with dimension 768
FAISS index saved to ../retriever/qa_faiss_index_bert.index
Top 5 results: [[ 0 20  6 61 21]] [[  0.      128.62082 135.5927  138.56033 145.50655]]


In [18]:
def build_faiss_index(vector_file="qa_tensors_trans.pt", output_index="qa_faiss_index_trans.index"):
    """
    Build a FAISS index from saved question embeddings and save it to disk.

    Note: an earlier cell of this notebook defines a function with the same
    name; running this cell replaces that definition in the kernel namespace.

    Args:
        vector_file: Path to a file written with ``torch.save`` that holds a
            single 2-D tensor of embeddings (one row per vector).
        output_index: Path the serialized FAISS index is written to.

    Returns:
        The populated ``faiss.IndexFlatL2`` instance.

    Raises:
        TypeError: If the loaded object is not a ``torch.Tensor``.
        ValueError: If the loaded tensor is not 2-D.
    """
    # NOTE(security): torch.load unpickles the file, so only point this at
    # files you trust. map_location="cpu" lets a tensor that was saved from a
    # GPU be loaded on a machine without one.
    loaded = torch.load(vector_file, map_location="cpu")

    if not torch.is_tensor(loaded):
        raise TypeError(
            f"Expected a torch.Tensor in {vector_file}, got {type(loaded).__name__}"
        )
    if loaded.dim() != 2:
        raise ValueError(
            f"Expected a 2-D tensor (num_vectors, dim) in {vector_file}, "
            f"got shape {tuple(loaded.shape)}"
        )

    # detach(): numpy() refuses tensors that still require grad.
    # cpu(): no-op after map_location, kept so the conversion is safe on its own.
    # float32 + contiguous(): FAISS expects C-contiguous float32 rows.
    vectors = loaded.detach().cpu().to(torch.float32).contiguous().numpy()

    # Get the embedding dimension
    dim = vectors.shape[1]

    # Create FAISS index (exact search, L2 distance metric)
    index = faiss.IndexFlatL2(dim)

    # Add all vectors to the index
    index.add(vectors)
    print(f"Indexed {vectors.shape[0]} vectors with dimension {dim}")

    # Save the index to disk
    faiss.write_index(index, output_index)
    print(f"FAISS index saved to {output_index}")

    return index


In [19]:
if __name__ == "__main__":
    # Index the sentence-transformer question embeddings and persist the result
    # alongside them.
    index = build_faiss_index(
        vector_file="../retriever/qa_tensors_trans.pt",
        output_index="../retriever/qa_faiss_index_trans.index",
    )

    # Smoke test: pull stored vector 0 back out of the index, shape it as a
    # single-row query, and look up its 5 nearest neighbours. The vector itself
    # should come back first with distance 0.
    query_vector = index.reconstruct(0)[None, :]
    distances, indices = index.search(query_vector, 5)
    print("Top 5 results:", indices, distances)

Indexed 221 vectors with dimension 384
FAISS index saved to ../retriever/qa_faiss_index_trans.index
Top 5 results: [[ 0  6 10  1 11]] [[0.        6.026917  7.7131157 7.92223   9.683939 ]]
