In [None]:
# vector_similarity_search.ipynb
# Build vector database for similarity search using FAISS

# -- Imports --
from pathlib import Path

import faiss
import numpy as np
import pandas as pd

In [None]:
# -- Load Processed Data --
data_path = "../data/processed_toronto_hpi.csv"
df = pd.read_csv(data_path)

# -- Feature Selection for Embeddings --
# Each row of `df` becomes one vector; row position i in `df` is FAISS id i.
# NOTE(review): the columns are used unscaled, so the L2 distance weights each
# feature by its raw numeric range -- consider standardizing if that is not intended.
features = ['Year', 'Month', 'HPI']
# FAISS expects a C-contiguous float32 matrix; `.values.astype(...)` alone does
# not guarantee the memory layout, so enforce it explicitly.
embeddings = np.ascontiguousarray(df[features].values, dtype='float32')

# -- Initialize FAISS index --
dim = embeddings.shape[1]  # number of dimensions (one per selected feature)
index = faiss.IndexFlatL2(dim)  # exact (brute-force) L2 search

# -- Add embeddings to FAISS index --
index.add(embeddings)

print(f"✅ Added {index.ntotal} embeddings to FAISS index.")

# -- Example Query --
TOP_K = 5  # number of nearest neighbours to retrieve
query_vector = np.array([[2024, 5, 350]], dtype='float32')  # Example query (year, month, price index)
distances, indices = index.search(query_vector, k=TOP_K)

print(f"\nTop {TOP_K} similar records:")
for idx, dist in zip(indices[0], distances[0]):
    # FAISS pads the result with label -1 when the index holds fewer than
    # TOP_K vectors; `df.iloc[-1]` would silently show the last row instead.
    if idx < 0:
        continue
    print(f"Record: {df.iloc[idx].to_dict()}, Distance: {dist:.2f}")

# -- Save FAISS index --
index_path = "../model/vector_index.faiss"
# Create the target directory first so a fresh checkout does not fail on write.
Path(index_path).parent.mkdir(parents=True, exist_ok=True)
faiss.write_index(index, index_path)
print(f"✅ FAISS index saved to '{index_path}'")
