In [3]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import pandas as pd

In [8]:
# Name of the sentence-transformers checkpoint used to embed product names and queries.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [6]:
# Load the product table from CSV.
products_csv = '../data/Amazon-Products.csv'
product_df = pd.read_csv(products_csv)

# Keep the 'name' column as a plain Python list: it is sliced into batches for
# encoding and indexed by position when search results are displayed.
product_name = product_df.loc[:, 'name'].tolist()

In [None]:
# Count names ending in a literal '...' (presumably truncated titles — TODO confirm).
# na=False makes missing names count as non-matches instead of propagating NaN.
ends_with_ellipsis = product_df['name'].str.endswith('...', na=False)
count = ends_with_ellipsis.sum()
print(count)

237


In [15]:
# Number of rows in the product table.
print(product_df.shape[0])

551585


In [9]:

batch_size = 1000
embedding_file = "../data/embeddings.npy"
num_products = len(product_name)

# Encode the product names one slice of `batch_size` at a time, producing one
# float32 matrix per slice.
embedding_chunks = [
    model.encode(product_name[start:start + batch_size]).astype('float32')
    for start in range(0, num_products, batch_size)
]

# Stack the per-slice matrices row-wise (row order follows product_name) and
# write the combined array to disk.
all_embeddings = np.vstack(embedding_chunks)
np.save(embedding_file, all_embeddings)


In [10]:
# Reload the embeddings from disk and read off their shape.
all_embeddings = np.load(embedding_file)
num_vectors = all_embeddings.shape[0]
dimension = all_embeddings.shape[1]

# IVF-Flat index using L2 distance: a flat L2 quantizer plus `nlist` inverted lists.
nlist = 100
quantizer = faiss.IndexFlatL2(dimension)
index = faiss.IndexIVFFlat(quantizer, dimension, nlist, faiss.METRIC_L2)

# Train on the full embedding matrix before adding any vectors.
index.train(all_embeddings)

# Add the vectors in consecutive slices, preserving row order so that index ids
# line up with positions in the embedding matrix.
batch_size = 10000
for offset in range(0, num_vectors, batch_size):
    index.add(all_embeddings[offset:offset + batch_size])

# Persist the populated index to disk.
faiss.write_index(index, "../data/large_index.ivf")

In [None]:
# Load the persisted FAISS index from disk.
index = faiss.read_index("../data/large_index.ivf")

# Query example: embed the query with the same model used for the product names.
query = "Apple iPhone 13 (256GB)"
query_embedding = model.encode([query]).astype('float32')

# Perform similarity search
k = 20  # Number of nearest neighbours to request
distances, indices = index.search(query_embedding, k)

# Display results
print("Query:", query)
# Row 0 holds the results for the single query. FAISS fills unused result slots
# with label -1 when fewer than k neighbours are found; indexing product_name
# with -1 would silently show the last product, so those slots are skipped.
for rank, (product_idx, distance) in enumerate(zip(indices[0], distances[0]), start=1):
    if product_idx < 0:
        continue
    print(f"Result {rank}:")
    print(f"  Text: {product_name[product_idx]}")
    print(f"  Distance: {distance}")

Query: Apple iPhone 13 (256GB)
Result 1:
  Text: Apple iPhone 13 (256GB) - Midnight
  Distance: 0.39686840772628784
Result 2:
  Text: Apple iPhone 13 (128GB) - Midnight
  Distance: 0.3995439112186432
Result 3:
  Text: Apple iPhone 13 (256GB) - Blue
  Distance: 0.5627175569534302
Result 4:
  Text: Apple iPhone 13 (128GB) - Blue
  Distance: 0.5786620378494263
Result 5:
  Text: Apple iPhone 13 (128GB) - Blue
  Distance: 0.5786620378494263
Result 6:
  Text: Apple iPhone 14 Plus (256 GB) - Blue
  Distance: 0.6229241490364075
Result 7:
  Text: Apple iPhone 14 Pro (256 GB) - Space Black
  Distance: 0.6652202606201172
Result 8:
  Text: Apple iPhone 14 Pro Max (128 GB) - Space Black
  Distance: 0.6759586334228516
Result 9:
  Text: Apple iPhone 12 (64GB) - Black
  Distance: 0.6826392412185669
Result 10:
  Text: Apple iPhone 14 Pro Max (256 GB) - Deep Purple
  Distance: 0.6864816546440125
Result 11:
  Text: Apple iPhone 12 (128GB) - Black
  Distance: 0.6985203623771667
Result 12:
  Text: Apple iP