In [1]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Load product data (assuming you have a CSV with 'id', 'name', 'description')
df = pd.read_csv('sampled_data.csv')
# df = df[:50] # Limit to 50 products for demonstration purposes

# Combine relevant text fields for embedding
df['combined_text'] = df['asin'] + " : " + df['categoryName'] + " : " + df['title'] + " : Price is " + df['price'].to_string()

# Compute embeddings
embeddings = model.encode(df['combined_text'].tolist(), convert_to_numpy=True)

# Save the embeddings and IDs for retrieval
np.save("product_embeddings.npy", embeddings)
df.to_csv("product_data.csv", index=False)


In [4]:
# Create a FAISS index
index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 distance for nearest neighbor search
index.add(embeddings)

# Save the FAISS index for later use
faiss.write_index(index, "product_index.faiss")


In [5]:
def search_products(query, top_k=5):
    # Load data and FAISS index
    df = pd.read_csv("product_data.csv")
    index = faiss.read_index("product_index.faiss")

    # Encode the user query
    query_embedding = model.encode([query], convert_to_numpy=True)

    # Search for the closest vectors
    distances, indices = index.search(query_embedding, top_k)

    # Retrieve matching product details
    results = df.iloc[indices[0]]
    return results[['asin', 'price', 'title', 'categoryName']]


In [15]:
# Example usage
query = "how many categories are there?"
print(search_products(query))

            asin  price                                              title  \
1574  B09PG5B8GX   9.99  Guichangkai 8 Pcs Realistic Cat Figurines, Rea...   
2810  B0CJ56FM5Q  13.69  Christmas Elf Kit, 12/24 Days Of Christmas 202...   
1735  B0CF9MSNPS   5.90  2PC Otter Keychains Cute Cartoon Animals Coupl...   
363   B0BJCZYP2J  28.27  Animal Cat and Mouse Micro Building Blocks Set...   
1615  B09N44F9FL  45.99  Plastic Stackable Storage Baskets Vegetable St...   

                        categoryName  
1574              Kids' Play Figures  
2810            Toy Advent Calendars  
1735         Luggage and travel gear  
363     Building & Construction Toys  
1615  Kitchen Storage & Organisation  
