In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss

# Load your product dataset
product_data = pd.read_csv(r"C:\Users\shrut\Downloads\bigBasketProducts.csv")

# Extract relevant text columns
text_columns = ['product', 'category', 'sub_category', 'brand', 'description']

# Create a new column 'combined_text' with column names as prefixes
product_data['combined_text'] = product_data[text_columns].apply(lambda row: ' '.join([f"{col}:{value}" for col, value in row.items()]), axis=1)

# Assuming you have a unique identifier column
unique_ids = product_data['index']

# Load a pre-trained model (e.g., MiniLM for sentence embeddings)
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Create embeddings for your text data
embeddings = model.encode(product_data['combined_text'].tolist())

# Create an IndexFlatL2 index
index = faiss.IndexFlatL2(embeddings.shape[1])

# Add vectors to the index
index.add(embeddings)

# Save the index to a file
faiss.write_index(index, 'product_index.faiss')


In [2]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss

# Load your product dataset
product_data = pd.read_csv(r"C:\Users\shrut\Downloads\bigBasketProducts.csv")

# Load the index from a file
index = faiss.read_index("product_index.faiss")

# Load a pre-trained model (e.g., DistilBERT)
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')  # You can choose another model

# Define a custom question
custom_question = "Different types of oils?"

# Encode the custom question
question_embedding = model.encode([custom_question])

# Search for the nearest neighbors of the question vector
k = 5  # Number of neighbors to retrieve
_, neighbors = index.search(question_embedding, k)

# Retrieve the details of the nearest neighbor
neighbor_index = neighbors[0]
neighbor_details = product_data.loc[neighbor_index]

# Extract the relevant information for the answer
context = f"{neighbor_details['product']} {neighbor_details['category']} {neighbor_details['sub_category']} {neighbor_details['brand']} {neighbor_details['type']} {neighbor_details['description']}"

# Print the answer
print("Answer:", context)

Answer: 11196                            Kama Diffuser Oil
13682    Castor Carrier Oil - Pure Coldpressed Oil
22190                          Nakshatra Pooja Oil
16513                                    Pooja Oil
18930                    Almond Oil - Cold Pressed
Name: product, dtype: object 11196            Beauty & Hygiene
13682            Beauty & Hygiene
22190    Foodgrains, Oil & Masala
16513    Foodgrains, Oil & Masala
18930        Gourmet & World Food
Name: category, dtype: object 11196             Skin Care
13682             Skin Care
22190    Edible Oils & Ghee
16513    Edible Oils & Ghee
18930        Oils & Vinegar
Name: sub_category, dtype: object 11196                Omved
13682    Morpheme Remedies
22190             Mr. Gold
16513              Anandam
18930           Health 1st
Name: brand, dtype: object 11196              Aromatherapy
13682              Aromatherapy
22190         Other Edible Oils
16513         Other Edible Oils
18930    Flavoured & Other Oils
Name: type, 