In [10]:
import pandas as pd
import os

# The cleaned product catalogue lives in ../data relative to the notebook's
# working directory.
catalogue_path = os.path.join(
    os.getcwd(), "..", "data", "Amazon-Products-Cleaned.csv"
)
df = pd.read_csv(catalogue_path)

# Preview the first rows to sanity-check the columns that later cells rely on.
df.head()

Unnamed: 0,name,main_category,sub_category,ratings,no_of_ratings,discount_price,actual_price
0,lloyd 1.5 ton 3 star inverter split ac 5 in 1 ...,appliances,air conditioners,4.2,2255,32999.0,58990.0
1,lg 1.5 ton 5 star ai dual inverter split ac co...,appliances,air conditioners,4.2,2948,46490.0,75990.0
2,lg 1 ton 4 star ai dual inverter split ac copp...,appliances,air conditioners,4.2,1206,34490.0,61990.0
3,lg 1.5 ton 3 star ai dual inverter split ac co...,appliances,air conditioners,4.0,69,37990.0,68990.0
4,carrier 1.5 ton 3 star inverter split ac coppe...,appliances,air conditioners,4.1,630,34490.0,67790.0


In [2]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used to encode user queries before the FAISS search.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm
2025-02-17 13:56:41.294866: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
import faiss

# Pre-built FAISS index stored in ../model relative to the working directory.
# NOTE(review): retrieve_products uses the returned ids as positional row
# numbers into `df`, so the index must have been built from the same CSV in the
# same row order — confirm.
faiss_index_path = os.path.join(os.getcwd(), "..", "model", "faiss_index.index")
index = faiss.read_index(faiss_index_path)

In [4]:
import numpy as np
import re

# Function to retrieve products with filtering & ranking
def retrieve_products(query, budget=None, top_k=5):
    """Return up to ``top_k`` catalogue rows that match ``query``.

    The query is embedded with ``embedding_model``, the ``top_k * 2`` nearest
    entries are fetched from the FAISS ``index``, and the matching rows of
    ``df`` are optionally filtered by price and then re-ranked.

    Args:
        query: Free-text user request. Also inspected for "cheapest"-style
            wording, which switches the ranking to price-first.
        budget: Maximum ``discount_price`` to keep, or ``None`` for no limit.
            A budget of ``0`` is honoured as a real limit.
        top_k: Maximum number of rows to return.

    Returns:
        A ``pandas.DataFrame`` slice of ``df`` with at most ``top_k`` rows
        (possibly empty when the budget filter removes every candidate).
    """
    # Encode the query
    query_embedding = embedding_model.encode([query])
    query_embedding = np.array(query_embedding).astype("float32")

    # Search the FAISS index
    distances, indices = index.search(query_embedding, top_k * 2)  # Retrieve more for filtering

    # FAISS pads the result with -1 when it finds fewer neighbours than asked
    # for; df.iloc[-1] would silently return the *last* catalogue row, so drop
    # those placeholders before indexing.
    hit_positions = indices[0]
    hit_positions = hit_positions[hit_positions >= 0]
    retrieved_df = df.iloc[hit_positions]

    # Apply budget filtering if specified (explicit None check so that a
    # budget of 0 is not mistaken for "no budget").
    if budget is not None:
        retrieved_df = retrieved_df[retrieved_df["discount_price"] <= budget]

    # Detect if the user wants the "cheapest" product
    if re.search(r'\b(cheapest|lowest price|budget-friendly|affordable)\b', query, re.IGNORECASE):
        # Sort by price first, then ratings
        retrieved_df = retrieved_df.sort_values(by=["discount_price", "ratings", "no_of_ratings"], ascending=[True, False, False])
    else:
        # Default sorting: prioritize ratings
        retrieved_df = retrieved_df.sort_values(by=["ratings", "no_of_ratings"], ascending=[False, False])

    return retrieved_df.head(top_k)

In [8]:
import openai

# OpenAI-compatible client pointed at the OpenRouter endpoint.
# The API key is read from the OPENROUTER_API_KEY environment variable so it is
# never committed with the notebook; when the variable is unset the key falls
# back to an empty string, which lets the client be constructed but will be
# rejected by the service on the first request.
client = openai.OpenAI(api_key=os.environ.get("OPENROUTER_API_KEY", ""),
                       base_url="https://openrouter.ai/api/v1")

# Function to generate a response using GPT
def generate_response(query, retrieved_products):
    """Ask the chat model for a recommendation based on the retrieved rows.

    Args:
        query: The user's original free-text request.
        retrieved_products: DataFrame whose rows provide ``name``,
            ``main_category``, ``sub_category``, ``ratings``,
            ``no_of_ratings``, ``discount_price`` and ``actual_price``.

    Returns:
        The text content of the first completion choice.
    """
    # One summary line per product, in the order the rows were ranked.
    product_lines = []
    for _, row in retrieved_products.iterrows():
        heading = f"{row['name']} ({row['main_category']} - {row['sub_category']}): "
        rating_part = f"Ratings: {row['ratings']} ({row['no_of_ratings']} ratings), "
        price_part = f"Discount Price: {row['discount_price']}, Actual Price: {row['actual_price']}"
        product_lines.append(heading + rating_part + price_part)
    product_details = "\n".join(product_lines)

    # Assemble the user prompt: instruction, query, product list, answer cue.
    prompt = "\n".join([
        "You are a shopping assistant. Recommend products based on the user's query.",
        f"Query: {query}",
        "Products:",
        product_details,
        "Response:",
    ])

    chat_messages = [
        {"role": "system", "content": "You are a helpful shopping assistant."},
        {"role": "user", "content": prompt},
    ]

    # Send the conversation through the configured client.
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
        max_tokens=500,   # cap on the length of the generated answer
        temperature=0.7,  # 0 = deterministic, 1 = more creative
    )

    # Only the first choice's text is returned.
    first_choice = completion.choices[0]
    return first_choice.message.content

In [6]:
# A number only counts as a budget when it is introduced by a budget keyword
# ("under", "below", "budget of", ...) or a currency marker ("$", "₹", "Rs").
# Bare numbers such as "1.5 ton", "5 star" or "16 GB" are product attributes.
_BUDGET_RE = re.compile(
    r"""
    (?:
        \b(?:under|below|upto|up\s+to|less\s+than|at\s+most|
             max(?:imum)?|budget(?:\s+(?:of|is))?)\b
        \s*:?\s*(?:rs\.?|inr|₹|\$)?\s*
      |
        (?:\brs\.?|\binr\b|₹|\$)\s*
    )
    (?P<amount>\d+(?:,\d{2,3})*)   # digits with optional thousands separators
    (?:\.\d+)?                     # fractional part is ignored
    \s*(?P<kilo>k\b)?              # optional "k" suffix, e.g. 20k
    """,
    re.IGNORECASE | re.VERBOSE,
)


def extract_budget(query):
    """Extract a price ceiling from a free-text query.

    Only numbers that are clearly marked as money are considered: those
    preceded by a budget keyword (e.g. "under 50000", "budget of 30000") or by
    a currency marker (e.g. "$1,500", "Rs. 999"). Thousands separators are
    accepted, a trailing "k" multiplies by 1000, and any fractional part is
    dropped.

    Args:
        query: The user's free-text request.

    Returns:
        The budget as an ``int``, or ``None`` when the query names no budget.
    """
    match = _BUDGET_RE.search(query)
    if match is None:
        return None

    amount = int(match.group("amount").replace(",", ""))
    if match.group("kilo"):
        amount *= 1000
    return amount

def shopping_assistant(query):
    """Answer a shopping query end to end.

    Derives a budget from the query text, retrieves matching products under
    that budget, and returns the generated recommendation text.
    """
    # Retrieval: the budget (if any) is parsed from the same query string.
    matched_products = retrieve_products(query, extract_budget(query))

    # Generation: hand the query and the retrieved rows to the chat model.
    return generate_response(query, matched_products)

In [11]:
# Demo run: this query contains no number, so extract_budget returns None and
# retrieve_products applies no price filter; results are ranked by rating.
query = "Find a good laptop with high ratings"
response = shopping_assistant(query)
print(response)

Based on your query for a good laptop with high ratings, here are some options for you to consider:

1. Lenovo ThinkBook 15 G3 Ryzen 3 15.6 FHD Thin and Light Laptop
   - Ratings: 4.4 (5 ratings)
   - Discount Price: 35,990.0
   - Actual Price: 58,500.0

2. Acer Nitro 5 Gaming Laptop Intel Core i5-11400H 11th Gen Processor
   - Ratings: 4.2 (492 ratings)
   - Discount Price: 62,990.0
   - Actual Price: 88,999.0

3. Lenovo IdeaPad Gaming 3 Intel Core i5 11th Gen 15.6 FHD IPS Gaming Laptop
   - Ratings: 4.1 (492 ratings)
   - Discount Price: 54,990.0
   - Actual Price: 82,490.0

4. Acer Aspire 5 Gaming Laptop Intel Core i5 12th Gen 12-Cores Processor
   - Ratings: 4.1 (171 ratings)
   - Discount Price: 61,990.0
   - Actual Price: 84,999.0

5. Acer Extensa 15 Lightweight Laptop Intel Core i3 11th Gen Processor
   - Ratings: 4.0 (573 ratings)
   - Discount Price: 34,490.0
   - Actual Price: 44,999.0

These laptops have high ratings and offer a range of features suitable for various needs.
