In [1]:
from langchain_core.documents import Document

In [2]:
import pandas as pd

# Location of the product CSV and how many leading rows to keep.
data_path, max_rows = "../data/amazon.csv", 100

# The whole file is parsed first and only then cut down to the first
# `max_rows` rows.
df = pd.read_csv(data_path).iloc[:max_rows]

In [3]:
def _build_product_content(row: pd.Series) -> str:
    return f"""Product name is {row["product_name"]} and it's category is {row["category"]}. The actual price is ${row["actual_price"]} with a discount of {row["discount_percentage"]}%. It has a rating of {row["rating"]}/5.0 based on {row["rating_count"]} reviews. Following is the detailed description and customer reviews:
- About Product: {row["about_product"]}
- Customer Reviews: {row["review_content"]}
"""


In [4]:
# Metadata key -> source column of `df` stored alongside each document.
metadata_columns = {
    "product_id": "product_id",
    "product_name": "product_name",
    "category": "category",
    "price": "actual_price",
    "discount": "discount_percentage",
    "rating": "rating",
}

# One Document per product row: the rendered description becomes the page
# content, the selected columns become the metadata.
documents = [
    Document(
        page_content=_build_product_content(row),
        metadata={key: row[column] for key, column in metadata_columns.items()},
    )
    for _, row in df.iterrows()
]

print(len(documents))

100


In [5]:
# Show the first generated Document to check its page content and metadata.
documents[0]

Document(metadata={'product_id': 'B07JW9H4J1', 'product_name': 'Wayona Nylon Braided USB to Lightning Fast Charging and Data Sync Cable Compatible for iPhone 13, 12,11, X, 8, 7, 6, 5, iPad Air, Pro, Mini (3 FT Pack of 1, Grey)', 'category': 'Computers&Accessories|Accessories&Peripherals|Cables&Accessories|Cables|USBCables', 'price': '₹1,099', 'discount': '64%', 'rating': '4.2'}, page_content="Product name is Wayona Nylon Braided USB to Lightning Fast Charging and Data Sync Cable Compatible for iPhone 13, 12,11, X, 8, 7, 6, 5, iPad Air, Pro, Mini (3 FT Pack of 1, Grey) and it's category is Computers&Accessories|Accessories&Peripherals|Cables&Accessories|Cables|USBCables. The actual price is $₹1,099 with a discount of 64%%. It has a rating of 4.2/5.0 based on 24,269 reviews. Following is the detailed description and customer reviews:\n- About Product: High Compatibility : Compatible With iPhone 12, 11, X/XsMax/Xr ,iPhone 8/8 Plus,iPhone 7/7 Plus,iPhone 6s/6s Plus,iPhone 6/6 Plus,iPhone 5

In [6]:
import chromadb
from langchain_classic.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Name of the Chroma collection that holds the product embeddings.
collection_name = "test_collection"

# In-memory Chroma client. get_or_create_collection keeps this cell
# re-runnable: create_collection fails when the collection is still present
# from an earlier run of the cell.
client = chromadb.Client()
collection = client.get_or_create_collection(name=collection_name)

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Hand the client and collection name to the vector store so it writes into
# the collection created above; previously both were created but never used.
# Positional ids are intended to make a re-run overwrite the same entries
# instead of adding duplicates (relies on the store upserting by id —
# TODO confirm for the installed version).
vector_store = Chroma.from_documents(
    documents,
    embedding_model,
    ids=[f"product-{position}" for position in range(len(documents))],
    collection_name=collection_name,
    client=client,
)

In [12]:
import os
from langchain_groq import ChatGroq
from dotenv import load_dotenv

# Load variables from a .env file; override=True lets values from the file
# replace ones already present in the environment.
load_dotenv(override=True)

# The API key is read from the environment so it never appears in the notebook.
groq_api_key = os.getenv("GROQ_API_KEY")
model = ChatGroq(model="llama-3.1-8b-instant", api_key=groq_api_key)

In [13]:
from langchain_classic.chains import RetrievalQA

# Retriever over the product vector store, using its default search settings.
retriever = vector_store.as_retriever()

# Question-answering chain: the retriever supplies documents, the chat model
# writes the answer, and the retrieved documents are returned with the result.
qa_chain = RetrievalQA.from_chain_type(
    return_source_documents=True,
    retriever=retriever,
    llm=model,
)

In [14]:
# NOTE(review): this is an aggregate question over the whole catalogue, but
# the chain presumably only sees the few documents the retriever returns.
# Verify the answer against `df` before trusting it.
query = "Which product has the highest discount?"
response = qa_chain.invoke({"query": query})

answer = response["result"]
print("Response:", answer)

Response: The product with the highest discount is Amazonbasics Micro Usb Fast Charging Cable For Android Smartphone,Personal Computer,Printer With Gold Plated Connectors (6 Feet, Black) with a discount of 50%.
