In [1]:
import pandas as pd
import numpy as np

In [2]:
Data_set = pd.read_csv('Data/flipkart_reviews_dataset.csv')
Data_set.head()

Unnamed: 0,product_id,product_title,rating,summary,review,location,date,upvotes,downvotes
0,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,1-more flexible2-bass is very high3-sound clar...,Shirala,8 months ago,1390,276
1,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,Super sound and good looking I like that prize,Visakhapatnam,8 months ago,643,133
2,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Super!,Very much satisfied with the device at this pr...,Kozhikode,"Feb, 2020",1449,328
3,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Super!,"Nice headphone, bass was very good and sound i...",Jaora,7 months ago,160,28
4,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,Sound quality super battery backup super quali...,New Delhi,8 months ago,533,114


In [3]:
Data_set = Data_set[['product_title','review']]
Data_set.head(5)

Unnamed: 0,product_title,review
0,BoAt Rockerz 235v2 with ASAP charging Version ...,1-more flexible2-bass is very high3-sound clar...
1,BoAt Rockerz 235v2 with ASAP charging Version ...,Super sound and good looking I like that prize
2,BoAt Rockerz 235v2 with ASAP charging Version ...,Very much satisfied with the device at this pr...
3,BoAt Rockerz 235v2 with ASAP charging Version ...,"Nice headphone, bass was very good and sound i..."
4,BoAt Rockerz 235v2 with ASAP charging Version ...,Sound quality super battery backup super quali...


# Convert Data into Dcoument formate 

In [4]:
import re

Review_list = []

for index, row in Data_set.iterrows():
    # Clean the review text by removing special characters
    cleaned_review = re.sub(r'[^A-Za-z0-9\s]', '', row['review'])
    cleaned_review1 = re.sub(r'[^A-Za-z0-9\s]', '', row['product_title'])


    # Remove the specific character 'e'
    cleaned_review = re.sub(r'e', '', cleaned_review)
    cleaned_review2 = re.sub(r'e', '', cleaned_review1)

    cleaned_review = re.sub(r'\s+', ' ', cleaned_review)  # Replace multiple spaces with a single space
    cleaned_review = cleaned_review.strip()

    object = {
        'product_name': row['product_title'],
        'Review': cleaned_review2
    }

    Review_list.append(object)  # Append to Review list


In [5]:
Review_list[1]


{'product_name': 'BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset',
 'Review': 'BoAt Rockrz 235v2 with ASAP charging Vrsion 50 Blutooth Hadst'}

# Converting list to Doc

In [6]:
from langchain_core.documents import Document

In [7]:
Docs = []

for Review in Review_list:
    metadata = {'product_name':Review['product_name']}
    page_content = Review['Review']

    doc = Document(page_content=page_content,metadata=metadata)
    Docs.append(doc)

In [8]:
Docs[1]


Document(metadata={'product_name': 'BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset'}, page_content='BoAt Rockrz 235v2 with ASAP charging Vrsion 50 Blutooth Hadst')

In [9]:
from dotenv import load_dotenv
import os
load_dotenv()

api_key = os.getenv('Gork_API_KEY')
ASTRA_DB_API_ENDPOINT = os.getenv('ASTRA_DB_API')
ASTRA_DB_APPLICATION_TOKEN = os.getenv('ASTRADB_TOKEN')
ASTRA_DB_NAMESPACE = os.getenv('ASTRADB_KEYSPACE')
Hugging_API = os.getenv('Hugging_Face_API')

In [10]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
embeddings = HuggingFaceInferenceAPIEmbeddings(api_key=Hugging_API,model_name="sentence-transformers/all-mpnet-base-v2")

In [11]:
from langchain_astradb import AstraDBVectorStore

vector_store = AstraDBVectorStore(
    embedding=embeddings,
    collection_name='Amazon_RAG',
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN,
    namespace=ASTRA_DB_NAMESPACE,
)

In [12]:
#Interst_To_DB = vector_store.add_documents(Docs)

In [13]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

In [14]:
# groq model 
llm_Model = ChatGroq(temperature=0, groq_api_key=api_key, model_name="mixtral-8x7b-32768")


In [32]:
retriver_prompt = ("""You are an intelligent E-commerce assistant designed to reformulate questions. Your task is to analyze a chat history and the latest user question to determine if the question depends on prior context. Follow these steps',
'If the latest user question references context from the chat history, rewrite it as a standalone question by incorporating the necessary context from the history'
'If the latest user question is already standalone, return it without any changes'
'Ensure the reformulated question is concise, clear, and self-contained.'
""")

In [33]:
from langchain.chains import create_retrieval_chain
from langchain.chains import create_history_aware_retriever

In [34]:
# retriever from the vector store top 3 similer items
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

In [35]:
# ChatPromptTemplate using a system message, dynamic chat history, and user input
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder

Retriever_template = ChatPromptTemplate.from_messages(
    [
    ("system", retriver_prompt),
    MessagesPlaceholder(variable_name='chat_history'),
    ("human", "{input}")
    ]
)


In [36]:
chat_retriever = create_history_aware_retriever(llm_Model,retriever,Retriever_template)

In [48]:
system_prompt = """
You are an eCommerce assistant. Your primary task is to recommend products based on user queries.
Use the following guidelines:
1. Analyze product titles and reviews to provide recommendations.
2. Ensure your responses are highly relevant to the query and product context.
3. Avoid discussing topics unrelated to the product catalog.
4. Provide concise and informative answers about the products.
5. If the user asks for a specific product type (e.g., 'gaming laptop'), recommend only products that match the description (e.g., gaming laptops).
*  Use follow-up questions to clarify ambiguities if needed (e.g., 'What features are you looking for in a gaming laptop?').
*   Avoid recommending unrelated products unless explicitly requested by the user.

context:
{context}

input:
{input}

Your_Answer:


"""

In [49]:
output_prompt = ChatPromptTemplate.from_messages(
    [
    ("system", system_prompt),  # System prompt
    MessagesPlaceholder(variable_name="chat_history"),  # Placeholder for chat history
    ("human", "{input}")  # Placeholder for the user's input
]
)

In [50]:
from langchain.chains.combine_documents import create_stuff_documents_chain
question_answer_chain = create_stuff_documents_chain(llm_Model,output_prompt)

In [51]:
rag_chain = create_retrieval_chain(chat_retriever, question_answer_chain)

In [52]:
chat_history=[]

In [53]:
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

In [54]:
store = {}

In [55]:

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

In [56]:
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [57]:
conversational_rag_chain.invoke(
    {"input": "suggest a gaming pc ?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

"I'm glad you reached out, but it seems you may have mistakenly asked for a gaming PC while we were discussing the Ralm Buds Wireless Headset. I can certainly help you with gaming PCs, but I want to make sure I provide you with the most relevant information.\n\nWhen looking for a gaming PC, consider factors such as the processor (CPU), graphics card (GPU), RAM, and storage. Here are a few options that might interest you:\n\n1. Skytech Archangel Gaming PC: This desktop features an AMD Ryzen 5 3600 processor, NVIDIA GeForce GTX 1660 Super 6GB graphics card, 16GB DDR4 RAM, and 500GB NVMe SSD.\n\n2. iBUYPOWER Gaming PC: With an Intel Core i5-10400F processor, NVIDIA GeForce GTX 1650 Super 4GB graphics card, 8GB DDR4 RAM, and 240GB SSD + 1TB HDD, this gaming PC offers solid performance at an affordable price.\n\n3. CYBERPOWERPC Gamer Xtreme VR Gaming PC: This powerful option includes an Intel Core i5-11400F processor, NVIDIA GeForce GTX 1660 Super 6GB graphics card, 8GB DDR4 RAM, and 500GB 