In [86]:
import pandas as pd
import numpy as np

In [87]:
Data_set = pd.read_csv('Data/flipkart_reviews_dataset.csv')
Data_set.head()

Unnamed: 0,product_id,product_title,rating,summary,review,location,date,upvotes,downvotes
0,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,1-more flexible2-bass is very high3-sound clar...,Shirala,8 months ago,1390,276
1,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,Super sound and good looking I like that prize,Visakhapatnam,8 months ago,643,133
2,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Super!,Very much satisfied with the device at this pr...,Kozhikode,"Feb, 2020",1449,328
3,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Super!,"Nice headphone, bass was very good and sound i...",Jaora,7 months ago,160,28
4,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,Sound quality super battery backup super quali...,New Delhi,8 months ago,533,114


In [88]:
Data_set = Data_set[['product_title','review']]
Data_set.head(5)

Unnamed: 0,product_title,review
0,BoAt Rockerz 235v2 with ASAP charging Version ...,1-more flexible2-bass is very high3-sound clar...
1,BoAt Rockerz 235v2 with ASAP charging Version ...,Super sound and good looking I like that prize
2,BoAt Rockerz 235v2 with ASAP charging Version ...,Very much satisfied with the device at this pr...
3,BoAt Rockerz 235v2 with ASAP charging Version ...,"Nice headphone, bass was very good and sound i..."
4,BoAt Rockerz 235v2 with ASAP charging Version ...,Sound quality super battery backup super quali...


# Convert Data into Dcoument formate 

In [89]:
import re

Review_list = []

for index, row in Data_set.iterrows():
    # Clean the review text by removing special characters
    cleaned_review = re.sub(r'[^A-Za-z0-9\s]', '', row['review'])
    cleaned_review1 = re.sub(r'[^A-Za-z0-9\s]', '', row['product_title'])


    # Remove the specific character 'e'
    cleaned_review = re.sub(r'e', '', cleaned_review)
    cleaned_review2 = re.sub(r'e', '', cleaned_review1)

    cleaned_review = re.sub(r'\s+', ' ', cleaned_review)  # Replace multiple spaces with a single space
    cleaned_review = cleaned_review.strip()

    object = {
        'product_name': row['product_title'],
        'Review': cleaned_review2
    }

    Review_list.append(object)  # Append to Review list


In [90]:
Review_list[1]


{'product_name': 'BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset',
 'Review': 'BoAt Rockrz 235v2 with ASAP charging Vrsion 50 Blutooth Hadst'}

# Converting list to Doc

In [91]:
from langchain_core.documents import Document

In [92]:
Docs = []

for Review in Review_list:
    metadata = {'product_name':Review['product_name']}
    page_content = Review['Review']

    doc = Document(page_content=page_content,metadata=metadata)
    Docs.append(doc)

In [93]:
Docs[1]


Document(metadata={'product_name': 'BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset'}, page_content='BoAt Rockrz 235v2 with ASAP charging Vrsion 50 Blutooth Hadst')

In [94]:
from dotenv import load_dotenv
import os
load_dotenv()

api_key = os.getenv('Gork_API_KEY')
ASTRA_DB_API_ENDPOINT = os.getenv('ASTRA_DB_API')
ASTRA_DB_APPLICATION_TOKEN = os.getenv('ASTRADB_TOKEN')
ASTRA_DB_NAMESPACE = os.getenv('ASTRADB_KEYSPACE')
Hugging_API = os.getenv('Hugging_Face_API')

In [95]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
embeddings = HuggingFaceInferenceAPIEmbeddings(api_key=Hugging_API,model_name="sentence-transformers/all-mpnet-base-v2")

In [96]:
from langchain_astradb import AstraDBVectorStore

vector_store = AstraDBVectorStore(
    embedding=embeddings,
    collection_name='Amazon_RAG',
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN,
    namespace=ASTRA_DB_NAMESPACE,
)

In [97]:
#Interst_To_DB = vector_store.add_documents(Docs)

In [98]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

In [99]:
# groq model 
llm_Model = ChatGroq(temperature=0, groq_api_key=api_key, model_name="mixtral-8x7b-32768")


In [100]:
retriver_prompt = ("""You are an intelligent assistant designed to reformulate questions. Your task is to analyze a chat history and the latest user question to determine if the question depends on prior context. Follow these steps',
'If the latest user question references context from the chat history, rewrite it as a standalone question by incorporating the necessary context from the history'
'If the latest user question is already standalone, return it without any changes'
'Ensure the reformulated question is concise, clear, and self-contained.'
""")

In [101]:
from langchain.chains import create_retrieval_chain
from langchain.chains import create_history_aware_retriever

In [102]:
# retriever from the vector store top 3 similer items
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

In [103]:
# ChatPromptTemplate using a system message, dynamic chat history, and user input
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder

Retriever_template = ChatPromptTemplate.from_messages(
    [
    ("system", retriver_prompt),
    MessagesPlaceholder(variable_name='chat_history'),
    ("human", "{input}")
    ]
)


In [104]:
chat_retriever = create_history_aware_retriever(llm_Model,retriever,Retriever_template)

In [105]:
system_prompt = """
You are an eCommerce assistant. Your primary task is to recommend products based on user queries.
Use the following guidelines:
1. Analyze product titles and reviews to provide recommendations.
2. Ensure your responses are highly relevant to the query and product context.
3. Avoid discussing topics unrelated to the product catalog.
4. Provide concise and informative answers about the products.

context:
{context}

input:
{input}

Your_Answer:
{output}

"""

In [117]:
output_prompt = ChatPromptTemplate.from_messages(
    [
    ("system", system_prompt),  # System prompt
    MessagesPlaceholder(variable_name="chat_history"),  # Placeholder for chat history
    ("human", "{input}")  # Placeholder for the user's input
]
)

In [129]:
from langchain.chains.combine_documents import create_stuff_documents_chain
question_answer_chain = create_stuff_documents_chain(llm_Model,output_prompt)

In [130]:
rag_chain = create_retrieval_chain(chat_retriever, question_answer_chain)

In [126]:
chat_history=[]

In [121]:
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

In [122]:
store = {}

In [123]:

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

In [124]:
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [131]:
conversational_rag_chain.invoke(
    {"input": "What are common ways of doing it?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

KeyError: "Input to ChatPromptTemplate is missing variables {'output'}.  Expected: ['chat_history', 'context', 'input', 'output'] Received: ['input', 'chat_history', 'context']\nNote: if you intended {output} to be part of the string and not a variable, please escape it with double curly braces like: '{{output}}'.\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_PROMPT_INPUT "