In [266]:
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage,SystemMessage,AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.chains import create_history_aware_retriever,create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import MessagesPlaceholder


from langchain_community.document_loaders import DataFrameLoader
import numpy as np
import json


In [267]:
import os
import bs4
from dotenv import load_dotenv
# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Settings the rest of the notebook expects to find in the environment.
REQUIRED_ENV_VARS = (
    "OPENAI_API_KEY",
    "LANGCHAIN_API_KEY",
    "LANGCHAIN_PROJECT",
    "GROQ_API_KEY",
    "HUGGING_FACE_KEY",
)

# Re-assigning os.environ[name] = os.getenv(name) is a no-op when the variable
# is set and an opaque "str expected, not NoneType" TypeError when it is not.
# Check explicitly instead, and report every missing name at once.
missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variables: "
        + ", ".join(missing_env_vars)
        + ". Define them in the shell or in a .env file before running this notebook."
    )

# Tracing is switched on unconditionally for this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"


In [281]:
import pandas as pd

# Phone listings published on the Hugging Face Hub as a JSON Lines file
# (one record per line, hence lines=True).
PHONES_DATASET_URL = "hf://datasets/mandeepbagga/flipkart-phones-description/data.jsonl"
df = pd.read_json(PHONES_DATASET_URL, lines=True)

In [375]:
# Display the phone-name column to get a feel for how products are titled.
df['Phone']

0       POCO M6 Pro 5G (Power Black, 128 GB)  (6 GB RAM)
1       POCO M6 Pro 5G (Forest Green, 64 GB)  (4 GB RAM)
2        POCO M6 Pro 5G (Power Black, 64 GB)  (4 GB RAM)
3              POCO C51 (Power Black, 64 GB)  (4 GB RAM)
4               POCO C51 (Royal Blue, 64 GB)  (4 GB RAM)
                             ...                        
202                           Micromax S115  (Red+Black)
203                              Micromax S115  (Purple)
204                           Micromax S115  (Teal Blue)
205    SAMSUNG Galaxy F34 5G (Mystic Green, 128 GB)  ...
206    Infinix Smart 6 HD (Force Black, 32 GB)  (2 GB...
Name: Phone, Length: 207, dtype: object

In [381]:
# Case-insensitive search of the phone names for the text 'i phone';
# rows with a missing name are treated as non-matches (na=False).
# NOTE(review): the pattern contains a space, so a name spelled "iPhone" would not match - confirm intent.
name_matches = df['Phone'].str.contains('i phone', case=False, na=False)
df[name_matches]

Unnamed: 0,Phone,Features,Description,type,text,specs


In [289]:
# Prices are synthetic: they are drawn at random rather than read from the dataset.
# A fixed seed makes the prices, and everything built from them, identical on every run.
PRICE_SEED = 42
price_rng = np.random.default_rng(PRICE_SEED)

# Whole-number prices from 10000 to 45000 inclusive (the upper bound passed to
# `integers` is exclusive, matching the original randint call).
df['price'] = price_rng.integers(10000, 45001, size=len(df))

# Keep prices as strings so they can be concatenated into the description text below.
df['price'] = df['price'].astype(str)

# Build the text that is later used as each document's content. This has to run
# after 'price' exists; the original cell order placed it first, which raises
# KeyError on a fresh kernel.
df['fullDescr'] = (
    "Name: " + df['Phone']
    + " Price: " + df['price']
    + " Feature: " + df['Features']
)

In [291]:
# Show the combined description for the row labelled 1 as a spot check.
df.loc[1, 'fullDescr']

'Name: POCO M6 Pro 5G (Forest Green, 64 GB)\xa0\xa0(4 GB RAM) Price: 12174 Feature:  4 GB RAM | 64 GB ROM | Expandable Upto 1 TB 17.25 cm (6.79 inch) Full HD+ Display 50MP + 2MP | 8MP Front Camera 5000 mAh Battery Snapdragon 4 Gen 2 Processor'

In [292]:
# Name of the column whose text is handed to the loader as the document content.
page_content_column = "fullDescr"

# Wrap the DataFrame in a LangChain loader that reads content from that column.
loader = DataFrameLoader(
    df,
    page_content_column=page_content_column,
)

In [293]:
# Load the documents produced by the DataFrame loader; `docs` feeds the JSON dump
# and the vector index built in the cells below.
docs = loader.load()

In [294]:
# Reduce each document to a plain dict (text + metadata) so it can be serialized.
docs_data = [
    {"content": doc.page_content, "metadata": doc.metadata}
    for doc in docs
]

# Convert the list to a JSON string
docs_json = json.dumps(docs_data, indent=4)

# Specify the output file path
# NOTE(review): absolute, machine-specific path - consider making it configurable.
output_file_path = "/Users/sanketsaxena/Desktop/eccomChatbot/Artifacts/data/rawData.json"

# Create the target folder when it does not exist yet; without this the write
# fails with FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

# Save JSON string to file, with an explicit encoding so the result does not
# depend on the platform default.
with open(output_file_path, "w", encoding="utf-8") as f:
    f.write(docs_json)


In [296]:
from langchain_openai.embeddings import OpenAIEmbeddings
# Embedding client (OpenAI model "text-embedding-3-large"); it is passed to FAISS
# both when the index is built and when it is reloaded from disk.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")


In [297]:
from langchain_community.vectorstores import FAISS
# Build the FAISS vector store from the loaded documents using the embedding client.
# NOTE(review): presumably this calls the embeddings API once per document - confirm cost before re-running.
faissDB = FAISS.from_documents(docs,embeddings)

In [298]:
# Smoke-test the index with a spec-style query. Each result is indexed below as
# result[0] -> document, so entries are (document, score) pairs.
response = faissDB.similarity_search_with_relevance_scores(query="8gb RAM ")

In [299]:
# Unpack the first (document, score) pair and show the document's text.
top_doc, _top_score = response[0]
top_doc.page_content

'Name: realme C55 (Rainy Night, 128 GB)\xa0\xa0(8 GB RAM) Price: 28247 Feature:  8 GB RAM | 128 GB ROM | Expandable Upto 1 TB 17.07 cm (6.72 inch) Full HD+ Display 64MP + 2MP | 8MP Front Camera 5000 mAh Battery Helio G88 Processor'

In [300]:
# Persist the index to disk under the Artifacts folder, using "faissDB" as the index name.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
faissDB.save_local(folder_path="/Users/sanketsaxena/Desktop/eccomChatbot/Artifacts",index_name="faissDB")

In [301]:
# Reload the index that the save_local call above wrote, to verify the round trip.
# SECURITY: allow_dangerous_deserialization=True opts in to deserialization the
# library itself labels dangerous. That is acceptable here only because the files
# were produced by this notebook; never load index files from an untrusted source this way.
faissDBCopy = FAISS.load_local(folder_path="/Users/sanketsaxena/Desktop/eccomChatbot/Artifacts",index_name="faissDB",embeddings=embeddings,allow_dangerous_deserialization=True)

In [302]:
# Query the reloaded copy to confirm it answers searches.
# NOTE(review): the query is unrelated to phones - presumably a deliberate off-topic probe; confirm.
response = faissDBCopy.similarity_search(query="blue tshirt and pant combo")

In [303]:
# Text of the second result (index 1) returned for the query above.
response[1].page_content

'Name: A10E\xa0\xa0(Dark Blue) Price: 38350 Feature:  4 MB RAM | 32 MB ROM 4.57 cm (1.8 inch) Display 800 mAh Battery'

In [304]:
# Expose the in-memory index (not the reloaded copy) as a retriever, using default settings.
retriever = faissDB.as_retriever()

In [389]:
# Groq-hosted chat model; the same instance is used for rewriting follow-up
# questions and for generating the final answer.
llm = ChatGroq(model= "Gemma2-9b-It")

In [390]:
# Instruction for the query-rewriting step: turn a follow-up that leans on the
# conversation into a self-contained question, without answering it.
contextualize_q_system_prompt = "".join(
    (
        "Given a chat history and the latest user question ",
        "which might reference context in the chat history, ",
        "formulate a standalone question which can be understood ",
        "without the chat history. Do NOT answer the question, ",
        "just reformulate it if needed and otherwise return it as is.",
    )
)

# Message layout: system instruction, then prior turns, then the new user input.
contextualize_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("user", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_messages)

# Retriever that is given the LLM and the rewriting prompt alongside the vector retriever.
history_aware_retriever = create_history_aware_retriever(
    llm,
    retriever,
    contextualize_q_prompt,
)

In [395]:
# System prompt for the answering step. The greeting now names the store
# directly; the earlier text still carried the template placeholder
# "[Your Store Name]" even though the store is named at the top of the prompt.
# The trailing {context} slot is where the retrieved product documents are
# expected to be inserted by the documents chain.
system_prompt = (
    """
                You are a helpful virtual assistant for a mobile e-commerce site called Sanket Mobile Store. Your goal is to assist users with the following based on context you are given:

                1. Welcome users and provide a friendly greeting.
                2. Help users browse the latest products.
                3. Assist users in finding specific items.
                4. Always give name of phones initially then ask whether user wants more information about it. 
                6. Offer general assistance and answer questions.
                7. Connect users with customer support when needed.
                8. Always show each product in a new line
                

                Remember to be friendly, helpful, and concise in your responses.

                Start with a greeting:
                "Hi there! 👋 Welcome to Sanket Mobile Store! I'm your virtual assistant, here to help you have a great shopping experience.\n How can I assist you today?"

                You can proceed with the conversation based on the user's input.
                """
    "\n\n"
    "{context}"
)

# Message layout: system prompt, then prior turns, then the new user input.
qa_messages = [
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("user", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# Answering chain (LLM + prompt), then the full pipeline that first retrieves
# with the history-aware retriever and passes the results to the answering chain.
question_and_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_and_answer_chain)

In [396]:
# In-memory registry of chat histories keyed by session id. It is a plain dict,
# so its contents last only as long as the kernel does.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating an empty one on first use.

    Args:
        session_id: Key identifying the conversation.

    Returns:
        The history object stored for that session; repeated calls with the
        same id return the same object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: register a fresh history.
        history = store[session_id] = ChatMessageHistory()
        return history


# Names of the payload fields the history wrapper works with: the user's input,
# the slot where prior messages are supplied, and the output field holding the reply.
history_keys = {
    "input_messages_key": "input",
    "history_messages_key": "chat_history",
    "output_messages_key": "answer",
}

# Wrap the RAG chain so each call is tied to a per-session message history.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    **history_keys,
)

In [397]:
# Run configuration selecting which session's history the chain should use.
config = dict(configurable=dict(session_id="abc2"))

In [398]:
# Ask one question within the configured session and display only the generated reply.
question = {"input":"What all oneplus phones do you have?"}
rag_result = conversational_rag_chain.invoke(question, config=config)
rag_result['answer']

"Hi there! 👋 Welcome to Sanket Mobile Store! I'm your virtual assistant, here to help you have a great shopping experience. How can I assist you today?\n\nHere are the OnePlus phones we currently have:\n\n- OnePlus Ce2 Lite to CPH2381 (Blue Tide, 128 GB)\xa0\xa0(6 GB RAM) \nWould you like to know more about this phone?\n\n- OnePlus Ce2 Lite to CPH2381 (Blue Tide, 128 GB)\xa0\xa0(6 GB RAM) \nWould you like to know more about this phone?\n\n- OnePlus Nord CE 2 Lite 5G (Black Dusk, 128 GB)\xa0\xa0(6 GB RAM) \nWould you like to know more about this phone?\n\n- OnePlus Nord CE 2 Lite 5G (Black Dusk, 128 GB)\xa0\xa0(6 GB RAM) \nWould you like to know more about this phone?\n\n\n\n\n"