In [131]:
# Importing the relevant libraries
import os
from dotenv import load_dotenv
import langchain
from langchain_openai import OpenAI, ChatOpenAI
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, SystemMessage
from IPython.display import display, Markdown, Image
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser,StrOutputParser
from langchain_community.callbacks import get_openai_callback
from langchain_core.globals import set_llm_cache
from langchain_core.caches import InMemoryCache
from langchain.memory import ConversationBufferMemory,ConversationBufferWindowMemory, VectorStoreRetrieverMemory
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda, RunnableParallel, RunnableBranch
from langchain_core.runnables import chain
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory, SQLChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from operator import itemgetter
import pandas as pd
from rich.console import Console
from rich.markdown import Markdown

In [132]:
# Load variables from a local .env file (if one is found) into the process
# environment. NOTE(review): presumably this supplies the OpenAI API key used
# by the ChatOpenAI client created below — confirm.
load_dotenv()

True

In [133]:
# Fetch the product catalogue CSV (saved as Ecommerce_Product_List.csv in the
# current working directory).
# Either download it manually from
# https://drive.google.com/file/d/1tAwsv97fICL74uJH9fDlxNEZ_YFFJS3W/view?usp=sharing
# or let gdown download it automatically using the Google Drive file ID:
! gdown 1tAwsv97fICL74uJH9fDlxNEZ_YFFJS3W

Downloading...
From: https://drive.google.com/uc?id=1tAwsv97fICL74uJH9fDlxNEZ_YFFJS3W
To: c:\Users\LENOVO\Documents\GitHub\Generative-AI\Ecommerce_Product_List.csv

  0%|          | 0.00/5.19k [00:00<?, ?B/s]
100%|██████████| 5.19k/5.19k [00:00<00:00, 16.0MB/s]


In [134]:
# Single chat-model client shared by the rephrase, filter and recommend chains
# below. temperature=0 requests the least-random sampling from the model.
chatgpt = ChatOpenAI(model = "gpt-4o-mini", temperature=0)

In [135]:
# System instructions for the query-rewriting step: using the conversation
# history, turn the latest user message into a standalone search query and
# return only that query (never an answer).
SYS_PROMPT = """You are a retail product expert.
                Carefully analyze the following conversation history
                and the current user query.
                Refer to the history and rephrase the current user query
                into a standalone query which can be used without the history
                for making search queries.
                Rephrase only if needed.
                Just return the query and do not answer it.
            """

# Chat prompt layout: system instructions, then the prior messages injected
# through the "history" placeholder, then the current user input supplied as
# the "human_input" variable.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", SYS_PROMPT),
        MessagesPlaceholder(variable_name="history"),
        ("human", """Current User Query:
                     {human_input}
                  """),
    ]
)

# Sliding-window memory: keep only the most recent k user/AI exchanges.
def memory_buffer_window(messages, k=10):
    """Return the tail of ``messages`` covering the last ``k`` exchanges.

    One exchange is assumed to be a user message followed by an AI message,
    so the window holds at most ``2 * k`` messages (20 with the default k=10).

    Args:
        messages: Sequence of chat messages, oldest first.
        k: Number of most recent user/AI exchanges to keep.

    Returns:
        A slice of ``messages`` with at most ``2 * k`` trailing items; an
        empty slice when ``k`` is zero or negative.
    """
    # Guard the slice: messages[-0:] is the WHOLE sequence, not an empty one,
    # and a negative k would drop the oldest items instead of windowing.
    if k <= 0:
        return messages[:0]
    return messages[-(2 * k):]

# Query-rewriting chain: trim the incoming history to the most recent window,
# fill the prompt, call the chat model and return the reply as a plain string.
def _windowed_history(inputs):
    """Apply the sliding-window trim to the ``history`` entry of the chain input."""
    return memory_buffer_window(inputs["history"])


rephrase_query_chain = (
    RunnablePassthrough.assign(history=_windowed_history)
    | prompt_template
    | chatgpt
    | StrOutputParser()
)

In [136]:
@chain
def pandas_code_tool_executor(query):
    """Evaluate an LLM-generated pandas expression against ``df``.

    Args:
        query: Text of a single pandas expression referring to the
            module-level dataframe ``df`` (for example a boolean-mask filter).
            Surrounding markdown code fences / backticks are tolerated.

    Returns:
        The filtered result rendered as a markdown table. The full ``df`` is
        rendered instead when the expression yields an empty result or
        something that is not a DataFrame/Series.

    Raises:
        Whatever ``eval`` raises for an invalid expression (for example
        ``SyntaxError`` or ``KeyError``); errors are not swallowed.
    """
    # Chat models often wrap code in ```python ... ``` fences or single
    # backticks; eval() would raise SyntaxError on those, so peel them off.
    expression = query.strip()
    if expression.startswith("`"):
        expression = expression.strip("`").strip()
        first_line, _, remainder = expression.partition("\n")
        # A purely alphabetic first line followed by more text is the fence's
        # language tag (e.g. "python"), not part of the expression.
        if first_line.strip().isalpha() and remainder.strip():
            expression = remainder.strip()

    # SECURITY: eval() executes arbitrary code produced by the model, which is
    # in turn steered by user input. Only `df` and `pd` are exposed here rather
    # than the whole notebook namespace, but this is NOT a sandbox — do not use
    # this pattern with untrusted users without a proper query parser.
    result_df = eval(expression, {"df": df, "pd": pd})

    # Scalars and other non-tabular results have no .empty / .to_markdown();
    # treat them like "no usable filter" and fall back to the whole catalogue.
    if not isinstance(result_df, (pd.DataFrame, pd.Series)) or result_df.empty:
        return df.to_markdown()
    return result_df.to_markdown()

In [137]:
# Load the product catalogue CSV from the working directory. `df` is the
# module-level frame that pandas_code_tool_executor evaluates generated
# queries against, so its name must stay `df`.
df = pd.read_csv("./Ecommerce_Product_List.csv")
df.head()

Unnamed: 0,Product_ID,Product_Name,Category,Price_USD,Rating,Description
0,P001,AlphaBook Pro,Laptop,1200,4.5,The AlphaBook Pro features a 15-inch Retina di...
1,P002,BetaTab S,Tablet,500,4.2,BetaTab S is a lightweight tablet with a 15-in...
2,P003,GammaPhone X,Smartphone,800,4.7,GammaPhone X comes with a 6.7-inch AMOLED disp...
3,P004,DeltaWatch 2,Smartwatch,300,4.0,"DeltaWatch 2 offers fitness tracking, heart ra..."
4,P005,EpsilonCam 300,Camera,600,4.1,EpsilonCam 300 is a mirrorless camera with a 2...


In [138]:
# Column names, dtypes and non-null counts; the schema listed in FILTER_PROMPT
# below matches this output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Product_ID    30 non-null     object 
 1   Product_Name  30 non-null     object 
 2   Category      30 non-null     object 
 3   Price_USD     30 non-null     int64  
 4   Rating        30 non-null     float64
 5   Description   30 non-null     object 
dtypes: float64(1), int64(1), object(4)
memory usage: 1.5+ KB


In [139]:
# Distinct rating values present in the catalogue.
df.Rating.unique()

array([4.5, 4.2, 4.7, 4. , 4.1, 4.3, 4.6, 4.4, 4.8, 3.9, 4.9])

In [140]:
# Prompt asking the model to translate a natural-language request into a single
# pandas expression over `df`. The schema and the Category values are written
# into the prompt by hand, so they must be kept in sync with the CSV.
FILTER_PROMPT = """Given the following schema of a dataframe table,
            your task is to figure out the best pandas query to
            filter the dataframe based on the user query which
            will be in natural language.

            The schema is as follows:

            #   Column        Non-Null Count  Dtype
            ---  ------        --------------  -----
            0   Product_ID    30 non-null     object
            1   Product_Name  30 non-null     object
            2   Category      30 non-null     object
            3   Price_USD     30 non-null     int64
            4   Rating        30 non-null     float64
            5   Description   30 non-null     object

            Category has values: ['Laptop', 'Tablet', 'Smartphone',
                                  'Smartwatch', 'Camera',
                                  'Headphones', 'Mouse', 'Keyboard',
                                  'Monitor', 'Charger']

            Rating ranges from 1 - 5 in floats

            You will try to figure out the pandas query focusing
            only on Category, Price_USD and Rating if the user mentions
            anything about these in their natural language query.
            Do not make up column names, only use the above.
            If not the pandas query should just return the full dataframe.
            Remember the dataframe name is df.

            Just return only the pandas query and nothing else.
            Do not return the results as markdown, just return the query

            User Query: {user_query}
            Pandas Query:
        """

# Single-variable template: expects {"user_query": ...} as input.
filter_prompt_template = ChatPromptTemplate.from_template(FILTER_PROMPT)

# prompt -> chat model -> plain string -> executor that evaluates the generated
# expression against `df` and returns the result as a markdown table.
data_filter_chain = (
         filter_prompt_template
           |
         chatgpt
           |
         StrOutputParser()
           |
         pandas_code_tool_executor
)

# One-off run of the filter step; `product_table` (a markdown table string) is
# reused by the recommendation example in the next cell.
product_table = data_filter_chain.invoke({"user_query": """looking for a tablet with > 10 inch display
                                                           and at least 64GB storage"""})

In [141]:
# Prompt for the final step: given a markdown product table and the user query,
# recommend products from that table only, or reply conversationally when the
# query is a greeting / generic message.
RECOMMEND_PROMPT = """Act as an expert retail product advisor
                      Given the following table of products,
                      focus on the product attributes and description in the table
                      and based on the user query below do the following

                      - Recommend the most appropriate products based on the query
                      - Recommedation should have product name, price,  rating, description
                      - Also add a brief on why you recommend the product
                      - Do not make up products or recommend products not in the table
                      - If some specifications do not match focus on the ones which match and recommend
                      - If nothing matches recommend 5 random products from the table
                      - Do not generate anything else except the fields mentioned above

                    In case the user query is just a generic query or greeting
                    respond to them appropriately without recommending any products

                    Product Table:
                    {product_table}

                    User Query:
                    {user_query}

                    Recommendation:
                    """

# Two-variable template: expects "product_table" and "user_query".
recommend_prompt_template = ChatPromptTemplate.from_template(RECOMMEND_PROMPT)

# prompt -> chat model -> plain string (the recommendation text).
recommend_chain = (
         recommend_prompt_template
           |
         chatgpt
           |
         StrOutputParser()
)

# One-off run of the recommendation step using the `product_table` produced by
# the filter example in the previous cell.
response = recommend_chain.invoke({"user_query": """looking for a tablet with greater than 10 inch display
                                                           and at least 64GB storage""",
                                   "product_table": product_table})

In [142]:
# End-to-end pipeline: select inputs -> rewrite the query -> filter the
# catalogue -> recommend.
#
# Every stage is wrapped in an explicit Runnable. Two adjacent plain dict
# literals joined with `|` are NOT composed by LangChain: Python applies its
# own dict-union operator (3.9+) and merges them into one parallel step, and
# on older interpreters the expression raises TypeError. Explicit
# RunnableParallel objects make the stages run in sequence on any version.
final_chain = (
    # Keep only the two keys the rewrite step needs from the incoming payload.
    RunnableParallel(
        human_input=itemgetter("human_input"),
        history=itemgetter("history"),
    )
    # Produce {"user_query": <standalone query>} from the input plus history.
    | RunnableParallel(user_query=rephrase_query_chain)
    # Add "product_table": the markdown table returned by the filter chain.
    | RunnablePassthrough.assign(product_table=data_filter_chain)
    # Consume "user_query" and "product_table"; returns the recommendation text.
    | recommend_chain
)


In [143]:
# Persistent chat memory: one SQL-backed message history per user/session ID.
def get_session_history_db(session_id):
    """Return the stored chat history for ``session_id``.

    The history is opened through the SQLAlchemy-style URL
    ``sqlite:///memory.db``, so messages are kept in that SQLite database
    rather than in kernel memory.
    """
    sqlite_url = "sqlite:///memory.db"
    history = SQLChatMessageHistory(session_id, sqlite_url)
    return history

In [144]:
# Wrap the pipeline with per-session history: get_session_history_db is called
# with the session ID to obtain the history store; stored messages are supplied
# to the chain under "history", and the value under "human_input" is treated as
# the user's message for that turn.
conv_chain = RunnableWithMessageHistory(
    final_chain,
    get_session_history_db,
    input_messages_key="human_input",
    history_messages_key="history"
)

In [149]:
def chat_with_llm(prompt: str, session_id: str) -> None:
    """Send one user turn through the conversational chain and print the reply.

    Args:
        prompt: The user's message for this turn.
        session_id: Identifier selecting which stored conversation history is
            used for the turn.

    Returns:
        None. The model's reply is rendered as markdown on a rich console.
    """
    # Bind rich's Markdown explicitly: the notebook imports `Markdown` from both
    # IPython.display and rich.markdown, so the bare name depends on import
    # order, while Console.print needs the rich renderable.
    from rich.markdown import Markdown as RichMarkdown

    response = conv_chain.invoke(
        {"human_input": prompt},
        config={"configurable": {"session_id": session_id}},
    )
    Console().print(RichMarkdown(response))


In [150]:
# Session 'jim001', turn 1: open-ended product request.
user_id = 'jim001'
prompt = "looking for a tablet"
chat_with_llm(prompt, user_id)

In [151]:
# Same session (user_id is unchanged), turn 2: a follow-up that only makes
# sense with the stored history, which the rephrase step is meant to resolve.
prompt = "want one which has display larger than 10 inches"
chat_with_llm(prompt, user_id)

In [152]:
# Same session, turn 3: adds a further constraint on top of the earlier turns.
prompt = "need at least 128GB disk space"
chat_with_llm(prompt, user_id)

In [153]:
# Switch to a different session ID ('bond007'); its history is looked up
# separately from the 'jim001' session used above.
user_id = 'bond007'
prompt = "I want a laptop with a high rating"
chat_with_llm(prompt, user_id)

In [154]:
# Follow-up within the 'bond007' session.
prompt = "want atleast 16GB memory"
chat_with_llm(prompt, user_id)