In [2]:
import os
from dotenv import load_dotenv
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_ollama import OllamaEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
# Load secrets from a local .env file into the process environment.
load_dotenv()

# Enable LangSmith tracing for every chain/graph run in this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# load_dotenv() has already exported the keys, so copying os.getenv(...) back
# into os.environ was a no-op when a key exists and an opaque
# "TypeError: str expected, not NoneType" when it does not. Check explicitly
# and fail with a message that names the missing keys.
REQUIRED_ENV_VARS = ("LANGCHAIN_API_KEY", "TAVILY_API_KEY", "GROQ_API_KEY")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variables (set them in .env or the shell): "
        + ", ".join(missing_env_vars)
    )


In [3]:
# Groq-hosted chat model shared by the classifier chain, the router chain and
# the self-query retriever in this notebook.
# NOTE(review): confirm "llama3-70b-8192" is still served by Groq before a fresh run;
# the prompts in this notebook are written with Llama-3 header tokens.
model = ChatGroq(model="llama3-70b-8192")

In [4]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser

In [None]:
# Classifier prompt in the Llama-3 chat format (system / user / assistant headers).
# Doubled braces ({{ }}) render as literal braces when the template is formatted;
# {initial_email} is the only input variable.
# The category label is spelled "price_enquiry" everywhere so the label list and
# the worked example agree, and each category sits on its own line (the previous
# trailing backslashes glued lines together and leaked a literal "\").
p1 = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a Email catagorizer Agent. You are expert in understanding what a customer wants when they write an email and are
able to categorize it in a desired way.<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Conduct a comprehensive analysis of the email provided and categorize into one of the following categories:
        price_enquiry - used when someone is asking for information about pricing
        customer_complaint - used when someone is complaining about something
        product_enquiry - used when someone is asking for information about a product feature, benefit or service but not about pricing
        customer_feedback - used when someone is giving feedback about a product
        off_topic -  when it doesnt relate to any other category

Output a single catagory only from the types ('price_enquiry', 'customer_complaint', 'product_enquiry', 'customer_feedback', 'off_topic')
Output should be strictly a JSON with {{"class": "<identified_catagory>" }}
for example:
{{"class": "price_enquiry"}}

EMAIL CONTENT: \n\n {initial_email} \n\n
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""

In [20]:
# Wrap the classifier template; its only placeholder is the raw email text.
CLASSIFIER_PROMPT = PromptTemplate(
    input_variables=["initial_email"],
    template=p1,
)

# LCEL pipeline: render the prompt, send it to the chat model, then parse the
# model's reply as JSON.
email_classifier_node = (
    CLASSIFIER_PROMPT
    | model
    | JsonOutputParser()
)


In [18]:
# Sample customer email used to smoke-test the chains in this notebook.
# The literal mixes "\n" escapes with real line breaks, so the rendered text
# contains extra blank lines.
EMAIL = """HI there, \n
I am emailing to say that I had a wonderful stay at your resort last week. \n

I really appreaciate what your staff did

Thanks,
Paul
"""
# Run the classifier chain on the sample email and print the parsed result.
result = email_classifier_node.invoke({"initial_email": EMAIL})
print(result)

{'class': 'customer_feedback'}


In [24]:
# Router prompt in the Llama-3 chat format. Input variables: {initial_email}
# and {email_category}.
# The string now opens with <|begin_of_text|> (the previous four-quote opener
# put a stray '"' in front of it), the dangling empty-string statement after
# the literal is gone, and the route label is spelled 'research_info'
# consistently.
p2 = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert at reading the initial email and routing web search or directly to a draft email. \n

Use the following criteria to decide how to route the email: \n\n

If the initial email only requires a simple response
Just choose 'draft_email'  for questions you can easily answer, prompt engineering, and adversarial attacks.
If the email is just saying thank you etc then choose 'draft_email'

You do not need to be stringent with the keywords in the question related to these topics. Otherwise, use research_info.
Give a binary choice 'research_info' or 'draft_email' based on the question. Return the a JSON with a single key 'router_decision' and
no premable or explaination. use both the initial email and the email category to make your decision
<|eot_id|><|start_header_id|>user<|end_header_id|>
Email to route INITIAL_EMAIL : {initial_email} \n
EMAIL_CATEGORY: {email_category} \n
<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
# The router template needs the raw email and the email's category.
ROUTER_PROMPT = PromptTemplate(
    input_variables=["initial_email", "email_category"],
    template=p2,
)

# Prompt -> chat model -> JSON parser.
REASONING_ROUTER_NODE = (
    ROUTER_PROMPT
    | model
    | JsonOutputParser()
)

In [25]:
# Route the sample email, supplying its category by hand.
REASONING_ROUTER_NODE.invoke(
    {
        "initial_email": EMAIL,
        "email_category": "customer_feedback",
    }
)

{'router_decision': 'draft_email'}

## Product Recommendation tool

In [None]:
from langchain_community.vectorstores import Chroma
import chromadb
from langchain_core.documents import Document
from langchain_ollama import OllamaEmbeddings
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_groq import ChatGroq
import os
import numpy as np
import pandas as pd

In [None]:
# Location of the Sephora product CSV. Set the SEPHORA_CSV_PATH environment
# variable to point at your own copy; the default is the original local path.
SEPHORA_CSV_PATH = os.environ.get(
    "SEPHORA_CSV_PATH",
    r"C:\Users\abhis\Downloads\archive\sephora_website_dataset.csv",
)
# Only the first MAX_DOCS products are turned into documents.
MAX_DOCS = 300
# Columns copied verbatim into each document's metadata.
METADATA_COLUMNS = ["brand", "category", "price", "rating", "number_of_reviews"]

# Missing values become "" so the string concatenation below never meets NaN.
sephora_data = pd.read_csv(SEPHORA_CSV_PATH).fillna("")

# One text blob per product. Every field uses the same "Label : value" shape;
# previously only "Brand" had a separator and the other labels were fused to
# their values (e.g. "CategoryMakeup").
sephora_data["combined_context"] = (
    "Brand : " + sephora_data["brand"] + "\n"
    + "Category : " + sephora_data["category"] + "\n"
    + "Name : " + sephora_data["name"] + "\n"
    + "Details : " + sephora_data["details"] + "\n"
    + "Ingredients : " + sephora_data["ingredients"] + "\n"
)

# Build documents for the first MAX_DOCS rows only, instead of materialising
# every row and discarding all but the first MAX_DOCS afterwards.
docs = [
    Document(
        page_content=row["combined_context"],
        metadata={column: row[column] for column in METADATA_COLUMNS},
    )
    for _, row in sephora_data.head(MAX_DOCS).iterrows()
]

# On-disk Chroma database, relative to the notebook's working directory.
persistent_client = chromadb.PersistentClient(path="sephora_store/chromadb/")
# The name must match the collection_name handed to the LangChain Chroma
# wrapper ("sephora_db"); the earlier "sephore_db" spelling wrote the vectors
# into a collection the vector store never read.
collection = persistent_client.get_or_create_collection("sephora_db")

embedding = OllamaEmbeddings(model="nomic-embed-text:latest")

# Embed and store the documents in small batches.
# - embed_documents() takes a list of strings (embed_query() expects a single str).
# - chromadb's keyword is `metadatas`, not `metadata`.
# - upsert() keeps the cell idempotent: re-running overwrites id_<n> rather than
#   trying to insert duplicate ids.
EMBED_BATCH_SIZE = 50
for start in range(0, len(docs), EMBED_BATCH_SIZE):
    batch = docs[start:start + EMBED_BATCH_SIZE]
    texts = [doc.page_content for doc in batch]
    collection.upsert(
        ids=[f"id_{start + offset}" for offset in range(len(batch))],
        documents=texts,
        metadatas=[doc.metadata for doc in batch],
        embeddings=embedding.embed_documents(texts),
    )

# LangChain wrapper over the persistent Chroma client, bound to the
# "sephora_db" collection and using the same embedding model for queries.
vector_store = Chroma(
    collection_name="sephora_db",
    embedding_function=embedding,
    client=persistent_client,
)

# (name, description, type) for every metadata field the self-query retriever
# is allowed to filter on.
_FIELD_SPECS = (
    ("brand", "The brand of the product. Examples include 'sephora collection', 'Fenty Beauty' etc", "string"),
    ("category", "T  he category of the product such as 'skincare', 'makeup', 'hair' etc", "string"),
    ("price", "The price of the product in USD", "float"),
    ("rating", "The average user rating for a product from a sacle of 1 to 5", "float"),
    ("number_of_reviews", "The total number of reviews given to a product", "integer"),
)

metadata_field_info = [
    AttributeInfo(name=field, description=summary, type=kind)
    for field, summary, kind in _FIELD_SPECS
]

# Plain-language description of what each document's page_content holds.
document_content_description = "COmbined  textual description of the product. Including ingredients and product details"


# Self-querying retriever: the LLM turns a natural-language request into a
# semantic query plus metadata filters over the vector store.
# The result is bound to `retriever` because the `lookup_policy` tool reads a
# module-level name `retriever`; previously the object was built and discarded.
retriever = SelfQueryRetriever.from_llm(
    model,
    vector_store,
    document_content_description,
    metadata_field_info,
)


In [None]:
from langgraph.graph import StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition
from langgraph.checkpoint.sqlite import SqliteSaver
from typing_extensions import TypedDict
from typing import Annotated
from langchain_core.tools import tool
from langchain_community.tools import WriteFileTool
import sqlite3

# In-memory SQLite checkpointer for the graph.
# Built straight from a connection: in current langgraph-checkpoint-sqlite
# releases `SqliteSaver.from_conn_string(...)` is a context manager, so
# assigning its return value would not yield a usable saver. Passing a
# connection to the constructor works on both old and new versions.
# check_same_thread=False lets the connection be used from a thread other
# than the one that created it.
memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))


class State(TypedDict):
    """Graph state: the running list of chat messages."""

    # `add_messages` is the reducer LangGraph applies to this key, so messages
    # returned by a node are merged into the list instead of replacing it.
    messages: Annotated[list, add_messages]


graph_builder = StateGraph(State)

In [None]:
@tool
def lookup_policy(query: str) -> str:
    """This tool is capable of finding relevant products from sephora, containing multiple
    metadata filters that can be used to return product recommendations to the user.
    """
    # `retriever` is read from module scope; it must exist before the tool is called.
    matches = retriever.invoke(query)

    # Render each hit as its text immediately followed by its metadata dict,
    # with a blank line between hits.
    rendered = []
    for match in matches:
        rendered.append(match.page_content + str(match.metadata))
    return "\n\n".join(rendered)

# Named `write_file_tool` rather than `tool`: rebinding `tool` shadowed the
# `@tool` decorator imported from langchain_core.tools, so re-running the cell
# that defines `lookup_policy` failed.
write_file_tool = WriteFileTool()
tools = [write_file_tool, lookup_policy]

# Name the model explicitly, using the same one as the rest of the notebook,
# instead of depending on ChatGroq's version-specific default.
llm = ChatGroq(model="llama3-70b-8192")

# Expose the tools to the model so it can emit tool calls.
llm_with_tools = llm.bind_tools(tools)

In [None]:
def chatbot(state: State):
    """Graph node: call the tool-enabled LLM on the conversation so far.

    Returns a partial state update holding the single new model message.
    """
    reply = llm_with_tools.invoke(state["messages"])
    return {"messages": [reply]}

In [None]:
# Nodes: the LLM step and the tool-execution step.
graph_builder.add_node("chatbot", chatbot)
tool_node = ToolNode(tools=tools)
graph_builder.add_node("tools", tool_node)

# Every run starts at the chatbot node.
graph_builder.set_entry_point("chatbot")

# After the chatbot speaks, `tools_condition` decides the next hop;
# once the tools have run, control returns to the chatbot.
graph_builder.add_conditional_edges("chatbot", tools_condition)
graph_builder.add_edge("tools", "chatbot")

# Compile with the checkpointer so state is saved between steps.
graph = graph_builder.compile(checkpointer=memory)

In [None]:
# The checkpointer keys saved state by thread_id, so the config must be passed
# to every run of the compiled graph.
config = {"configurable": {"thread_id": "4"}}
user_input = input("User: ")

# The state key is "messages" (see State), and each entry is a (role, content)
# pair. The previous {"message": ["user", text]} used a key the graph does not
# know and passed the role and the text as two separate list items.
messages = {"messages": [("user", user_input)]}

response = None
for event in graph.stream(messages, config):
    # Each event maps a node name to that node's state update; keep the
    # content of the latest message seen.
    for value in event.values():
        response = value["messages"][-1].content

# Show the final reply; it was previously computed but never displayed.
print("Assistant:", response)