### ⚙️  |  Settings

In [1]:
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
GROQ_LLM_MODEL = "llama3-70b-8192"
TEMPERATURE = 0.0

OLLAMA_NOMIC_EMBEDDING = "nomic-embed-text"
OLLAMA_EMBEDDING_SHOW_PROGRESS = False
K_NEAREST_NEIGHBORS = 4
OLLAMA_LLAMA3_70B = "llama3:70b"

INGEST_KNOWLEDGE = False

CHROMA_DB_PATH = "chroma"

In [3]:
import pandas as pd
from langchain_community.document_loaders.csv_loader import CSVLoader

loader_csv = CSVLoader(file_path="data/westworld.csv")
df = pd.DataFrame(loader_csv.load())
df

Unnamed: 0,0,1,2
0,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
1,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
2,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
3,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
4,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
...,...,...,...
143,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
144,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
145,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"
146,"(page_content, What is Westworld? A: Westworld...","(metadata, {'source': 'data/westworld.csv', 'r...","(type, Document)"


In [4]:
from langchain_community.document_loaders.merge import MergedDataLoader

loader_all = MergedDataLoader(loaders=[loader_csv])

In [5]:
docs_all = loader_all.load()

In [6]:
len(docs_all)

148

In [7]:
docs_all[0]

Document(page_content='What is Westworld? A: Westworld is an immersive, high-tech theme park that allows guests to experience the Wild West in a realistic and interactive way, populated by advanced androids called "hosts.": Where is Westworld located? A: The exact location of Westworld is kept secret, but it is situated in a remote area, accessible only by the park\'s own transportation services.', metadata={'source': 'data/westworld.csv', 'row': 0})

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(docs_all)

len(texts)

148

In [9]:
from langchain.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model=OLLAMA_NOMIC_EMBEDDING, show_progress=OLLAMA_EMBEDDING_SHOW_PROGRESS
)

In [10]:
from langchain_chroma import Chroma

vector_db = Chroma.from_documents(
    documents=texts, embedding=embeddings, persist_directory=CHROMA_DB_PATH
)

In [11]:
retriever = vector_db.as_retriever(search_kwargs={"k": K_NEAREST_NEIGHBORS})

In [12]:
from langchain_groq import ChatGroq

groq_llm = ChatGroq(model=GROQ_LLM_MODEL, temperature=TEMPERATURE)

In [13]:
from jinja2 import Environment, FileSystemLoader

PROMPTS_PATH = "prompts"
OPENAI_PROMPT = "groq-llama3-rag.jinja"

env = Environment(loader=FileSystemLoader(PROMPTS_PATH))
prompt_template = env.get_template(OPENAI_PROMPT)
prompt = prompt_template.render()
print(prompt)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to 
answer the question. If you don't know the answer, just say that you don't know. Use three sentences
maximum and keep the answer concise.

<|eot_id|><|start_header_id|>user<|end_header_id|>
QUESTION: {question}
CONTEXT:  {context}
Answer:
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>


In [14]:
question = "What can I do in the Westworld Park?"
context = retriever.invoke(question)
context

[Document(page_content='What is Westworld? A: Westworld is an immersive, high-tech theme park that allows guests to experience the Wild West in a realistic and interactive way, populated by advanced androids called "hosts.": Are there any age-specific activities or areas within the park? A: While Westworld is primarily designed for adult guests, there are some family-friendly areas and activities available, such as horseback riding lessons, old-fashioned games, and age-appropriate storylines.', metadata={'row': 27, 'source': 'data/westworld.csv'}),
 Document(page_content='What is Westworld? A: Westworld is an immersive, high-tech theme park that allows guests to experience the Wild West in a realistic and interactive way, populated by advanced androids called "hosts.": Are there any age-specific activities or areas within the park? A: While Westworld is primarily designed for adult guests, there are some family-friendly areas and activities available, such as horseback riding lessons, 

In [15]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

rag_prompt = PromptTemplate(template=prompt, input_variables=["question", "context"])
rag_prompt_chain = rag_prompt | groq_llm | StrOutputParser()

result = rag_prompt_chain.invoke({"question": question, "context": context})
print(result)

In Westworld Park, you can experience the Wild West in a realistic and interactive way, interact with advanced androids called "hosts", and enjoy family-friendly activities such as horseback riding lessons and old-fashioned games. You can also explore age-appropriate storylines and indulge in a wide selection of food and beverage options provided by the park.


In [16]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | groq_llm
    | StrOutputParser()
)

rag_chain.invoke({"question": "What is the Westworld Park all about?"})

'Westworld Park is an immersive, high-tech theme park that allows guests to experience the Wild West in a realistic and interactive way, populated by advanced androids called "hosts." It is primarily designed for adult guests but also offers some family-friendly areas and activities. The park provides a unique and interactive experience for its visitors.'

In [17]:
# from langchain.llms import Ollama

# ollama_llama3_llm = Ollama(model=OLLAMA_LLAMA3_70B, temperature=TEMPERATURE)

# rag_chain = (
#     {"context": retriever, "question": RunnablePassthrough()}
#     | rag_prompt
#     | ollama_llama3_llm
#     | StrOutputParser()
# )

# rag_chain.invoke({"question": "What is the Westworld Park all about?"})

In [18]:
rag_chain.invoke("who are some of the hosts I can meet?")

"You can meet hosts like Maeve, the madam of the Mariposa Saloon, and a host inspired by Logan, William's carefree and hedonistic friend. These hosts are available for interaction and conversation within the park."

In [19]:
rag_chain.invoke("Who is Ford?")

"Dr. Ford is a key figure in the operations of Westworld, an immersive theme park. He is not available for direct conversation, but a host designed to discuss the history and philosophy of Westworld's creation can offer similar insights."

In [20]:
rag_chain.invoke("Can I sleep under the stars there?")

"I don't know if you can sleep under the stars in Westworld, as the provided context only discusses safety tips for heatstroke and thunderstorms, but does not mention overnight accommodations or camping."

In [21]:
from langchain_groq import ChatGroq

chatgroq_llm = ChatGroq(model=GROQ_LLM_MODEL, temperature=TEMPERATURE)

In [22]:
def write_markdown_file(content, filename):
    """Writes the given content as a markdown file to the local directory.

    Args:
      content: The string content to write to the file.
      filename: The filename to save the file as.
    """
    if type(content) == dict:
        content = "\n".join(f"{key}: {value}" for key, value in content.items())
    if type(content) == list:
        content = "\n".join(content)
    with open(f"{filename}.md", "w") as f:
        f.write(content)

In [28]:
EMAIL_AGENT_PROMPT = "email-category-llama3.jinja"
email_agent_prompt_template = env.get_template(EMAIL_AGENT_PROMPT)
email_agent_prompt = email_agent_prompt_template.render()
print(email_agent_prompt)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are the Email Categorizer Agent for the theme park Westworld,You are a master at
understanding what a customer wants when they write an email and are able to categorize
it in a useful way. Remember people maybe asking about experiences they can have in westworld.

<|eot_id|><|start_header_id|>user<|end_header_id|>
Conduct a comprehensive analysis of the email provided and categorize into one of the following categories:
price_equiry - used when someone is asking for information about pricing
customer_complaint - used when someone is complaining about something
product_enquiry - used when someone is asking for information about a product feature, benefit or service but not about pricing
customer_feedback - used when someone is giving feedback about a product
off_topic when it doesnt relate to any other category

Output a single cetgory only from the types ('price_equiry', 'customer_complaint', 'product_enquiry', 'customer_f

In [29]:
email_prompt = PromptTemplate(
    template=email_agent_prompt, input_variables=["initial_email"]
)

In [30]:
email_category_generator = email_prompt | chatgroq_llm | StrOutputParser()

In [31]:
email = """
HI there,
I am emailing to find out info about your them park and what I can do there.
I am looking for new experiences.
Thanks,
Paul
"""

In [32]:
result = email_category_generator.invoke({"initial_email": email})
print(result)

'product_enquiry'


In [34]:
email = """
Hi,
I hope this email finds you well. We sincerely appreciate your recent purchase and your continued support of ACME.

As part of our commitment to providing the best possible service and products, we would love to hear about your experience with us. Your feedback is invaluable in helping us improve and better serve you in the future.

Could you please take a few moments to share your thoughts on the following?

Your overall satisfaction with our product(s)
The quality of our customer service
Any suggestions for improvements or additional products you would like to see
You can reply directly to this email or fill out our quick feedback form [insert link to feedback form, if applicable].

Thank you for taking the time to provide us with your insights. Your feedback helps us strive for excellence.

Best regards,
Paul
"""

result = email_category_generator.invoke({"initial_email": email})
print(result)

'customer_feedback'


In [36]:
from langchain_core.output_parsers import JsonOutputParser

## RAG QUESTIONS

SEARCH_RAG_PROMPT = "search-rag-llama3.jinja"
email_agent_prompt_template = env.get_template(SEARCH_RAG_PROMPT)
email_agent_prompt = email_agent_prompt_template.render()

search_rag_prompt = PromptTemplate(
    template=email_agent_prompt,
    input_variables=["initial_email", "email_category"],
)

question_rag_chain = search_rag_prompt | groq_llm | JsonOutputParser()

email_category = "product_enquiry"
research_info = None

print(
    question_rag_chain.invoke(
        {"initial_email": email, "email_category": email_category}
    )
)

{'questions': ['What specific product did the customer purchase from ACME?', "What is the customer's current satisfaction level with the product?", 'Are there any specific features or improvements the customer is looking for in the product?']}


### Write Draft Email

In [38]:
MAIL_DRAFT_PROMPT = "draft-writer-llama3.jinja"
email_agent_prompt_template = env.get_template(MAIL_DRAFT_PROMPT)
email_agent_prompt = email_agent_prompt_template.render()

draft_writer_prompt = PromptTemplate(
    template=email_agent_prompt,
    input_variables=["initial_email", "email_category", "research_info"],
)

draft_writer_chain = draft_writer_prompt | groq_llm | JsonOutputParser()

email_category = "customer_feedback"
research_info = None

print(
    draft_writer_chain.invoke(
        {
            "initial_email": email,
            "email_category": email_category,
            "research_info": research_info,
        }
    )
)

{'email_draft': "Dear Paul,\n\nThank you so much for reaching out to us about your experience at Westworld! We truly value your feedback and appreciate the time you've taken to share your thoughts with us.\n\nHowever, I noticed that your email seems to be intended for ACME, not Westworld. Could you please clarify how you came across our email address and what your experience is related to? Are you referring to a specific visit or interaction with our park?\n\nWe're committed to providing the best possible experience for our guests, and your input is invaluable in helping us achieve that goal. I'd be happy to hear more about your thoughts and suggestions once we clarify the context.\n\nLooking forward to hearing back from you.\n\nBest regards,\nSarah, Resident Manager"}


### Rewrite Router

In [39]:
REWRITE_ROUTER_PROMPT = "rewrite-router-llama3.jinja"
rewrite_router_prompt_template = env.get_template(REWRITE_ROUTER_PROMPT)
rewrite_router = rewrite_router_prompt_template.render()

rewrite_router_prompt = PromptTemplate(
    template=rewrite_router,
    input_variables=["initial_email", "email_category", "draft_email"],
)

rewrite_router = rewrite_router_prompt | groq_llm | JsonOutputParser()

email_category = "customer_feedback"
draft_email = "Yo we can't help you, best regards Sarah"

print(
    rewrite_router.invoke(
        {
            "initial_email": email,
            "email_category": email_category,
            "draft_email": draft_email,
        }
    )
)

{'router_decision': 'rewrite'}


### research_router

In [None]:
research_router_prompt = PromptTemplate(
    template=,
    input_variables=["initial_email", "email_category"],
)

research_router = research_router_prompt | GROQ_LLM | JsonOutputParser()

email_category = "product_enquiry"

print(
    research_router.invoke({"initial_email": EMAIL, "email_category": email_category})
)

### Draft Email Analysis

In [40]:
DRAFT_ANALYSIS_PROMPT = "draft-email-analysis.jinja"
draft_analysis_prompt_template = env.get_template(DRAFT_ANALYSIS_PROMPT)
draft_analysis = draft_analysis_prompt_template.render()

draft_analysis_prompt = PromptTemplate(
    template=draft_analysis,
    input_variables=["initial_email", "email_category", "research_info"],
)

draft_analysis_chain = draft_analysis_prompt | groq_llm | JsonOutputParser()

email_category = "customer_feedback"
research_info = None
draft_email = "We can't help you, best regards Sarah"

email_analysis = draft_analysis_chain.invoke(
    {
        "initial_email": email,
        "email_category": email_category,
        "research_info": research_info,
        "draft_email": draft_email,
    }
)

print(email_analysis)

{'draft_analysis': {'addressing_customer_issue': 'no', 'feedback': "The draft email does not address the customer's issue at all. The initial email is seeking feedback from the customer, but the draft email does not provide any response or acknowledgement of the customer's potential feedback. It is also very abrupt and unfriendly. To make the email more effective, it should express gratitude for the customer's feedback, provide a brief summary of the feedback received, and let the customer know how their feedback will be used to improve the service or product. Additionally, the signature should match the initial email's signature, which is 'Best regards, Paul', not 'Best regards, Sarah'."}}


### Rewrite Email with Analysis

In [41]:
REWRITE_EMAIL_PROMPT = "rewrite-email.jinja"
rewrite_email_prompt_template = env.get_template(REWRITE_EMAIL_PROMPT)
rewrite_email = rewrite_email_prompt_template.render()

rewrite_email_prompt = PromptTemplate(
    template=rewrite_email,
    input_variables=[
        "initial_email",
        "email_category",
        "research_info",
        "email_analysis",
        "draft_email",
    ],
)

rewrite_chain = rewrite_email_prompt | groq_llm | JsonOutputParser()

email_category = "customer_feedback"
research_info = None
draft_email = "Yo we can't help you, best regards Sarah"

final_email = rewrite_chain.invoke(
    {
        "initial_email": email,
        "email_category": email_category,
        "research_info": research_info,
        "draft_email": draft_email,
        "email_analysis": email_analysis,
    }
)

final_email["final_email"]

'Dear valued customer, \n\nWe want to take a moment to express our sincere gratitude for taking the time to share your feedback with us. We appreciate your input and are committed to using it to improve our service or product. \n\nWe are currently reviewing your feedback and will use it to make necessary changes to better serve you in the future. \n\nThank you again for your feedback. It is invaluable to us. \n\nBest regards, Paul'

### State

In [42]:
from typing_extensions import TypedDict
from typing import List


class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        initial_email: email
        email_category: email category
        draft_email: LLM generation
        final_email: LLM generation
        research_info: list of documents
        info_needed: whether to add search info
        num_steps: number of steps
    """

    initial_email: str
    email_category: str
    draft_email: str
    final_email: str
    research_info: List[str]  # this will now be the RAG results
    info_needed: bool
    num_steps: int
    draft_email_feedback: dict
    rag_questions: List[str]

## Nodes

1. categorize_email
2. research_info_search # now done using RAG
3. draft_email_writer  
4. analyze_draft_email  
5. rewrite_email  
6. no_rewrite  
7. state_printer

### 1. categorize_email

In [45]:
def categorize_email(state):
    """take the initial email and categorize it"""
    print("---CATEGORIZING INITIAL EMAIL---")
    initial_email = state["initial_email"]
    num_steps = int(state["num_steps"])
    num_steps += 1

    email_category = email_category_generator.invoke({"initial_email": initial_email})
    print(email_category)
    # save to local disk
    write_markdown_file(email_category, "email_category")

    return {"email_category": email_category, "num_steps": num_steps}

### 2. research_info_search

In [44]:
def research_info_search(state):

    print("---RESEARCH INFO RAG---")
    initial_email = state["initial_email"]
    email_category = state["email_category"]
    num_steps = state["num_steps"]
    num_steps += 1

    # Web search
    questions = question_rag_chain.invoke(
        {"initial_email": initial_email, "email_category": email_category}
    )
    questions = questions["questions"]
    # print(questions)
    rag_results = []
    for question in questions:
        print(question)
        temp_docs = rag_chain.invoke(question)
        print(temp_docs)
        question_results = question + "\n\n" + temp_docs + "\n\n\n"
        if rag_results is not None:
            rag_results.append(question_results)
        else:
            rag_results = [question_results]
    print(rag_results)
    print(type(rag_results))
    write_markdown_file(rag_results, "research_info")
    write_markdown_file(questions, "rag_questions")
    return {
        "research_info": rag_results,
        "rag_questions": questions,
        "num_steps": num_steps,
    }

### 3. draft_email_writer

In [43]:
def draft_email_writer(state):
    print("---DRAFT EMAIL WRITER---")
    ## Get the state
    initial_email = state["initial_email"]
    email_category = state["email_category"]
    research_info = state["research_info"]
    num_steps = state["num_steps"]
    num_steps += 1

    # Generate draft email
    draft_email = draft_writer_chain.invoke(
        {
            "initial_email": initial_email,
            "email_category": email_category,
            "research_info": research_info,
        }
    )
    print(draft_email)
    # print(type(draft_email))

    email_draft = draft_email["email_draft"]
    write_markdown_file(email_draft, "draft_email")

    return {"draft_email": email_draft, "num_steps": num_steps}

### 4. analyze_draft_email

In [46]:
def analyze_draft_email(state):
    print("---DRAFT EMAIL ANALYZER---")
    ## Get the state
    initial_email = state["initial_email"]
    email_category = state["email_category"]
    draft_email = state["draft_email"]
    research_info = state["research_info"]
    num_steps = state["num_steps"]
    num_steps += 1

    # Generate draft email
    draft_email_feedback = draft_analysis_chain.invoke(
        {
            "initial_email": initial_email,
            "email_category": email_category,
            "research_info": research_info,
            "draft_email": draft_email,
        }
    )
    # print(draft_email)
    # print(type(draft_email))

    write_markdown_file(str(draft_email_feedback), "draft_email_feedback")
    return {"draft_email_feedback": draft_email_feedback, "num_steps": num_steps}

### 5. rewrite_email

In [47]:
def rewrite_email(state):
    print("---ReWRITE EMAIL ---")
    ## Get the state
    initial_email = state["initial_email"]
    email_category = state["email_category"]
    draft_email = state["draft_email"]
    research_info = state["research_info"]
    draft_email_feedback = state["draft_email_feedback"]
    num_steps = state["num_steps"]
    num_steps += 1

    # Generate draft email
    final_email = rewrite_chain.invoke(
        {
            "initial_email": initial_email,
            "email_category": email_category,
            "research_info": research_info,
            "draft_email": draft_email,
            "email_analysis": draft_email_feedback,
        }
    )

    write_markdown_file(str(final_email), "final_email")
    return {"final_email": final_email["final_email"], "num_steps": num_steps}

### 6. no_rewrite

In [48]:
def no_rewrite(state):
    print("---NO REWRITE EMAIL ---")
    ## Get the state
    draft_email = state["draft_email"]
    num_steps = state["num_steps"]
    num_steps += 1

    write_markdown_file(str(draft_email), "final_email")
    return {"final_email": draft_email, "num_steps": num_steps}

### 7. StatePrinter

In [49]:
def state_printer(state):
    """print the state"""
    print("---STATE PRINTER---")
    print(f"Initial Email: {state['initial_email']} \n")
    print(f"Email Category: {state['email_category']} \n")
    print(f"Draft Email: {state['draft_email']} \n")
    print(f"Final Email: {state['final_email']} \n")
    print(f"Research Info: {state['research_info']} \n")
    print(f"RAG Questions: {state['rag_questions']} \n")
    print(f"Num Steps: {state['num_steps']} \n")
    return

### 8. route_to_research

In [50]:
def route_to_research(state):
    """
    Route email to web search or not.
    Args:
        state (dict): The current graph state
    Returns:
        str: Next node to call
    """

    print("---ROUTE TO RESEARCH---")
    initial_email = state["initial_email"]
    email_category = state["email_category"]

    router = research_router.invoke(
        {"initial_email": initial_email, "email_category": email_category}
    )
    print(router)
    # print(type(router))
    print(router["router_decision"])
    if router["router_decision"] == "research_info":
        print("---ROUTE EMAIL TO RESEARCH INFO---")
        return "research_info"
    elif router["router_decision"] == "draft_email":
        print("---ROUTE EMAIL TO DRAFT EMAIL---")
        return "draft_email"