# **Model Validation PlayGround**

In [23]:
# %pip install openai
# # %pip install pandarallel
# %pip install langchain
# %pip install chromadb
# %pip install tiktoken

Collecting tiktoken
  Obtaining dependency information for tiktoken from https://files.pythonhosted.org/packages/f4/2e/0adf6e264b996e263b1c57cad6560ffd5492a69beb9fd779ed0463d486bc/tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.5.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
import openai
import os
import getpass
import asyncio

In [2]:
import pandas as pd
import numpy as np
import ast  # Import the ast module for literal evaluation
import seaborn as sns
import matplotlib.pyplot as plt

pd.set_option('display.max_colwidth', 100)
# %pip install pandarallel
import multiprocessing
num_processors = multiprocessing.cpu_count()
print(f'Available CPUs: {num_processors}')

import pandarallel
from pandarallel import pandarallel
pandarallel.initialize(nb_workers=num_processors-1, use_memory_fs=False)

Available CPUs: 8
INFO: Pandarallel will run on 7 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [69]:
#LANGCHAIN
import langchain
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain

#CHROMA
import chromadb
from chromadb.utils import embedding_functions
from langchain.vectorstores import Chroma

# Setting up the chroma client
chroma_client = chromadb.PersistentClient(path="vectorstores")
chroma_client.heartbeat()

# MODEL ASSESSMENT
# %pip install rouge-score
# from rouge_score import rouge_scorer
# from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

1699229161840445831

-----
## Entire Dataframe

In [58]:
df_messages=pd.read_csv('human_validation_with_relevent_date.csv', parse_dates=['sender_date','replier_date'])
df_messages.dropna(subset=['sender'], axis=0, inplace=True)
df_messages.rename(columns={'Sender_Receiver_Emails':'Replier_Emails_Sender', 'Sender_Emails_All':'Replier_Emails_All'}, inplace=True)
df_messages.head()

Unnamed: 0.1,Unnamed: 0,subject,sender,sender_date,message,replier,replier_date,reply_message,Email_ID,Replier_Emails_Sender,Replier_Emails_All
0,0,Basketball Game,Scott,2023-01-01,"Yoo do you wanna go watch the bball game tomorrow? I snagged some free tickets, its gonna be sick.",Radhika,2023-01-02,"Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra/...",1,"['Hey Scott,\n\nAs you already know, I am a vegetarian and would prefer having a sandwich/ salad...","['Hey Scott,\n\nAs you already know, I am a vegetarian and would prefer having a sandwich/ salad..."
1,1,Basketball Game,Scott,2023-01-03,Bro itll be fun trust me. I wanna watch sports. Broadway is super boring. Ill buy u a hot dog if...,Radhika,2023-01-04,"Hey Scott,\n\nAs you already know, I am a vegetarian and would prefer having a sandwich/ salad f...",2,"[""Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestr...","[""Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestr..."
2,2,Basketball Game,Scott,2023-01-05,ah but meat is so delish. no worries though. The game starts at 9 pm. U got other plans for that...,Radhika,2023-01-06,"Hello Scott,\n\nI should be free to go at 9pm that night. I do have a meeting at 7am the next mo...",3,"[""Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestr...","[""Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestr..."
3,3,Basketball Game,Scott,2023-01-07,"Ill teach u about basketball dont worry, ill get u a beer if you dont want a hot dog. Do you lik...",Radhika,2023-01-08,"Scott,\n\nThat sounds great! You do owe me a drink for all the nice things I do for you as your ...",4,"[""Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestr...","[""Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestr..."
4,4,Basketball Game,Scott,2023-01-09,I like baseball too. Can you do sports betting on badminton?,Radhika,2023-01-10,"Hey Scott,\n\nI don't think so - badminton is a far dignifies sport for sports betting. But I ca...",5,"[""Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestr...","[""Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestr..."


In [59]:
# df_messages=pd.read_csv('Hedwig/07_HumanValidation/20231104_human_validation_dataset.csv')
df_messages['Replier_Emails_Sender'] = df_messages['Replier_Emails_Sender'].apply(lambda x: ast.literal_eval(x) if pd.notna(x) else [])
df_messages['num_emails_toSender'] = df_messages['Replier_Emails_Sender'].apply(lambda x: len(x) if isinstance(x, list) else np.nan) + 1

df_messages['Replier_Emails_All'] = df_messages['Replier_Emails_All'].apply(lambda x: ast.literal_eval(x) if pd.notna(x) else [])
df_messages['num_emails_all'] = df_messages['Replier_Emails_All'].apply(lambda x: len(x) if isinstance(x, list) else np.nan) + 1

# df_messages['sender']=df_messages['sender'].astype('str')
# df_messages['reply_sender']=df_messages['reply_sender'].astype('str')
df_messages['sender_replier_thread'] = df_messages['sender'].str.cat(df_messages['replier'], sep='-')
df_messages

Unnamed: 0.1,Unnamed: 0,subject,sender,sender_date,message,replier,replier_date,reply_message,Email_ID,Replier_Emails_Sender,Replier_Emails_All,num_emails_toSender,num_emails_all,sender_replier_thread
0,0,Basketball Game,Scott,2023-01-01,"Yoo do you wanna go watch the bball game tomorrow? I snagged some free tickets, its gonna be sick.",Radhika,2023-01-02,"Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra/...",1,"[Hey Scott,\n\nAs you already know, I am a vegetarian and would prefer having a sandwich/ salad ...","[Hey Scott,\n\nAs you already know, I am a vegetarian and would prefer having a sandwich/ salad ...",6,13,Scott-Radhika
1,1,Basketball Game,Scott,2023-01-03,Bro itll be fun trust me. I wanna watch sports. Broadway is super boring. Ill buy u a hot dog if...,Radhika,2023-01-04,"Hey Scott,\n\nAs you already know, I am a vegetarian and would prefer having a sandwich/ salad f...",2,"[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra...","[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra...",6,13,Scott-Radhika
2,2,Basketball Game,Scott,2023-01-05,ah but meat is so delish. no worries though. The game starts at 9 pm. U got other plans for that...,Radhika,2023-01-06,"Hello Scott,\n\nI should be free to go at 9pm that night. I do have a meeting at 7am the next mo...",3,"[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra...","[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra...",6,13,Scott-Radhika
3,3,Basketball Game,Scott,2023-01-07,"Ill teach u about basketball dont worry, ill get u a beer if you dont want a hot dog. Do you lik...",Radhika,2023-01-08,"Scott,\n\nThat sounds great! You do owe me a drink for all the nice things I do for you as your ...",4,"[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra...","[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra...",6,13,Scott-Radhika
4,4,Basketball Game,Scott,2023-01-09,I like baseball too. Can you do sports betting on badminton?,Radhika,2023-01-10,"Hey Scott,\n\nI don't think so - badminton is a far dignifies sport for sports betting. But I ca...",5,"[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra...","[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra...",6,13,Scott-Radhika
5,5,Basketball Game,Scott,2023-01-11,"Bro, badminton is boring, what are you talking about. Anyways, Im really tired. I been writing a...",Radhika,2023-01-12,"Hey Scott,\n\nI had a very relaxed day after a long night. I am currently working on school work...",6,"[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra...","[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra...",6,13,Scott-Radhika
6,6,New Member Onboarding,Scott,2023-01-13,"Yo bro, do you think aarushi could help on our project. She's my homie and I think we could use ...",Kshitij,2023-01-14,"Hey Scott, I think thats a great idea. We are looking for more participants in the project. What...",7,"[Hey Scott, Applied Mathematics is a great skillset! We can definitely benefit from her in case ...","[Hey Scott, Applied Mathematics is a great skillset! We can definitely benefit from her in case ...",18,25,Scott-Kshitij
7,7,New Member Onboarding,Scott,2023-01-15,She does Applied Math. Dunno what she does but im just tryna to get to know her better tbh. U wa...,Kshitij,2023-01-16,"Hey Scott, Applied Mathematics is a great skillset! We can definitely benefit from her in case s...",8,"[Hey Scott, I think thats a great idea. We are looking for more participants in the project. Wha...","[Hey Scott, I think thats a great idea. We are looking for more participants in the project. Wha...",18,25,Scott-Kshitij
8,8,New Member Onboarding,Scott,2023-01-17,Let me hit her up and ask her. Can I be the one that does the study with her. Shes cool and i wa...,Kshitij,2023-01-18,"I know you two are fairly close Scott. I met her and her boyfriend last week, and both of them a...",9,"[Hey Scott, I think thats a great idea. We are looking for more participants in the project. Wha...","[Hey Scott, I think thats a great idea. We are looking for more participants in the project. Wha...",18,25,Scott-Kshitij
9,9,New Member Onboarding,Scott,2023-01-19,":( Oh, didnt know she had a boyfriend. All good tho, she said can help with ranking our response...",Kshitij,2023-01-20,"Absolutely, that would be great!\nI also had a few conceptual mathematical doubts around Rouge s...",10,"[Hey Scott, I think thats a great idea. We are looking for more participants in the project. Wha...","[Hey Scott, I think thats a great idea. We are looking for more participants in the project. Wha...",18,25,Scott-Kshitij


----
## Deduped Retrieval Dataframe ##

In [60]:
df_messages_deduped=df_messages.iloc[[4,13,15,27,28]]
df_messages_deduped=df_messages_deduped.reset_index(drop=True)
df_messages_deduped[['sender','replier','message','Replier_Emails_Sender']]

Unnamed: 0,sender,replier,message,Replier_Emails_Sender
0,Scott,Radhika,I like baseball too. Can you do sports betting on badminton?,"[Hey Scott,\n\nI appreciate the invite a lot. But I don't enjoy bball. Can we go watch orchestra..."
1,Scott,Kshitij,I heard that place is delish. Lets go and then maybe we can check out some turtle racing at Tin ...,"[Hey Scott, I think thats a great idea. We are looking for more participants in the project. Wha..."
2,Radhika,Kshitij,"Hey Kshitij,\n\nHope you are doing well!\n\nHere are the minutes of meeting from yesterday's cal...","[Hey Radhika, \nI have office on Wednesdays, so would Thursday work? Maybe Thursday 4 pm?\nWhat ..."
3,Kshitij,Scott,"Scott, given that it is a long 8 hour drive and we have 4 people already in the car, do you thin...","[Yoooo, Im pretty sure its November 18-20. Its gonna be sick. Which part are you most excited fo..."
4,Kshitij,Radhika,"Hey Radhika, were you able to register for the DSI Conference? Best, Kshitij","[Hello Kshitij,\n\nThat would be great, yes! I would love to attend a conference like that one. ..."


In [43]:
# df_messages_deduped=pd.read_csv('gs://user-scripts-msca310019-capstone-49b3/data/20231026_Emails_Deduped.csv', parse_dates=['time'])
# df_messages_deduped['Sender_Receiver_Emails_list'] = df_messages_deduped['Sender_Receiver_Emails'].apply(lambda x: ast.literal_eval(x) if pd.notna(x) else [])
# df_messages_deduped.head(2)

----
## Make Changes here

In [10]:
os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')

if 'OPENAI_API_KEY' in os.environ:
    openai_api_key = os.environ['OPENAI_API_KEY']
    print("OpenAI API Key: is set")
else:
    print("OPENAI_API_KEY environment variable is not set.")

OpenAI API Key: ········


OpenAI API Key: is set


-----
## First LLM Chain - Pairwise Writing Style Langchain

In [90]:
# past_emails=(email_retrieval_dataset[email_retrieval_dataset.sender_replier_thread==sender_replier_id]['Replier_Emails_Sender']).to_list()[0][-num_emails:]

past_emails=df_messages_deduped['Replier_Emails_Sender'][1]
past_emails

pairstyle_prompt="""Extract American Communication Style in 5 words between the sender and replier by analyzing these past emails between them {past_emails}"""
pairstyle_prompt_template = ChatPromptTemplate.from_template(template=relationship_prompt)    
pairstyle_prompt_template

llm_chain_pairstyle=LLMChain(llm=llm_endpoint, prompt=relationship_prompt_template, output_key='pair_style')
llm_chain_pairstyle

print(df_messages_deduped['replier'][1])
print(df_messages_deduped['sender'][1])
print(llm_chain_pairstyle.run(past_emails=past_emails))

df_pair_style=df_messages_deduped
pair_style_list=[]

for index, row in df_messages_deduped.iterrows():
    pair_style=llm_chain_pairstyle.run(past_emails=row['Replier_Emails_Sender'])
    pair_style_list.append(pair_style)

df_pair_style['pair_style']=pair_style_list
df_pair_style

Kshitij
Scott
Collaborative, friendly, informal, helpful, accommodating.


---
## First LLM Chain - Email Generation Chain

In [74]:
# INPUT VARIABLES 
'sender_id'
'replier_id'
'sender_email'

# PERSONALIZATION VARIABLES
num_emails= 5 #FOR RETRIEVEL + RANKING
email_retrieval_dataset=df_messages_deduped # FOR RETRIEVAL DATABASE
vector_db_client=chroma_client # FOR RANKING VECTOR DATABASE


# TEXT GENERATION CONTROL
api_key=openai_api_key
llm_model='gpt-3.5-turbo-0301' # CAN CHANGE
llm_endpoint=ChatOpenAI(temperature=0.1, model=llm_model, openai_api_key=openai_api_key) # CAN CHANGE


# VALIDATION VARIABLES
df_validate = df_messages_deduped
sample_size = len(df_messages_deduped)

In [75]:
# PROMPT

template_string_1="""Create a response to the following email {sender_email}"""
template_string_2="""Create a response to the following email {sender_email}, deriving context from these relevant emails {relevant_emails} """
template_string_3="""You are the person receiving this email {sender_email}, Write a reply to the email as the person who recieved it, deriving context and writing style and email length from previous relevant emails from the person {relevant_emails}"""
template_string_4="""You are the person recieving this email {sender_email}, Write a reply to the email as the person who recieved it, deriving context and writing style and email length from previous relevant emails from the person given: {relevant_emails}, Make sure to use salutation and signature style similar to the revelant emails above."""
template_string_5="""You are the person recieving this email enclosed {sender_email}, Derive the context from past relevant emails by the person: {relevant_emails}, extract the writing style from the relevant emails. 
Identify the sentiment from the relevant emails, extract the average number of tokens from the relevant emails. 
Write a reply to the email in the angle brackets as the person recieving the email, including the context, writing style, average number of tokens,sentiment derived from the above steps"""
template_string_6="""Create a response to the following email : <{sender_email}>, deriving context from these relevant emails: {relevant_emails}, using a conversation style similar to following past emails between these 2 users : {prev_emails}"""

list_prompts=[template_string_1, template_string_2, template_string_3, template_string_4, template_string_5, template_string_6]


In [76]:
# Email Environment
sender_id='Kshitij'
replier_id='Scott'

# sender_email='Hey Scott, Were you able to check with Aarushi if she would be available on Friday for the project meeting?'
sender_email='Hey Scott, is Radhika coming to the Michigan trip with us?'

In [78]:
email_retrieval_dataset=df_messages_deduped
num_emails=num_emails
vector_db_client=chroma_client
api_key=openai_api_key
llm_endpoint=llm_endpoint
template_string=template_string_4

In [122]:
# sender_replier_id='-'.join([sender_id, replier_id])
# previous_emails=(email_retrieval_dataset[email_retrieval_dataset.sender_replier_thread==sender_replier_id]['Replier_Emails_Sender']).to_list()[0][-num_emails:]
# previous_emails
# Second, getting ranked responses as per context ------------------

    # Building the Langchain vectorstore using chroma collections
user_vector_store = Chroma(
    client=vector_db_client, 
    collection_name='user'+str(replier_id),
    embedding_function=OpenAIEmbeddings())
    # Getting ranked responses using MMR
    
found_rel_emails = await user_vector_store.amax_marginal_relevance_search(sender_email, k=num_emails, fetch_k=num_emails)
list_rel_emails=[]
for i, doc in enumerate(found_rel_emails):
    list_rel_emails.append(doc.page_content)
list_rel_emails


# Setting up LangChain
prompt_template_emailgen = ChatPromptTemplate.from_template(template=template_string)    
llm_chain_emailgen=LLMChain(llm=llm_endpoint, prompt=prompt_template_emailgen, output_key='Global_Context_Email')

print(llm_chain_emailgen.run(sender_email=sender_email, relevant_emails=list_rel_emails))

AuthenticationError: Incorrect API key provided: sk-KQ8hW***************************************ELV8. You can find your API key at https://platform.openai.com/account/api-keys.

----
## Second LLM Chain - Local Context from Threads

---
## Sequential LLM Chain for Pair and Email Gen

In [97]:
from langchain.chains import SimpleSequentialChain
from langchain.chains import SequentialChain

In [84]:
# Email Generation Chain
llm_chain_emailgen

LLMChain(prompt=ChatPromptTemplate(input_variables=['relevant_emails', 'sender_email'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['relevant_emails', 'sender_email'], template='You are the person recieving this email {sender_email}, Write a reply to the email as the person who recieved it, deriving context and writing style and email length from previous relevant emails from the person given: {relevant_emails}, Make sure to use salutation and signature style similar to the revelant emails above.'))]), llm=ChatOpenAI(client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo-0301', temperature=0.1, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy=''), output_key='First_Email')

In [92]:
# Pair Style Extraction Chain
llm_chain_pairstyle

LLMChain(prompt=ChatPromptTemplate(input_variables=['past_emails'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['past_emails'], template='Extract American Communication Style in 5 words between the sender and replier by analyzing these past emails between them {past_emails}'))]), llm=ChatOpenAI(client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo-0301', temperature=0.1, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy=''), output_key='pair_style')

In [109]:
template_string_personal="""Take this email {First_Email}, and update it slightly using these adjectives {pair_style}"""
prompt_template_personalization=ChatPromptTemplate.from_template(template=template_string_personal)
llm_chain_personalization=LLMChain(llm=llm_endpoint, prompt=prompt_template_personalization, output_key='Personalized_Email')
llm_chain_personalization

LLMChain(prompt=ChatPromptTemplate(input_variables=['First_Email', 'pair_style'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['First_Email', 'pair_style'], template='Take this email {First_Email}, and update it slightly using these adjectives {pair_style}'))]), llm=ChatOpenAI(client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo-0301', temperature=0.1, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy=''), output_key='Personalized_Email')

In [117]:
## Overall Chain

from langchain.chains import SequentialChain
chain_overall = SequentialChain(chains=[llm_chain_emailgen, llm_chain_pairstyle, llm_chain_personalization],
                                input_variables=['relevant_emails','sender_email','past_emails'],
                                output_variables=['Personalized_Email']
                               )

chain_overall

SequentialChain(chains=[LLMChain(prompt=ChatPromptTemplate(input_variables=['relevant_emails', 'sender_email'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['relevant_emails', 'sender_email'], template='You are the person recieving this email {sender_email}, Write a reply to the email as the person who recieved it, deriving context and writing style and email length from previous relevant emails from the person given: {relevant_emails}, Make sure to use salutation and signature style similar to the revelant emails above.'))]), llm=ChatOpenAI(client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo-0301', temperature=0.1, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy=''), output_key='First_Email'), LLMChain(prompt=ChatPromptTemplate(input_variables=['past_emails'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables

In [118]:
# Email Environment
sender_id='Kshitij'
replier_id='Scott'
sender_email='Hey Scott, is Radhika coming to the Michigan trip with us?'

user_vector_store = Chroma(
    client=vector_db_client, 
    collection_name='user'+str(replier_id),
    embedding_function=OpenAIEmbeddings())
    # Getting ranked responses using MMR
    
found_rel_emails = await user_vector_store.amax_marginal_relevance_search(sender_email, k=num_emails, fetch_k=num_emails)
list_rel_emails=[]
for i, doc in enumerate(found_rel_emails):
    list_rel_emails.append(doc.page_content)

sender_replier_id='-'.join([sender_id, replier_id])
past_emails=(email_retrieval_dataset[email_retrieval_dataset.sender_replier_thread==sender_replier_id]['Replier_Emails_Sender']).to_list()[0]

In [121]:
chain_overall({"relevant_emails": list_rel_emails, "sender_email": sender_email, "past_emails":past_emails})['Personalized_Email']

"Hey there,\n\nWhat's up, my dude? Radhika is totally down to join us on the Michigan trip. Not sure who else is riding shotgun with us, but as long as Aarushi is in the mix, I'm chillin'. Do we need to bring any other crap for this adventure?\n\nCheers,\nScott"


---
## BACKEND FUNCTIONS, DO NOT CHANGE

In [54]:
# SINGLE RESPONSE GENERATION
async def get_email_response_personalized(sender_id,
                                    replier_id,
                                    sender_email,
                                    email_retrieval_dataset=df_messages_deduped,
                                    num_emails=num_emails,
                                    vector_db_client=chroma_client,
                                    api_key=openai_api_key,
                                    llm_endpoint=llm_endpoint,
                                    template_string=template_string_1):
    
    # First getting retrieved emails to understand conversation --------
    sender_replier_id='-'.join([sender_id, replier_id])
    previous_emails=(email_retrieval_dataset[email_retrieval_dataset.sender_replier_thread==sender_replier_id]['Replier_Emails_Sender']).to_list()[0][-num_emails:]
    previous_emails
    # Second, getting ranked responses as per context ------------------
        
        # Building the Langchain vectorstore using chroma collections
    user_vector_store = Chroma(
        client=vector_db_client, 
        collection_name='user'+str(replier_id),
        embedding_function=OpenAIEmbeddings())
        # Getting ranked responses using MMR
    found_rel_emails = await user_vector_store.amax_marginal_relevance_search(sender_email, k=num_emails, fetch_k=num_emails)
    list_rel_emails=[]
    for i, doc in enumerate(found_rel_emails):
        list_rel_emails.append(doc.page_content)
    list_rel_emails
    
    
    # Setting up LangChain
    prompt_template = ChatPromptTemplate.from_template(template=template_string)    
    llm_chain=LLMChain(llm=llm_endpoint, prompt=prompt_template)
    
    return llm_chain.run(sender_email=sender_email, prev_emails=previous_emails, relevant_emails=list_rel_emails)
    # print(llm_chain.run(sender_email=sender_email, prev_emails=previous_emails, relevant_emails=list_rel_emails))
    

In [55]:
%time

for prompt in list_prompts:
    await get_email_response_personalized(template_string=prompt)
    print("------")

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.68 µs


TypeError: get_email_response_personalized() missing 3 required positional arguments: 'sender_id', 'replier_id', and 'sender_email'

In [63]:
async def process_all_emails_prompts(df_validate,
                                    list_prompts=list_prompts):
    df_emails = df_validate
    df_output = pd.DataFrame()
    
    sender_list=[]
    replier_list=[]
    
    sender_message_list=[]
    replier_message_list=[]
    
    responses=[]
    prompts=[]
    prompts_id=[]
    
    for prompt_index, prompt_body in enumerate(list_prompts):
        for index, row in df_emails.iterrows():
            response = await get_email_response_personalized(sender_id = str(row['sender']), 
                                                             replier_id= str(row['replier']), 
                                                             sender_email = str(row['message']), 
                                                             email_retrieval_dataset=email_retrieval_dataset, 
                                                             api_key=api_key, 
                                                             vector_db_client=vector_db_client, template_string=prompt_body)
            
            sender_list.append(str(row['sender']))
            replier_list.append(str(row['replier']))
            
            sender_message_list.append(row['message'])
            replier_message_list.append(row['reply_message'])
            
            responses.append(response)
            prompts.append(prompt_body)
            prompts_id.append(prompt_index)
            
            
        # time.sleep(20)
        
    df_output['sender']=sender_list
    df_output['replier']=replier_list
    df_output['message']=sender_message_list
    df_output['actual_reply']=replier_message_list
    df_output['generated_reply']=responses
    df_output['prompt_body']=prompts
    df_output['prompt_ID']=prompts_id
    
    return df_output

----
## Run Tests

In [56]:
# This will take a sample of data from df_messages_deduped, and generate email replies for it

%time

test_df= await process_all_emails_prompts(df_validate=df_messages_deduped,list_prompts=list_prompts)
test_df

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 7.15 µs


NameError: name 'process_all_emails_prompts' is not defined

In [65]:
test_df.to_csv('Human_Validation_Test_Dataset.csv',index=False)

----
## New Data Set

In [57]:
%time

await get_email_response_personalized(sender_id=sender_id,
                                      replier_id=replier_id,
                                      sender_email=sender_email,
                                      template_string=template_string_4)

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 7.15 µs


TypeError: bad operand type for unary -: 'tuple'

----
## Relationship Langchain