In [5]:
import getpass
import os

from langchain_openai import OpenAI

In [6]:
# Keep the secret out of the notebook: reuse the key already exported in the
# environment and only ask for it when it is missing or empty. Unconditionally
# assigning a literal here would overwrite a valid key from the environment.
# getpass reads the value without echoing it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [7]:
# Sampling temperature handed to the completion model.
SAMPLING_TEMPERATURE = 0.8
llm = OpenAI(temperature=SAMPLING_TEMPERATURE)


## Summarization Prompts

In [26]:
# Instruction block sent ahead of the email. It is written as one literal per
# output line so that trailing spaces and blank lines are visible in the code.
prompt_1 = (
    "Parse the provided email data and generate the following information as a single line json:\n"
    "\n"
    "* subject: The subject line of the provided email if present.\n"
    "* timezone: The timezone of the provided email if available.\n"
    "* length: The total number of words in the provided email body.\n"
    "* year: The year extracted from the date header in the provided email.\n"
    "* month: The month extracted from the date header in the provided email.\n"
    "* recipients: The total number of recipients in the provided email.\n"
    "* cc_participants: The number of participants in the CC field (0 if None) in the provided email.\n"
    '* is_reply: A boolean value (0 or 1) indicating whether the email is a reply based on the presence of "Re:" in the provided email.\n'
    "* summary: A concise summary of the email content, capturing key points like:\n"
    "    * Reason for the email \n"
    "    * Actions proposed \n"
    "    * Any mentioned deadlines or timeframes.\n"
    "* response: Generate an email response to the provided email in the same format as the input email from the perspective of the main recipient.\n"
    "\n"
    "**Email:**\n"
    "\n"
)

# Sample email (headers followed by the body) used to exercise the prompt.
email = (
    "Date: Fri, 15 Mar 2001 07:39:00 -0800 (PST)\n"
    "From: ravi.allen@enron.com\n"
    "To: stagecoachmama@hotmail.com\n"
    "Subject: \n"
    "Body: \n"
    "Lucy,\n"
    "Here is the rentroll.\n"
    "My only questions are about #25, and #37 missed rent.  Any special \n"
    "reasons?\n"
    "It looks like there are five vacancies #2,12,20a,35,40.  If you want to run \n"
    "an ad in the paper with a $50 discount that is fine.\n"
    "I will write you a letter of recommendation.  When do you need it?  You can \n"
    "use me as a reference.  In the next five weeks we should really have a good \n"
    "idea whether the sale is going through.\n"
    "Phillip"
)

# The text sent to the model is the instruction block immediately followed by the email.
final_input = f"{prompt_1}{email}"

In [27]:
final_input

'Parse the provided email data and generate the following information as a single line json:\n\n* subject: The subject line of the provided email if present.\n* timezone: The timezone of the provided email if available.\n* length: The total number of words in the provided email body.\n* year: The year extracted from the date header in the provided email.\n* month: The month extracted from the date header in the provided email.\n* recipients: The total number of recipients in the provided email.\n* cc_participants: The number of participants in the CC field (0 if None) in the provided email.\n* is_reply: A boolean value (0 or 1) indicating whether the email is a reply based on the presence of "Re:" in the provided email.\n* summary: A concise summary of the email content, capturing key points like:\n    * Reason for the email \n    * Actions proposed \n    * Any mentioned deadlines or timeframes.\n* response: Generate an email response to the provided email in the same format as the inp

In [37]:
# Send the assembled prompt to the model and drop the newline characters that
# precede the completion text (lstrip treats its argument as a set of characters).
pred = llm.invoke(final_input).lstrip("\n\n")

In [38]:
pred

'{"subject": "", "timezone": "PST", "length": 84, "year": "2001", "month": "Mar", "recipients": 1, "cc_participants": 0, "is_reply": 0, "summary": "Questions about #25 and #37 missied rent, 5 vacancies #2, 12, 20a, 35,40, letter of recommendation to be written, good time frame to know about sale: 5 weeks.", "response": "Date: Fri, 15 Mar 2001 07:39:00 -0800 (PST)\\nFrom: stagecoachmama@hotmail.com\\nTo: ravi.allen@enron.com\\nSubject: Re: Rentroll\\nLucy, Yes, please write me a letter of recommendation. As for the sale, it should be finalized within the next five weeks. I will definitely keep you updated. Thank you for your help and support. Regards, Phillip"}'

In [39]:
import json
import re

# Remove every newline that sits immediately before a double quote, then
# decode the cleaned text as JSON and print the resulting object.
newline_before_quote = re.compile(r'(\n")')
pred = newline_before_quote.sub('"', pred)

dictionary = json.loads(pred)

print(dictionary)

{'subject': '', 'timezone': 'PST', 'length': 84, 'year': '2001', 'month': 'Mar', 'recipients': 1, 'cc_participants': 0, 'is_reply': 0, 'summary': 'Questions about #25 and #37 missied rent, 5 vacancies #2, 12, 20a, 35,40, letter of recommendation to be written, good time frame to know about sale: 5 weeks.', 'response': 'Date: Fri, 15 Mar 2001 07:39:00 -0800 (PST)\nFrom: stagecoachmama@hotmail.com\nTo: ravi.allen@enron.com\nSubject: Re: Rentroll\nLucy, Yes, please write me a letter of recommendation. As for the sale, it should be finalized within the next five weeks. I will definitely keep you updated. Thank you for your help and support. Regards, Phillip'}


In [40]:
import json

def convert_to_json(gpt_output):
    """Parse the JSON text returned by the model into a Python object.

    Surrounding whitespace is trimmed and the text is decoded with
    ``strict=False``, so raw control characters inside string values (for
    example literal line breaks in a generated email body) are accepted and
    kept. Newlines are deliberately not deleted before parsing: doing so
    would glue together words and lines that are separated only by a line
    break.

    Args:
        gpt_output: Text expected to contain a single JSON document.

    Returns:
        The decoded object (a dict for a JSON object), or None when the text
        is not valid JSON; the decoding error is printed in that case.
    """
    try:
        # Newlines between tokens are plain JSON whitespace; newlines inside
        # strings are tolerated because of strict=False.
        return json.loads(gpt_output.strip(), strict=False)
    except json.JSONDecodeError as e:
        print("Failed to decode JSON:", e)
        return None



In [41]:
# convert_to_json returns None when the model output is not valid JSON (and a
# non-dict for any other JSON document), so verify the result before adding a
# key instead of failing with an opaque TypeError on the assignment.
json_data = convert_to_json(pred)
if not isinstance(json_data, dict):
    raise ValueError(
        "Model output could not be parsed into a JSON object; "
        "re-run the generation cell and inspect `pred`."
    )
json_data["original_email"] = email
json_data

{'subject': '',
 'timezone': 'PST',
 'length': 84,
 'year': '2001',
 'month': 'Mar',
 'recipients': 1,
 'cc_participants': 0,
 'is_reply': 0,
 'summary': 'Questions about #25 and #37 missied rent, 5 vacancies #2, 12, 20a, 35,40, letter of recommendation to be written, good time frame to know about sale: 5 weeks.',
 'response': 'Date: Fri, 15 Mar 2001 07:39:00 -0800 (PST)\nFrom: stagecoachmama@hotmail.com\nTo: ravi.allen@enron.com\nSubject: Re: Rentroll\nLucy, Yes, please write me a letter of recommendation. As for the sale, it should be finalized within the next five weeks. I will definitely keep you updated. Thank you for your help and support. Regards, Phillip',
 'original_email': 'Date: Fri, 15 Mar 2001 07:39:00 -0800 (PST)\nFrom: ravi.allen@enron.com\nTo: stagecoachmama@hotmail.com\nSubject: \nBody: \nLucy,\nHere is the rentroll.\nMy only questions are about #25, and #37 missed rent.  Any special \nreasons?\nIt looks like there are five vacancies #2,12,20a,35,40.  If you want to ru