In [1]:
import re
import json
import os
from openai import OpenAI
from dotenv import load_dotenv
from utils.labeling_utils import re_analyze_email, gpt_label, gpt_label_eg
# from utils.gpt_utils import gpt_completion

In [2]:
def _load_json(path):
    """Parse the JSON file at ``path`` and return the decoded object.

    The file is read as UTF-8 explicitly so the result does not depend on the
    platform's default locale encoding.
    """
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)


# Paired email/label records; later cells index them as
# ``test_data[i]['content']`` and ``test_data[i]['label']``.
test_data_path = "./data/PAIRED_test.json"
test_data = _load_json(test_data_path)

train_data_path = "./data/PAIRED_train.json"
train_data = _load_json(train_data_path)

## Baseline: Using Regular Expressions


In [3]:
re_analyze_email(test_data[0]['content'])

{'Spam': 'Yes',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'N/A',
 'Start': 'N/A',
 'End': 'N/A',
 'Type': 'N/A',
 'Category': 'N/A',
 'Format': 'N/A',
 'Location': 'N/A',
 'Action_Required': 'No',
 'Priority_Level': 'Low'}

In [4]:
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

## Method 1: GPT with instructions

PROMPT:

Your task is to label received emails into a template:
{

    "Spam": "Yes" / "No",
    "Subject": ,
    "Sender": ,
    "send_date": ,
    "Time_Sensitive": "Yes" / "No",
    "Start": ,
    "End": ,
    "Type": "Event" / "Reminder" / "N/A",
    "Category": "Work" / "Study" / "Leisure",
    "Format": "Online" / "In-person",
    "Location": ,
    "Action_Required": "Yes" / "No",
    "Priority_Level": "Low" / "Medium" / "High" / "Urgent"
    
}
For the key-value pair in the dict, every key is necessary, the value is required field.
Use standard time format like '1992-01-10 15:30' for time or '1992-01-10' for date.

In [5]:
# Pull the settings stored in the local .env file into the process
# environment; that file holds the OpenAI API key.
load_dotenv()

# Create the API client (it uses the key loaded above) and pick the model
# that the labeling calls below pass along.
client = OpenAI()
model_name = "gpt-4o-mini"

In [6]:
# Method 1: label the first test email via the imported `gpt_label` helper
# (presumably the instruction-only prompt shown above — confirm in utils).
# NOTE(review): this run uses temperature=1.0 while the Method 2 call uses 0.7,
# so differences between the two outputs are not due to the prompt alone.
label = gpt_label(client, test_data[0]['content'], temperature=1.0, model=model_name)

In [7]:
json.loads(label)

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '1992-02-18 23:59',
 'Type': 'Event',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

In [8]:
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

## Method 2: GPT with crafted instructions + examples

The instruction prompt plus several labeled examples taken from the training set (`train_data`).

In [9]:
# Method 2: label the same test email, additionally passing the training set
# to the imported `gpt_label_eg` helper as the source of examples.
# NOTE(review): this rebinds `label`, discarding the Method 1 result.
label = gpt_label_eg(client, test_data[0]['content'], train_data, temperature=0.7, model=model_name)

In [10]:
json.loads(label)

{'Spam': 'No',
 'Subject': 'Wells Gardner Displays',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

In [11]:
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

## Method 2 (in-notebook implementation): GPT with instructions + examples

In [71]:
def generate_examples(example_list, n_examples=5):
    """Render the leading labeled emails as few-shot example text for a prompt.

    Args:
        example_list: Sequence of records; each record is indexed with the
            keys "content" (the email text) and "label" (its label).
        n_examples: Number of leading records to render. The default of 5
            reproduces the original fixed-size output byte for byte.

    Returns:
        A string that starts with a newline, contains one indented
        "Email: ... / Label: ..." pair per example separated by blank lines,
        and ends with a newline followed by four spaces.

    Raises:
        ValueError: If ``n_examples`` is smaller than 1 or ``example_list``
            holds fewer than ``n_examples`` records. (The previous check
            required only 4 records although 5 were read, so a list of exactly
            4 failed with an IndexError.)
    """
    if n_examples < 1:
        raise ValueError(f"n_examples must be at least 1, got {n_examples}")
    if len(example_list) < n_examples:
        raise ValueError(
            f"need at least {n_examples} examples, got {len(example_list)}"
        )

    # NOTE(review): the label is interpolated with str(), so a dict label is
    # shown in Python repr form (single quotes) rather than as JSON, while the
    # model's answer is later parsed with json.loads. Kept as-is so the prompt
    # text is unchanged.
    rendered = [
        f"    Email: {example['content']}\n    Label: {example['label']}\n"
        for example in example_list[:n_examples]
    ]
    # The leading newline and trailing four spaces match the layout of the
    # original indented triple-quoted template.
    return "\n" + "\n".join(rendered) + "    "

In [72]:
generate_examples(train_data)

'\n    Email: From: Wei Zhang  \nDate: 2024-11-11  \nSubject: Important: Our Team Meeting Scheduled for November 22 at 3 PM  \n\nDear Team,\n\nI hope this message finds you well. I wanted to remind you of our upcoming team meeting scheduled for November 22, 2024, at 3:00 PM. The exact location will be determined closer to the date, and I will provide an update as soon as possible.\n\nYour presence and input will be valuable as we discuss important matters affecting our projects. Please mark your calendars and prepare any topics you would like to discuss.\n\nThank you for your attention, and I look forward to seeing all of you there!\n\nBest regards,  \nWei Zhang\n    Label: {\'Spam\': \'No\', \'Subject\': \'Important: Our Team Meeting Scheduled for November 22 at 3 PM\', \'Sender\': \'Wei Zhang\', \'send_date\': \'2024-11-20\', \'Time_Sensitive\': \'Yes\', \'Start\': \'2024-11-22 15:00\', \'End\': \'\', \'Type\': \'Reminder\', \'Category\': \'Work\', \'Format\': \'In-person\', \'Locati

In [73]:
# System message for the few-shot labeling prompt.
system_prompt_eg = """You are a personal secretary. You are an expert of analyzing emails and summarize
them into required form.
"""

# Instruction part of the few-shot prompt. It ends right after the
# "examples" lead-in so that example text can be appended to it.
# Plain (non-f) string: nothing is interpolated, so the template braces need
# no escaping.
task_prompt_eg = """Your task is to analyze received emails and label them into the following template:
{
    "Spam": "Yes" / "No",
    "Subject": "string",
    "Sender": "string",
    "send_date": "YYYY-MM-DD",
    "Time_Sensitive": "Yes" / "No",
    "Start": "YYYY-MM-DD HH:MM",
    "End": "YYYY-MM-DD HH:MM",
    "Type": "Event" / "Reminder" / "N/A",
    "Category": "Work" / "Study" / "Leisure",
    "Format": "Online" / "In-person",
    "Location": "string",
    "Action_Required": "Yes" / "No",
    "Priority_Level": "Low" / "Medium" / "High" / "Urgent"
}

### Instructions:
1. Every key in the template is required, and the value for each key must be provided.
2. Use the following formats:
   - **Time**: 'YYYY-MM-DD HH:MM' (e.g., '1992-01-10 15:30')
   - **Date**: 'YYYY-MM-DD' (e.g., '1992-01-10')
3. Analyze the email content carefully to extract the appropriate values for each field.
4. Email with only a start stamp is more likely a reminder; with both start and end stamp is an event

Here are some examples for reference:

"""

# Same instructions with the first five training examples inlined. Built from
# `task_prompt_eg` instead of repeating the template, so the two cannot drift
# apart; the resulting text is identical to the previous hand-written copy.
task_prompt_eg_ = (
    task_prompt_eg
    + "\n\n".join(
        f'Email: {train_data[i]["content"]}\nLabel: {train_data[i]["label"]}'
        for i in range(5)
    )
    + "\n"
)

In [74]:
def chat_gpt_label_eg(client, email, temperature=1.0, model=model_name):
    """Label one email with the few-shot (instructions + examples) prompt.

    The task prompt is ``task_prompt_eg``, followed by the examples rendered
    by ``generate_examples(train_data)``, followed by the email to label.

    Args:
        client: Client object forwarded unchanged to ``gpt_completion``.
        email: Text of the email to label.
        temperature: Sampling temperature forwarded to ``gpt_completion``.
        model: Model identifier forwarded to ``gpt_completion``. Defaults to
            the value ``model_name`` had when this function was defined.

    Returns:
        Whatever ``gpt_completion`` returns. The notebook parses it with
        ``json.loads``, so presumably a JSON string — confirm in utils.
    """
    # The top-of-notebook import of gpt_completion is commented out, so the
    # name is brought into scope here.
    # NOTE(review): module path taken from that commented-out line — confirm.
    from utils.gpt_utils import gpt_completion

    task_prompt_ = task_prompt_eg + generate_examples(train_data) + f"Email: {email}; Label:"
    # Pass the few-shot system prompt (`system_prompt` is not defined anywhere
    # in this notebook) and honour the caller's `model` argument instead of
    # always using the global `model_name`.
    return gpt_completion(client, system_prompt_eg, task_prompt_, temperature, model)

In [75]:
# Label the first test email with the in-notebook few-shot function, relying
# on its defaults (temperature=1.0 and the model bound at definition time).
label_eg = chat_gpt_label_eg(client, test_data[0]['content'])

In [76]:
label_eg

'{\n    "Spam": "No",\n    "Subject": "WELLS GARDNER DISPLAYS",\n    "Sender": "BERT::MEYETTE",\n    "send_date": "1992-02-04",\n    "Time_Sensitive": "Yes",\n    "Start": "1992-02-18 00:00",\n    "End": "",\n    "Type": "Reminder",\n    "Category": "Work",\n    "Format": "In-person",\n    "Location": "Wells Gardner",\n    "Action_Required": "Yes",\n    "Priority_Level": "Urgent"\n}'

In [77]:
json.loads(label_eg)

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'Urgent'}

In [78]:
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}