In [1]:
import re
import json
import os
from openai import OpenAI
from dotenv import load_dotenv
from utils.labeling_utils import re_analyze_email

In [2]:
# Load the test split. Later cells read each record's 'content' and 'label' entries.
# JSON text is UTF-8 by specification, so the encoding is pinned here instead of
# being left to the platform's locale default (which differs across systems).
test_data_path = "./data/PAIRED_test.json"
with open(test_data_path, 'r', encoding='utf-8') as file:
    test_data = json.load(file)

# Load the training split the same way.
train_data_path = "./data/PAIRED_train.json"
with open(train_data_path, 'r', encoding='utf-8') as file:
    train_data = json.load(file)

## Baseline: Using Regular Expressions


In [3]:
# Run the regex baseline (re_analyze_email, imported from utils.labeling_utils)
# on the first test record's 'content' and display the dict it returns.
re_analyze_email(test_data[0]['content'])

{'Spam': 'Yes',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'N/A',
 'Start': 'N/A',
 'End': 'N/A',
 'Type': 'N/A',
 'Category': 'N/A',
 'Format': 'N/A',
 'Location': 'N/A',
 'Action_Required': 'No',
 'Priority_Level': 'Low'}

In [4]:
# Display the 'label' entry stored with the same test record, for comparison
# with the baseline output shown above.
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

## Method 1: GPT with instructions

In [5]:
# Pull the variables defined in the local .env file into the environment.
# That file is expected to hold the OpenAI API key.
load_dotenv()
# Build the API client used by the labeling helpers.
client = OpenAI()
# Default chat model for every labeling request in this notebook.
model_name = "gpt-4o-mini"

In [6]:
# System message: sent as the "system" role on every request made by chat_gpt_label.
system_prompt = """You are a personal secretary. You are an expert of analyzing emails and summarize
them into required form.
"""

# Task instructions: the label template the model must fill in, plus formatting rules.
# chat_gpt_label appends "Email: <text>; Label:" directly after this text, so the
# trailing newline at the end of the string separates the rules from the email.
task_prompt = """Your task is to label received emails into a template:
{
    "Spam": "Yes" / "No",
    "Subject": ,
    "Sender": ,
    "send_date": ,
    "Time_Sensitive": "Yes" / "No",
    "Start": ,
    "End": ,
    "Type": "Event" / "Reminder" / "N/A",
    "Category": "Work" / "Study" / "Leisure",
    "Format": "Online" / "In-person",
    "Location": ,
    "Action_Required": "Yes" / "No",
    "Priority_Level": "Low" / "Medium" / "High" / "Urgent"
}
For the key-value pair in the dict, every key is necessary, the value is required field.
Use standard time format like '1992-01-10 15:30' for time or '1992-01-10' for date.
"""



In [7]:
def chat_gpt_label(email, temperature=1.0, model=model_name):
    '''
    Label a single email with the instruction-only prompt.

    The user message is the module-level ``task_prompt`` followed by the email
    text; the system message is the module-level ``system_prompt``.

    email: email text to be labeled; interpolated into the user message.
    temperature: sampling temperature forwarded to the chat completions call.
    model: model identifier forwarded to the chat completions call.

    Returns the message content of the first completion choice, unparsed.
    '''
    user_content = task_prompt + f"Email: {email}; Label:"
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_content}
    completion = client.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [9]:
# Label the first test email with the instruction-only prompt and keep the raw
# reply so the next cell can parse it.
label = chat_gpt_label(test_data[0]['content'])

In [10]:
# Parse the reply as JSON and display it; json.loads raises if the reply is not
# valid JSON.
json.loads(label)

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18',
 'End': '1992-02-18',
 'Type': 'Event',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

In [11]:
# Display the stored 'label' entry for the same test record, for comparison with
# the parsed model reply shown above.
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

## Method 2: GPT with instructions + examples

In [21]:
# System message: sent as the "system" role on every request made by chat_gpt_label_eg.
system_prompt_eg = """You are a personal secretary. You are an expert of analyzing emails and summarize
them into required form.
"""

# Number of leading training records embedded in the prompt as worked examples.
NUM_FEW_SHOT_EXAMPLES = 5


def _label_as_json(label):
    """Return the label as JSON text; a label already stored as a string is passed through."""
    # The model's reply is parsed with json.loads, so the examples must show
    # real JSON (double-quoted keys and values), not a Python dict repr.
    if isinstance(label, str):
        return label
    return json.dumps(label, ensure_ascii=False)


# One "Email: ... / Label: ..." stanza per example, separated by a blank line.
# Slicing (rather than indexing 0..4) tolerates a training set with fewer records.
few_shot_examples_text = "\n".join(
    f"Email: {example['content']}\nLabel: {_label_as_json(example['label'])}\n"
    for example in train_data[:NUM_FEW_SHOT_EXAMPLES]
)

# Task instructions: label template, formatting rules, then the worked examples.
# chat_gpt_label_eg appends "Email: <text>; Label:" directly after this text.
task_prompt_eg = """Your task is to analyze received emails and label them into the following template:
{
    "Spam": "Yes" / "No",
    "Subject": "string",
    "Sender": "string",
    "send_date": "YYYY-MM-DD",
    "Time_Sensitive": "Yes" / "No",
    "Start": "YYYY-MM-DD HH:MM",
    "End": "YYYY-MM-DD HH:MM",
    "Type": "Event" / "Reminder" / "N/A",
    "Category": "Work" / "Study" / "Leisure",
    "Format": "Online" / "In-person",
    "Location": "string",
    "Action_Required": "Yes" / "No",
    "Priority_Level": "Low" / "Medium" / "High" / "Urgent"
}

### Instructions:
1. Every key in the template is required, and the value for each key must be provided.
2. Use the following formats:
   - **Time**: 'YYYY-MM-DD HH:MM' (e.g., '1992-01-10 15:30')
   - **Date**: 'YYYY-MM-DD' (e.g., '1992-01-10')
3. Analyze the email content carefully to extract the appropriate values for each field.
4. Email with only a start stamp is more likely a reminder; with both start and end stamp is an event

Here are some examples for reference:

""" + few_shot_examples_text

In [22]:
def chat_gpt_label_eg(email, temperature=1.0, model=model_name):
    '''
    Label a single email with the instructions-plus-examples prompt.

    The user message is the module-level ``task_prompt_eg`` followed by the
    email text; the system message is the module-level ``system_prompt_eg``.

    email: email text to be labeled; interpolated into the user message.
    temperature: sampling temperature forwarded to the chat completions call.
    model: model identifier forwarded to the chat completions call.

    Returns the message content of the first completion choice, unparsed.
    '''
    user_content = task_prompt_eg + f"Email: {email}; Label:"
    system_message = {"role": "system", "content": system_prompt_eg}
    user_message = {"role": "user", "content": user_content}
    completion = client.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [23]:
# Label the same first test email with the instructions-plus-examples prompt and
# keep the raw reply so the next cell can parse it.
label_eg = chat_gpt_label_eg(test_data[0]['content'])

In [24]:
# Parse the reply as JSON and display it; json.loads raises if the reply is not
# valid JSON.
json.loads(label_eg)

{'Spam': 'No',
 'Subject': 'Wells Gardner Displays',
 'Sender': 'Rick',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

In [25]:
# Display the stored 'label' entry for the same test record, for comparison with
# the parsed model reply shown above.
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}