In [15]:
import re
import json
import os
import ast
from openai import OpenAI
from dotenv import load_dotenv
from utils.labeling_utils import re_analyze_email, gpt_label, gpt_label_eg, gpt_label_ft
# from utils.gpt_utils import gpt_completion

In [2]:
# Load the paired test and train datasets from JSON.
# Later cells index the records as test_data[i]['content'] and
# test_data[i]['label'].
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so the
# result does not depend on the platform's locale default, which differs
# between operating systems.
test_data_path = "./data/PAIRED_test.json"
with open(test_data_path, 'r', encoding='utf-8') as file:
    test_data = json.load(file)

train_data_path = "./data/PAIRED_train.json"
with open(train_data_path, 'r', encoding='utf-8') as file:
    train_data = json.load(file)

## Baseline: Using Regular Expressions


In [35]:
# Baseline: run the regex-based analyzer on the content of the first test email.
# NOTE(review): `label` is reassigned by every method further down, so its
# value depends on which cell ran last.
label = re_analyze_email(test_data[0]['content'])

In [37]:
# Display the value currently bound to `label`.
label

{'Spam': 'Yes',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'N/A',
 'Start': 'N/A',
 'End': 'N/A',
 'Type': 'N/A',
 'Category': 'N/A',
 'Format': 'N/A',
 'Location': 'N/A',
 'Action_Required': 'No',
 'Priority_Level': 'Low'}

In [38]:
# Display the label stored with the same test record, for side-by-side comparison.
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

## Method 1: GPT with instructions

PROMPT:

Your task is to label received emails into a template:
{

    "Spam": "Yes" / "No",
    "Subject": ,
    "Sender": ,
    "send_date": ,
    "Time_Sensitive": "Yes" / "No",
    "Start": ,
    "End": ,
    "Type": "Event" / "Reminder" / "N/A",
    "Category": "Work" / "Study" / "Leisure",
    "Format": "Online" / "In-person",
    "Location": ,
    "Action_Required": "Yes" / "No",
    "Priority_Level": "Low" / "Medium" / "High" / "Urgent"
    
}
For the key-value pair in the dict, every key is necessary, the value is required field.
Use standard time format like '1992-01-10 15:30' for time or '1992-01-10' for date.

In [5]:
# Chat model used by the prompt-based labeling calls in later cells.
model_name = "gpt-4o-mini"

# Read the local .env file (it holds the OpenAI API key) into the environment.
load_dotenv()

# Create the API client; no key is passed explicitly, so the one loaded
# above is what the client relies on.
client = OpenAI()

In [31]:
# Method 1: label the first test email with gpt_label, using the shared
# client and model_name.
# NOTE(review): temperature=0.7 is presumably forwarded to the completion
# request; if so the reply is sampled and a re-run may not reproduce the
# recorded output. Confirm, and lower it if repeatability matters.
# This reassigns `label`, replacing the result of the previous method.
label = gpt_label(client, test_data[0]['content'], temperature=0.7, model=model_name)

In [32]:
# Parse the value returned by gpt_label into a Python object and display it.
# literal_eval accepts only Python literal syntax.
# NOTE(review): a reply using JSON-only tokens (true/false/null) or wrapped
# in Markdown code fences would raise here; confirm gpt_label's output format.
ast.literal_eval(label)

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18',
 'End': '1992-02-18',
 'Type': 'Event',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

In [33]:
# Display the label stored with the same test record, for side-by-side comparison.
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

## Method 2: GPT with crafted instructions + examples

PROMPT + several labeled examples taken from the training set (`train_data`).

In [23]:
# Method 2: label the first test email with gpt_label_eg, which also
# receives train_data.
# NOTE(review): presumably train_data is the source of the in-prompt
# examples; how many are used and how they are chosen is not visible here.
# temperature=0.7 is presumably a sampling setting, so a re-run may differ
# from the recorded output.
# This reassigns `label`, replacing the result of the previous method.
label = gpt_label_eg(client, test_data[0]['content'], train_data, temperature=0.7, model=model_name)

In [24]:
# Parse the value returned by gpt_label_eg into a Python object and display it.
# literal_eval accepts only Python literal syntax.
# NOTE(review): a reply using JSON-only tokens (true/false/null) or wrapped
# in Markdown code fences would raise here; confirm the helper's output format.
ast.literal_eval(label)

{'Spam': 'No',
 'Subject': 'Wells Gardner Displays',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

In [25]:
# Display the label stored with the same test record, for side-by-side comparison.
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

## Method 3: GPT Fine-tuning with 35 data points

Model_ID = ft:gpt-4o-mini-2024-07-18:personal:ft-schedular:AXN6Qt3B

In [26]:
# Identifier of the fine-tuned model passed as `model=` to the Method 3 call.
ft_model_name = "ft:gpt-4o-mini-2024-07-18:personal:ft-schedular:AXN6Qt3B"

In [27]:
# Method 3: label the first test email with gpt_label_ft, using the
# fine-tuned model named in ft_model_name.
# NOTE(review): temperature=0.7 is presumably a sampling setting, so a
# re-run may differ from the recorded output.
# This reassigns `label`, replacing the result of the previous method.
label = gpt_label_ft(client, test_data[0]['content'], temperature=0.7, model=ft_model_name)

In [28]:
# Parse the value returned by gpt_label_ft into a Python object and display it.
# literal_eval accepts only Python literal syntax.
# NOTE(review): a reply using JSON-only tokens (true/false/null) or wrapped
# in Markdown code fences would raise here; confirm the helper's output format.
ast.literal_eval(label)

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'Rick',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}

In [29]:
# Display the label stored with the same test record, for side-by-side comparison.
test_data[0]['label']

{'Spam': 'No',
 'Subject': 'WELLS GARDNER DISPLAYS',
 'Sender': 'BERT::MEYETTE',
 'send_date': '1992-02-04',
 'Time_Sensitive': 'Yes',
 'Start': '1992-02-18 00:00',
 'End': '',
 'Type': 'Reminder',
 'Category': 'Work',
 'Format': 'In-person',
 'Location': 'Wells Gardner',
 'Action_Required': 'Yes',
 'Priority_Level': 'High'}