In [4]:
import os
import openai
import json
import numpy as np
import pandas as pd
from autolabel import LabelingAgent, AutolabelDataset
from datasets import load_dataset as ld
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
import time

In [5]:
openai.api_key = "Enter Open API Key"

In [6]:
from datasets import load_dataset as ld

def load_dataset_as_pandas_dataframe(
    dataset_name: str="trec"
):

    dataset = ld(dataset_name)
    df = dataset["train"].to_pandas()
    df = df.sample(frac=1)

    df_test = dataset["test"].to_pandas()
    df_test = df_test.sample(frac=1)
    
    return df, df_test

In [7]:
df, df_test = load_dataset_as_pandas_dataframe()

In [8]:
id_to_label = {0 : 'ABBR' , 1 : 'ENTY', 2: 'DESC', 3 : 'HUM', 4 : 'LOC', 5 : 'NUM'}
label_to_id= { 'ABBR' : 0 , 'ENTY' : 1, 'DESC': 2, 'HUM' :3, 'LOC' : 4, 'NUM' : 5}
categories = list(label_to_id.keys())

# DataFrame that con

In [9]:
df["coarse_label"] = df["coarse_label"].map(id_to_label)
df_test["coarse_label"] = df_test["coarse_label"].map(id_to_label)

# Let create a text_to_openai_json function

In [10]:
def text_to_openai_json(data,filename):
    """
    Converts a given dataset into a JSON Lines (JSONL) file suitable for OpenAI's GPT-3.5 turbo model.
    
    Args:
        data (DataFrame or similar data structure): Input data containing text and labels.

    The function processes the input data row by row, constructing conversations for each row with a system message, user message, and an assistant message. It then writes the generated conversation data to a JSONL file.
 
    """
    # Initialize an empty list to store conversation data
    message_list = []

    # Iterate through the rows in the input data
    for _, row in data.iterrows():
        # Create a system message as an initial instruction
        system_message = {
            "role": "system",
            "content": f"given the following text: find the category in: {categories} that is most closely associated with it. Return only the category name"
        }

        # Append the system message to the conversation
        message_list.append({"messages": [system_message]})

        # Create a user message based on the 'text' column from the data
        user_message = {
            "role": "user",
            "content": row['text']
        }

        # Append the user message to the conversation
        message_list[-1]["messages"].append(user_message)

        # Create an assistant message based on the 'coarse_label' column from the data
        assistant_message = {
            "role": 'assistant',
            "content": row['coarse_label']
        }

        # Append the assistant message to the conversation
        message_list[-1]["messages"].append(assistant_message)

    # Write the conversation data to a JSON Lines (JSONL) file
    with open(filename, "w") as json_file:
        for message in message_list:
            # Serialize the conversation data to JSON and write it to the file
            json.dump(message, json_file)
            json_file.write("\n")




# Few Shot Prompting


In [11]:
system_content =  f"given the following text: find the category in: {categories} that is most closely associated with it. Return only the category name only in following format"

In [12]:
from sklearn.metrics import precision_recall_fscore_support

In [13]:
def zero_shot_model(data,model_id):
    pred = []
    for row in data["text"]:
        completion = openai.ChatCompletion.create(
            model= model_id ,
            messages=[
                {"role": "system", "content": system_content },
                {"role": "user", "content": row }
            ])
        
        print(f'text: {row}')
        print(completion.choices[0].message.content)
        pred.append(completion.choices[0].message.content)
    pred_df = pd.DataFrame({'text': data["text"], 'coarse_label' : data['coarse_label'], 'few-shot predictions' : pred })
    
    return pred_df

In [14]:
cumulative_increment = 0
model_id = 'gpt-3.5-turbo-0301'
all_model_id = []

In [15]:
def fine_tune_model(model_id,num_label,pred_df):
    incorrection_pred_df = pred_df[pred_df['coarse_label'] != pred_df['few-shot predictions']][:num_label]
    filename = f'ft_increment_{num_label}.jsonl'
    text_to_openai_json(incorrection_pred_df, filename)
    loader = openai.File.create(file=open(filename, "rb"), purpose='fine-tune')
    fine_tuning_job = openai.FineTuningJob.create(training_file=loader.id, model="gpt-3.5-turbo")
    return fine_tuning_job.id

In [16]:
def wait_for_fine_tuning(job_id):
    while True:
        response = openai.FineTuningJob.retrieve(job_id)
        print(response["fine_tuned_model"])
        if response["fine_tuned_model"]:
            print(response["fine_tuned_model"])
            return response["fine_tuned_model"]
        time.sleep(60)  # Check every 60 seconds

In [17]:
def ft_accuracy(data,model_id):
    pred = []
    for row in data["text"]:
        completion = openai.ChatCompletion.create(
            model= model_id ,
            messages=[
                {"role": "system", "content": system_content },
                {"role": "user", "content": row }
            ])
        
        print(f'text: {row}')
        print(completion.choices[0].message.content)
        pred.append(completion.choices[0].message.content)
        
    accuracy = accuracy_score(data['coarse_label'], pred)
    print(f'Accuracy: {accuracy * 100:.2f}%')
    precision, recall, f1, _ = precision_recall_fscore_support(data['coarse_label'], pred, average='macro',zero_division=1)
    
    return accuracy, precision, recall, f1, pred



In [18]:
df_test.head()

Unnamed: 0,text,coarse_label,fine_label
55,What does cc in engines mean ?,DESC,24
266,What is dianetics ?,DESC,24
485,What is cerebral palsy ?,DESC,24
90,The U.S. Department of Treasury first issued p...,ENTY,8
380,Who killed John F. Kennedy ?,HUM,29


In [20]:
pred_df = zero_shot_model(model_id='gpt-3.5-turbo-0301', data = df_test)

text: What does cc in engines mean ?
Category: DESC
text: What is dianetics ?
Category: DESC
text: What is cerebral palsy ?
DESC
text: The U.S. Department of Treasury first issued paper currency for the U.S. during which war ?
The category associated with this text is "HUM".
text: Who killed John F. Kennedy ?
The category for this text is not clear, as it is an open-ended question and does not provide specific information for classification.
text: Which mountain range in North America stretches from Maine to Georgia ?
DESC
text: What city 's newspaper is called `` The Star '' ?
DESC
text: What is the Milky Way ?
DESC
text: What is the scientific name for elephant ?
The scientific name for the elephant is Loxodonta (African elephants) or Elephas (Asian elephants).
text: What is nuclear power ?
Category: DESC
text: Who is a German philosopher ?
Category: HUM
text: The sun 's core , what is the temperature ?
DESC
text: What was the most popular toy in 1957 ?
Category: DESC
text: What is A

# Fine-Tuning GPT-3.5 Turbo on Iterative Label Increase

In [21]:
model_ids = []
accs = [] 
precisions = [] 
recalls = [] 
f1s = []
label_count = 0
for i in range(15):
    label_count += 10
    ft_id = fine_tune_model(model_id = 'gpt-3.5-turbo-0301', num_label=label_count, pred_df=pred_df)
    if wait_for_fine_tuning(ft_id) is not None:
        model_ids.append(wait_for_fine_tuning(ft_id))
        accuracy, precision, recall, f1, pred = ft_accuracy(data=df_test,model_id=(wait_for_fine_tuning(ft_id)))
        accs.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)
        

None
None
