In [None]:
#import modules
import json
import os
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import seaborn as sns
from openai.error import RateLimitError
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
warnings.filterwarnings('ignore')

In [None]:
# Load the TwIT dataset: a semicolon-separated CSV whose 'Text' and 'Emozione'
# columns are read as strings.
twit_csv_path = r"C:\Users\ridol\OneDrive\Desktop\Emotion Recognition\Dataset\TwIT.csv"
twit = pd.read_csv(
    twit_csv_path,
    sep=';',
    header=0,
    encoding='utf8',
    dtype={'Text': 'str', 'Emozione': 'str'},
)

# Use the 'Id' column as the row index (the column itself is dropped).
twit = twit.set_index('Id', drop=True)

# Rename the label column from 'Emozione' to 'Emotion'.
twit = twit.rename(mapper={'Emozione': 'Emotion'}, axis='columns')

## Data exploration
twit.info()
print(twit.isnull().sum())  # per-column null counts; the author reports there are none
freq = twit.groupby(['Emotion']).count()  # number of rows per emotion label
print(freq)
# Result recorded by the author -->
#   Happiness(0): 549; Trust(1): 504; Sadness(2): 479; Anger(3): 513; Fear(4): 518; Disgust(5): 545


In [None]:
# Train / test split
# A fixed seed makes the shuffle (and therefore the split) identical after every
# kernel restart. Without it each run produces a different test set, which can
# overlap the rows a previously fine-tuned model was trained on.
# NOTE: a model fine-tuned on an earlier, unseeded split must be retrained on
# this split before its test-set scores can be trusted.
SPLIT_SEED = 42
TEST_SIZE = 623  # number of held-out rows (the author's "20%")

twit_shuffled = shuffle(twit, random_state=SPLIT_SEED)

# .copy() gives each subset its own data, so the column edits below do not rely
# on chained assignment into a slice of twit_shuffled.
Test_set = twit_shuffled.iloc[:TEST_SIZE].copy()   # 20%
Train_set = twit_shuffled.iloc[TEST_SIZE:].copy()  # 80%

# Write the training subset as JSONL, the format the fine-tuning upload expects.
# Each record has a "prompt" (tweet text + separator) and a "completion"
# (leading space + label + " END" stop marker).
Train_set.columns = ["prompt", "completion"]
Train_set["prompt"] = Train_set["prompt"] + "\n\nCompletion:\n\n"
Train_set["completion"] = " " + Train_set["completion"] + " END"
Train_set.to_json("Train_set.jsonl", orient='records', lines=True)

In [None]:
# GPT-3 fine-tuning
# The API key is read from the OPENAI_API_KEY environment variable instead of
# being written into the notebook. Any key that was ever committed in this file
# must be considered leaked and revoked in the OpenAI dashboard.
API_KEY = os.environ.get("OPENAI_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable to your OpenAI API key")
openai.api_key = API_KEY

# Upload the training file. The context manager closes the handle once the
# upload call returns.
with open("Train_set.jsonl", "rb") as train_file:
    uploaded_file = openai.File.create(file=train_file, purpose='fine-tune')
print(openai.File.list())  # all files on the account, including the one just uploaded

#### ALERT ####
# Creating a fine-tune job starts training and is billed to your OpenAI account.
# The job is created from the file uploaded above (its id comes from the upload
# response), with 'davinci' as the base model.
fine_tune_job = openai.FineTune.create(training_file=uploaded_file["id"], model='davinci')
print(openai.FineTune.list())  # all fine-tune jobs on the account

# Current status of the job just created. Training takes some minutes; re-run
# this call until the job has finished and reports the fine-tuned model name.
print(openai.FineTune.retrieve(id=fine_tune_job["id"]))

# Name of the fine-tuned model used for the predictions. Replace it with the
# model name reported by your own finished job.
ft_model = "davinci:ft-lorenzo-ridolfi-2022-10-10-13-18-57"

In [None]:
# Predictions on Test_set

# Give the test columns the same names as the training file:
# first column -> "prompt" (tweet text), second column -> "completion" (label).
Test_set.columns = ["prompt", "completion"]

# Iterate over the rows directly. Building a dict keyed by the tweet text would
# silently drop duplicated tweets from the evaluation.
test_prompts = Test_set["prompt"].tolist()

# True labels, prefixed with a space so they have the same form as the
# completions the model was trained to produce (" <label>").
y_true = [' ' + label for label in Test_set["completion"]]

# Fine-tuned model to query (loop-invariant, so it is set once here).
ft_model = "davinci:ft-lorenzo-ridolfi-2022-10-10-13-18-57"

REQUEST_PAUSE_SECONDS = 5  # pause before every request to stay under the rate limit
MAX_RETRIES = 5            # attempts per prompt when the API reports a rate limit


def _predict_emotion(prompt_text):
    """Return the fine-tuned model's completion text for one tweet.

    The prompt is the tweet followed by the same separator used in the training
    file; generation stops at the " END" marker. A RateLimitError is retried
    with an exponentially growing pause, and re-raised after MAX_RETRIES
    failed attempts.
    """
    for attempt in range(MAX_RETRIES):
        try:
            response = openai.Completion.create(
                model=ft_model,
                prompt=prompt_text + "\n\nCompletion:\n\n",
                temperature=0,
                stop=[" END"],
            )
            return response["choices"][0]["text"]
        except RateLimitError:
            if attempt == MAX_RETRIES - 1:
                raise
            # Back off: 10s, 20s, 40s, ... before trying the same prompt again.
            time.sleep(REQUEST_PAUSE_SECONDS * 2 ** (attempt + 1))


# Predicted labels, in the same order as y_true.
y_predict = []

for prompt_text in test_prompts:
    time.sleep(REQUEST_PAUSE_SECONDS)
    predicted = _predict_emotion(prompt_text)
    y_predict.append(predicted)

    print(prompt_text + ':' + predicted)

In [None]:
# Classification report: per-class precision, recall and F1 for the predictions.
# NOTE(review): target_names are display names only; scikit-learn assigns them
# positionally to the sorted label values, so this order is assumed to match
# the dataset's label coding - confirm.
emotion_names = ['Happiness', 'Trust', 'Sadness', 'Anger', 'Fear', 'Disgust']
gpt3_report = classification_report(y_true, y_predict, target_names=emotion_names)
print('\tClassification Report for GPT-3:\n\n', gpt3_report)

# Confusion matrix
# Display names for the classes, used on both axes of the heatmap.
labels = ['Happiness','Trust','Sadness','Anger','Fear','Disgust']


def print_cf1(y_test, y_hat):
    """Plot the confusion matrix of true vs. predicted labels as a heatmap.

    Parameters
    ----------
    y_test : sequence
        True labels.
    y_hat : sequence
        Predicted labels, in the same order as ``y_test``.

    Raises
    ------
    ValueError
        If the number of distinct labels found in ``y_test`` and ``y_hat`` does
        not equal ``len(labels)``, e.g. because the model produced a label
        outside the expected classes.
    """
    cm = confusion_matrix(y_test, y_hat)

    # The axis names can only be attached when the matrix has one row and one
    # column per entry of `labels`; say so explicitly instead of failing later
    # with a generic shape error.
    expected_shape = (len(labels), len(labels))
    if cm.shape != expected_shape:
        raise ValueError(
            "Confusion matrix has shape {} but {} class names are defined; "
            "the labels contain unexpected or missing classes".format(cm.shape, len(labels))
        )

    sns.set(font_scale=1.4, color_codes=True, palette="deep")

    # Naming both the index and the columns lets seaborn place the tick labels
    # at the centre of each cell. Setting ticks manually at 0..5 would put them
    # on the cell edges.
    ax = sns.heatmap(pd.DataFrame(cm, index=labels, columns=labels),
                     annot=True,
                     annot_kws={"size": 16},
                     fmt="d",
                     cmap="YlGnBu")
    ax.set_title("Confusion Matrix")
    ax.set_xlabel("Predicted Value")
    ax.set_ylabel("True Value")
    plt.xticks(rotation=45)  # rotate only; positions and texts come from the heatmap
    plt.show()


print_cf1(y_true, y_predict)