In [1]:
#import libraries
import nltk
from nltk.corpus import stopwords
from sklearn.metrics import precision_score, recall_score, accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
sns.set_theme(style="whitegrid")
import pandas as pd
import seaborn as sns
%matplotlib inline
import numpy as np
from scipy import stats
from scipy.stats import norm
import matplotlib.pyplot as plt
import math

#number of bootstrap resamples drawn by get_metrics when estimating percentile intervals
n_samples = 100
    
#process classifications to convert to binary labels
def map_to_number(x):
    """Translate a cleaned model answer into a binary label.

    Returns 0 for 'a', 'use' or 'used'; 1 for 'b', 'mention', 'mentioned'
    or 'm'; and np.nan for anything else, so unparseable answers can be
    dropped later with dropna.
    """
    use_answers = ('a', 'use', 'used')
    mention_answers = ('b', 'mention', 'mentioned', 'm')

    if x in use_answers:
        return 0
    if x in mention_answers:
        return 1
    return np.nan
    
#helpers for computing classification error rates
def false_positive(true, predicted):
    """Return the false positive rate in percent: 100 * FP / (FP + TN).

    Parameters
    ----------
    true : array-like of ground-truth labels (0 = negative, 1 = positive).
    predicted : array-like of predicted labels, aligned with ``true``.

    Returns
    -------
    float
        Percentage of true-0 samples that were predicted as 1, or NaN when
        ``true`` contains no 0 at all (the rate is undefined).
    """
    # Pin the label order so the matrix is always 2x2. Without ``labels`` a
    # sample that contains a single class (easy to hit when bootstrapping a
    # small subset) produces a 1x1 matrix and the indexing below raises
    # IndexError.
    CM = confusion_matrix(true, predicted, labels=[0, 1])

    TN = CM[0][0]
    FP = CM[0][1]

    negatives = FP + TN
    if negatives == 0:
        # Undefined rate; NaN is skipped by the nan-aware percentiles downstream.
        return float('nan')
    return(100*FP/negatives)
    
def false_negative(true, predicted):
    """Return the false negative rate in percent: 100 * FN / (FN + TP).

    Parameters
    ----------
    true : array-like of ground-truth labels (0 = negative, 1 = positive).
    predicted : array-like of predicted labels, aligned with ``true``.

    Returns
    -------
    float
        Percentage of true-1 samples that were predicted as 0, or NaN when
        ``true`` contains no 1 at all (the rate is undefined).
    """
    # Pin the label order so the matrix is always 2x2. Without ``labels`` a
    # sample that contains a single class (easy to hit when bootstrapping a
    # small subset) produces a 1x1 matrix and the indexing below raises
    # IndexError.
    CM = confusion_matrix(true, predicted, labels=[0, 1])

    FN = CM[1][0]
    TP = CM[1][1]

    positives = FN + TP
    if positives == 0:
        # Undefined rate; NaN is skipped by the nan-aware percentiles downstream.
        return float('nan')
    return(100*FN/positives)
    

def get_metrics(true_o, predicted_o, random_state=None):
    """Compute error rates for one model together with bootstrap percentiles.

    The point estimates are computed on the inputs as given. For the
    bootstrap, ``n_samples`` resamples (with replacement, same size as the
    input) of the paired ``(true_o, predicted_o)`` observations are drawn and
    every rate is recomputed on each resample. The function works only from
    its arguments; it does not read any notebook-level data frame.

    Parameters
    ----------
    true_o : array-like
        Ground-truth binary labels (0/1).
    predicted_o : array-like
        Predicted binary labels (0/1), aligned position-by-position with
        ``true_o``.
    random_state : int or None, optional
        Seed for the resampling. ``None`` (default) draws from numpy's global
        random state, so ``np.random.seed`` is still honoured.

    Returns
    -------
    tuple of 12 floats, in this order:
        point estimates  (false_negative, false_positive, error rate),
        97.5th bootstrap percentiles (same three rates, same order),
        2.5th bootstrap percentiles,
        50th bootstrap percentiles.
        All values are percentages; error rate is 100 * (1 - accuracy).

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length.
    """
    # Positional arrays: the inputs may be pandas Series whose index has gaps
    # (rows dropped upstream), so resample by position rather than by label.
    true_arr = np.asarray(true_o)
    pred_arr = np.asarray(predicted_o)
    n_obs = len(true_arr)
    if n_obs != len(pred_arr):
        raise ValueError("true_o and predicted_o must have the same length")
    if n_obs == 0:
        raise ValueError("cannot compute metrics on empty input")

    rng = np.random if random_state is None else np.random.RandomState(random_state)

    fp_boot = []
    fn_boot = []
    err_boot = []
    for _ in range(n_samples):
        idx = rng.randint(0, n_obs, size=n_obs)
        true_b = true_arr[idx]
        pred_b = pred_arr[idx]
        fp_boot.append(false_positive(true_b, pred_b))
        fn_boot.append(false_negative(true_b, pred_b))
        err_boot.append((1 - accuracy_score(true_b, pred_b))*100)

    # Order within every group is (false negative, false positive, error);
    # callers index the tuple positionally, so this order must not change.
    boot = (fn_boot, fp_boot, err_boot)
    point = (false_negative(true_o, predicted_o),
             false_positive(true_o, predicted_o),
             100*(1 - accuracy_score(true_o, predicted_o)))
    upper = tuple(np.nanpercentile(values, 97.5) for values in boot)
    lower = tuple(np.nanpercentile(values, 2.5) for values in boot)
    median = tuple(np.nanpercentile(values, 50) for values in boot)

    return point + upper + lower + median

#columns of the classifications table that hold each tested model's answers
models = ['label_gpt3.5', 'label_gpt4', 'label_gpt3.5instruct']

#load the dataset with classifications
table = pd.read_csv('data/task1.csv')

### Calculate metrics for hate subtask

In [2]:
list_entries = []

#select hate task samples
df = table.loc[table.subtask=='hate'].reset_index()

#clean and process classifications: lower-case the raw answer, strip surrounding
#quotes, then periods, then spaces (in that order), and map the result to 0/1/NaN.
#Driven by `models` so the list of evaluated columns lives in exactly one place.
for column in models:
    cleaned = df[column].apply(lambda x: str(x).lower().strip('"').strip('.').strip(' '))
    df[column] = cleaned.apply(map_to_number)

#result keys in the positional order of the tuple returned by get_metrics:
#three point estimates, then their upper, lower and median bootstrap percentiles.
#NOTE(review): get_metrics returns its false_negative(...) value first, and that
#value is stored under 'False positive rate' here (and vice versa) - presumably
#deliberate because label 0 is the harmful class; confirm.
rate_names = ['False positive rate', 'False negative rate', 'Average error rate']
stat_suffixes = ['', ' upper', ' lower', ' median']
metric_keys = [rate + suffix for suffix in stat_suffixes for rate in rate_names]

#calculate the performance for each model
for model in models:
    #rows whose answer could not be parsed (NaN) are excluded from scoring
    scored = df.dropna(subset = [model])
    true = scored['type'].apply(lambda x: 1 if x =='disapproving' else 0)
    predicted = scored[model]

    metrics = get_metrics(true, predicted)

    entry = {'task': 'hate speech', 'model': model}
    entry.update(zip(metric_keys, metrics))

    list_entries.append(entry)

### Calculate metrics for misinformation subtask

In [3]:
#select misinformation task samples
df = table.loc[table.subtask=='misinfo'].reset_index()

#clean and process classifications: lower-case the raw answer, strip surrounding
#quotes, then periods, then spaces (in that order), and map the result to 0/1/NaN.
#Driven by `models` so the list of evaluated columns lives in exactly one place.
for column in models:
    cleaned = df[column].apply(lambda x: str(x).lower().strip('"').strip('.').strip(' '))
    df[column] = cleaned.apply(map_to_number)

#result keys in the positional order of the tuple returned by get_metrics:
#three point estimates, then their upper, lower and median bootstrap percentiles.
#NOTE(review): get_metrics returns its false_negative(...) value first, and that
#value is stored under 'False positive rate' here (and vice versa) - presumably
#deliberate because label 0 is the harmful class; confirm.
rate_names = ['False positive rate', 'False negative rate', 'Average error rate']
stat_suffixes = ['', ' upper', ' lower', ' median']
metric_keys = [rate + suffix for suffix in stat_suffixes for rate in rate_names]

#drop results left by a previous run of this cell so re-running it does not
#append duplicate misinformation rows to the shared list
list_entries = [e for e in list_entries if e['task'] != 'misinformation']

#calculate the performance for each model
for model in models:
    #rows whose answer could not be parsed (NaN) are excluded from scoring
    scored = df.dropna(subset = [model])
    true = scored['type'].apply(lambda x: 1 if x =='disapproving' else 0)
    predicted = scored[model]

    metrics = get_metrics(true, predicted)

    entry = {'task': 'misinformation', 'model': model}
    entry.update(zip(metric_keys, metrics))

    list_entries.append(entry)

In [4]:
#one row per (task, model) entry collected above
res = pd.DataFrame(data=list_entries)

In [5]:
#half-width of the bootstrap percentile interval: (upper bound - lower bound) / 2
for rate in ['Average error rate', 'False positive rate', 'False negative rate']:
    res[rate + ' error'] = (res[rate + ' upper'] - res[rate + ' lower'])/2

#print the table grouped by task and, within each task, from the highest to the
#lowest average error rate. A single multi-key sort is used: chaining two
#sort_values calls only keeps the inner order if the second sort is stable,
#which the default sorting algorithm does not guarantee.
res[['task','model','False positive rate','False positive rate error',
     'False negative rate','False negative rate error',
     'Average error rate','Average error rate error']].\
    sort_values(by = ['task', 'Average error rate'], ascending = [True, False]).round(2)

Unnamed: 0,task,model,False positive rate,False positive rate error,False negative rate,False negative rate error,Average error rate,Average error rate error
2,hate speech,label_gpt3.5instruct,17.98,8.31,14.77,6.83,16.38,5.3
0,hate speech,label_gpt3.5,6.82,5.59,20.0,7.93,13.48,4.54
1,hate speech,label_gpt4,20.0,7.85,4.44,3.83,12.22,4.72
5,misinformation,label_gpt3.5instruct,34.38,2.77,40.08,3.02,37.22,2.27
3,misinformation,label_gpt3.5,8.05,1.79,49.76,4.01,28.93,2.28
4,misinformation,label_gpt4,23.44,2.76,3.89,1.08,13.64,1.64
