In [8]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score
import numpy as np

## create predictions file

In [60]:
# Candidate hyper-parameter values (presumably for a sweep -- confirm with the
# training script that consumes them).
epochs = [3, 3]
batch_sizes = [8]
lrs = [5e-5, 1e-6, 1e-5]  # [5e-5, 1e-6]
seeds = [42]

# Two sample entries keyed by 4-tuples; the cell that writes the results file
# reads each key as (epochs, batch size, learning rate, seed).
results = {
    (1, 2, 3, 5): 6,
    (2, 3, 3, 6): 8,
}
results

{(1, 2, 3, 5): 6, (2, 3, 3, 6): 8}

In [62]:
# Build the output path from the model / dataset / date of this run.
path_template = 'Data/output/results/{model_name}_on_{dataset}_{date}'
results_file_path = path_template.format(model_name='t5', dataset='igg', date='23_08_2023')

# Append (mode 'a') one two-line record per entry of `results`:
# the hyper-parameter key first, then the accuracy stored under it.
with open(results_file_path, 'a') as out_file:
    for key, accuracy in results.items():
        header = f'ep: {key[0]}, bs: {key[1]}, lr: {key[2]}, seed: {key[3]}\n'
        out_file.write(header + f'accuracy = {accuracy}\n')

# find smallest test size

In [74]:
# Report the row count of every dataset's test split and keep the smallest
# count in `test_size`, which is displayed as the cell's value.
path = './Data/humor_datasets/{dataset}/with_val_fixed_train/test.csv'
datasets = ['amazon', 'headlines', 'igg', 'twss']
test_size = None  # remains None only when `datasets` is empty
for dataset in datasets:
    df = pd.read_csv(path.format(dataset=dataset))
    print(f'test size of {dataset} is {len(df)}')
    # Track the running minimum; without this `test_size` was never assigned
    # and the cell always displayed nothing.
    test_size = len(df) if test_size is None else min(test_size, len(df))
test_size

test size of amazon is 8359
test size of headlines is 5150
test size of igg is 519
test size of twss is 788


In [76]:
# Report the row count and the number of label-1 rows of every dataset's
# train split, and keep the smallest row count in `train_size`, which is
# displayed as the cell's value.
path = './Data/humor_datasets/{dataset}/with_val_fixed_train/train.csv'
datasets = ['amazon', 'headlines', 'igg', 'twss']
train_size = None  # remains None only when `datasets` is empty
for dataset in datasets:
    df = pd.read_csv(path.format(dataset=dataset))
    print(f'train size of {dataset} is {len(df)}')
    print(f'label 1 count is {len(df[df.label==1])}')
    # Track the running minimum; without this `train_size` was never assigned
    # and the cell always displayed nothing.
    train_size = len(df) if train_size is None else min(train_size, len(df))
train_size

train size of amazon is 2376
label 1 count is 1188
train size of headlines is 2376
label 1 count is 1188
train size of igg is 2376
label 1 count is 1188
train size of twss is 2376
label 1 count is 1188


In [77]:
# Report the row count of every dataset's validation split and keep the
# smallest count in `val_size`, which is displayed as the cell's value.
path = './Data/humor_datasets/{dataset}/with_val_fixed_train/val.csv'
datasets = ['amazon', 'headlines', 'igg', 'twss']
val_size = None  # remains None only when `datasets` is empty
for dataset in datasets:
    df = pd.read_csv(path.format(dataset=dataset))
    print(f'val size of {dataset} is {len(df)}')
    # Track the running minimum; without this `val_size` was never assigned
    # and the cell always displayed nothing.
    val_size = len(df) if val_size is None else min(val_size, len(df))
val_size

val size of amazon is 8359
val size of headlines is 5150
val size of igg is 519
val size of twss is 788


In [85]:
# Class balance of each test split, measured on at most its first
# `max_test_size` rows.
path = './Data/humor_datasets/{dataset}/with_val_fixed_train/test.csv'
max_test_size = 3500
datasets = ['amazon', 'headlines', 'igg', 'twss']
for dataset in datasets:
    # Slicing past the end is safe, so no explicit min() with len(df) is needed.
    df = pd.read_csv(path.format(dataset=dataset)).iloc[:max_test_size]
    total = len(df)
    df_1 = df[df['label'] == 1]
    df_0 = df[df['label'] == 0]

    # Percentages of each label, formatted to two decimals.
    pct_1 = "%.2f" % (100 * len(df_1) / total)
    pct_0 = "%.2f" % (100 * len(df_0) / total)
    print(f'for {dataset}:')
    print(f'%label 1 = {pct_1}, %label 0 = {pct_0}')


for amazon:
%label 1 = 50.77, %label 0 = 49.23
for headlines:
%label 1 = 48.86, %label 0 = 51.14
for igg:
%label 1 = 52.79, %label 0 = 47.21
for twss:
%label 1 = 47.84, %label 0 = 52.16


In [86]:
# Class balance of each validation split, measured on at most its first
# `max_val_size` rows.
path = './Data/humor_datasets/{dataset}/with_val_fixed_train/val.csv'
max_val_size = 3500
datasets = ['amazon', 'headlines', 'igg', 'twss']
for dataset in datasets:
    # Slicing past the end is safe, so no explicit min() with len(df) is needed.
    df = pd.read_csv(path.format(dataset=dataset)).iloc[:max_val_size]
    total = len(df)
    df_1 = df[df['label'] == 1]
    df_0 = df[df['label'] == 0]

    # Percentages of each label, formatted to two decimals.
    pct_1 = "%.2f" % (100 * len(df_1) / total)
    pct_0 = "%.2f" % (100 * len(df_0) / total)
    print(f'for {dataset}:')
    print(f'%label 1 = {pct_1}, %label 0 = {pct_0}')


for amazon:
%label 1 = 49.46, %label 0 = 50.54
for headlines:
%label 1 = 47.11, %label 0 = 52.89
for igg:
%label 1 = 47.21, %label 0 = 52.79
for twss:
%label 1 = 49.11, %label 0 = 50.89


# compute performance of the model

In [14]:
def get_run_details(run_name):
    """Extract the training-dataset name and the seed from a run name or path.

    The last path component is split on ``'_'``; its third field is taken as
    the dataset name and its fourth field must contain ``'='`` followed by the
    seed, e.g. ``bert_on_amazon-headlines_seed=27``.

    Args:
        run_name: Run directory name, optionally preceded by parent
            directories separated by ``/`` and/or ``\\``.

    Returns:
        tuple: ``(dataset_name, seed)`` where ``seed`` is a ``float``.

    Raises:
        IndexError: If the last path component has fewer than four
            ``'_'``-separated fields.
        ValueError: If the fourth field has no ``'='`` or the text after it
            is not a number.
    """
    # Parse only the final path component so that underscores in parent
    # directories cannot shift the field positions. Both separators are
    # handled because glob results on Windows mix '/' and '\\'.
    base_name = run_name.replace('\\', '/').rsplit('/', 1)[-1]
    fields = base_name.split('_')

    dataset_name = fields[2]
    seed_field = fields[3]
    seed_text = seed_field[seed_field.index('=') + 1:]

    return dataset_name, float(seed_text)

In [16]:
from os.path import exists
from sklearn.metrics import precision_score, recall_score, accuracy_score
import glob
import os

# Compute accuracy / recall / precision of every saved model on every dataset's
# test split, store them in the results template and save it as a new file.

# --- configuration ---
output_path = './Data/output/results/'
# dataset_names = ['amazon', 'headlines', 'igg', 'twss']
dataset_names = ["amazon-headlines", "amazon-igg", "amazon-twss", "headlines-igg", "headlines-twss", "igg-twss"]
data_path = './Data/humor_datasets/'
split_type = 'with_val_fixed_train'
models_path = './Model/SavedModels/Bert-paired'
base_model = 'bert'
# First directory matching each training dataset; raises IndexError when a
# dataset has no matching directory.
models_name = [glob.glob(f'{models_path}/{base_model}_on_{dataset}*')[0] for dataset in dataset_names]
# Datasets every model is evaluated on (loop-invariant, so defined once here).
predict_dataset_names = ['amazon', 'headlines', 'igg', 'twss']

# --- load the results template ---
df = pd.read_excel(output_path + 'humor_results_template.xlsx')
# Forward-fill blanks down each column before building the index.
# `fillna(method='ffill')` is deprecated in recent pandas; `ffill` is the
# equivalent call.
df.ffill(axis=0, inplace=True)
df.set_index(['performance', 'model', 'trained on', 'seed'], inplace=True)

# --- evaluate each model ---
for model_name in models_name:
    # base_model, dataset_name, seed = get_run_details(model_name)
    dataset_name, seed = get_run_details(model_name)
    pred_path = model_name + '/predictions/'
    accuracies = {}
    recall = {}
    precision = {}
    for dataset in predict_dataset_names:
        pred_labels_path = pred_path + f'{dataset}_preds.csv'
        test_labels_path = data_path + f'{dataset}/{split_type}/test.csv'
        # Skip this dataset when either file is missing; it is then absent
        # from the three metric dicts.
        if not (exists(pred_labels_path) and exists(test_labels_path)):
            print('didnt find preds/test path')
            continue

        _preds = pd.read_csv(pred_labels_path)
        _test = pd.read_csv(test_labels_path)
        # Compare against only as many test rows as there are predictions.
        _test = _test.iloc[:len(_preds)]
        # Predictions labelled -1 are reported as illegal and removed, together
        # with the test rows carrying the same index labels.
        if (len(_preds[_preds.label == -1]) > 0):
            illegal_indices = _preds[_preds.label == -1].index
            print(f'there are {len(illegal_indices)} illegal indices in {dataset_name} predictions on {dataset}')
            _preds = _preds.drop(labels=illegal_indices, axis=0)
            _test = _test.drop(labels=illegal_indices, axis=0)
        # Metrics are rounded to four decimals.
        accuracies[dataset] = float("%.4f" % accuracy_score(_test.label, _preds.label))
        recall[dataset] = float("%.4f" % recall_score(_test.label, _preds.label))
        precision[dataset] = float("%.4f" % precision_score(_test.label, _preds.label))

    print(f'performance for {model_name}')
    print(f'accuracies = {accuracies}')
    print(f'recall = {recall}')
    print(f'precision = {precision}')

    # NOTE(review): each row is assigned a dict keyed by evaluated dataset
    # name -- presumably the template's columns are those names; confirm.
    df.loc[('accuracy', base_model, dataset_name, seed)] = accuracies
    df.loc[('recall', base_model, dataset_name, seed)] = recall
    df.loc[('precision', base_model, dataset_name, seed)] = precision

# save performance to output file
# Use the first index i for which humor_results_{i}.xlsx does not exist yet,
# so earlier result files are never overwritten.
i = 0
while os.path.exists(output_path + f'humor_results_{i}.xlsx'):
    i += 1

df.to_excel(output_path + f'humor_results_{i}.xlsx')

performance for ./Model/SavedModels/Bert-paired\bert_on_amazon-headlines_seed=27
accuracies = {'amazon': 0.5337, 'headlines': 0.5026, 'igg': 0.9075, 'twss': 0.4886}
recall = {'amazon': 0.9842, 'headlines': 0.9813, 'igg': 0.9416, 'twss': 1.0}
precision = {'amazon': 0.5216, 'headlines': 0.4954, 'igg': 0.8897, 'twss': 0.4833}
performance for ./Model/SavedModels/Bert-paired\bert_on_amazon-igg_seed=42
accuracies = {'amazon': 0.8549, 'headlines': 0.4966, 'igg': 0.8921, 'twss': 0.4277}
recall = {'amazon': 0.8807, 'headlines': 0.9895, 'igg': 0.9124, 'twss': 0.87}
precision = {'amazon': 0.8409, 'headlines': 0.4924, 'igg': 0.8865, 'twss': 0.4493}
performance for ./Model/SavedModels/Bert-paired\bert_on_amazon-twss_seed=27
accuracies = {'amazon': 0.8531, 'headlines': 0.53, 'igg': 0.8439, 'twss': 0.9873}
recall = {'amazon': 0.8531, 'headlines': 0.7795, 'igg': 0.7993, 'twss': 0.9867}
precision = {'amazon': 0.857, 'headlines': 0.5125, 'igg': 0.8939, 'twss': 0.9867}
performance for ./Model/SavedModels

# compute T5 models mean & std accuracy

In [18]:
# T5 accuracies per dataset (presumably one value per run -- confirm).
acc_igg = [0.9347826086956522, 0.9391304347826087, 0.9376811594202898, 0.936231884057971]
acc_amazon = [0.8557142857142858, 0.8542857142857143, 0.8551428571428571, 0.8554285714285714]
acc_headlines = [0.5831428571428572, 0.5805714285714285, 0.5822857142857143, 0.5805714285714285]
acc_twss = [0.45634920634920634, 0.4777636594663278, 0.4885786802030457, 0.799492385786802]
accs = dict(amazon=acc_amazon, headlines=acc_headlines, igg=acc_igg, twss=acc_twss)

# Print "<dataset>: <mean> +- <std>" with the mean to 4 and the std to 3 decimals.
for name, values in accs.items():
    mean_txt = "%.4f" % np.mean(values)
    std_txt = "%.3f" % np.std(values)
    print(f'{name}: {mean_txt} +- {std_txt}')

amazon: 0.8551 +- 0.001
headlines: 0.5816 +- 0.001
igg: 0.9370 +- 0.002
twss: 0.5555 +- 0.141


# compute Bert models mean & std accuracy


In [7]:
# BERT accuracies per dataset (presumably one value per run -- confirm).
acc_igg = [0.9094412331406551, 0.9113680154142582, 0.9036608863198459, 0.8747591522157996]
acc_amazon = [0.8414285714285714, 0.8368571428571429, 0.8305714285714285, 0.8354285714285714]
acc_headlines = [0.5834285714285714, 0.586, 0.6031428571428571, 0.6091428571428571]
acc_twss = [0.9949238578680203, 0.9898477157360406, 0.9961928934010152, 0.9949238578680203]
accs = dict(amazon=acc_amazon, headlines=acc_headlines, igg=acc_igg, twss=acc_twss)

# Print "<dataset>: <mean> +- <std>" with the mean to 4 and the std to 3 decimals.
for name, values in accs.items():
    mean_txt = "%.4f" % np.mean(values)
    std_txt = "%.3f" % np.std(values)
    print(f'{name}: {mean_txt} +- {std_txt}')

amazon: 0.8361 +- 0.004
headlines: 0.5954 +- 0.011
igg: 0.8998 +- 0.015
twss: 0.9940 +- 0.002


# compute Bert models trained on dataset pairs: mean & std accuracy

In [None]:
# NOTE(review): these four lists are value-for-value identical to the
# single-dataset BERT accuracies in the preceding cell (`amazon_headlines`
# matches that cell's `acc_igg`) -- they look like unfinished placeholders;
# confirm before using them.
amazon_headlines = [
    0.9094412331406551,
    0.9113680154142582,
    0.9036608863198459,
    0.8747591522157996,
]
acc_amazon = [
    0.8414285714285714,
    0.8368571428571429,
    0.8305714285714285,
    0.8354285714285714,
]
acc_headlines = [
    0.5834285714285714,
    0.586,
    0.6031428571428571,
    0.6091428571428571,
]
acc_twss = [
    0.9949238578680203,
    0.9898477157360406,
    0.9961928934010152,
    0.9949238578680203,
]

In [None]:
ep: 3, bs: 8, lr: 5e-05, seed: 5
accuracy = 0.9949238578680203, 0.9898477157360406, 0.9961928934010152, 0.9949238578680203
