Leung Wai Liu <br>
JPMC-SMM4H <br>
July 14, 2022 <br>
Task 5 TOP 5 Majority Ensembling

In [51]:
import pandas as pd
import numpy as np
from collections import Counter
from labels_to_ids import task7_labels_to_ids
from sklearn.metrics import accuracy_score, classification_report, f1_score, precision_score, recall_score, confusion_matrix
import os

In [52]:
# Loading up all the predictions data

n_rnds = 5
original_df = pd.read_csv('../Datasets/dev.tsv', sep='\t')
models = ['dccuchile/bert-base-spanish-wwm-uncased', 'dccuchile/bert-base-spanish-wwm-cased', 'xlm-roberta-base', 'bert-base-multilingual-uncased', 'bert-base-multilingual-cased']
n_models = len(models)

epoch_string = '../15_epochs_small_model/eval_testing/saved_eval_test_result_5'
n_rows = len(original_df)

labels_to_ids = task7_labels_to_ids
ids_to_labels = dict((v,k) for k,v in labels_to_ids.items())

# Loading up all of the results
best_f1 = pd.read_csv('../15_epochs_small_model/training_predictions/validating_statistics/all_best_f1_score.tsv', sep='\t')
best_f1 = best_f1.drop(columns = ['Unnamed: 0'])
best_f1

Unnamed: 0,dccuchile/bert-base-spanish-wwm-uncased,dccuchile/bert-base-spanish-wwm-cased,xlm-roberta-base,bert-base-multilingual-uncased,bert-base-multilingual-cased
0,0.706554,0.722016,0.712117,0.732637,0.71688
1,0.714637,0.698767,0.718595,0.720507,0.718697
2,0.706063,0.69728,0.711941,0.725631,0.717069
3,0.705547,0.696304,0.710646,0.72107,0.71565
4,0.687831,0.710475,0.716102,0.719317,0.712978


In [53]:
# sorting the best f1 scores 
sorted_f1 = [] 

for model in models:
    for rnd in range(n_rnds):
        f1_value = best_f1.at[rnd, model]
        sorted_f1.append((model, rnd, f1_value))

sorted_f1.sort(key = lambda y: y[2], reverse=True)

sorted_f1 = sorted_f1[0:3]
print(sorted_f1)

[('bert-base-multilingual-uncased', 0, 0.7326374302914448), ('bert-base-multilingual-uncased', 2, 0.7256314672463325), ('dccuchile/bert-base-spanish-wwm-cased', 0, 0.7220158039937012)]


In [54]:
# Retrieving all the predictions from the 
list_of_df = []

for model_row in range(len(sorted_f1)):
    to_read_string = epoch_string + '/' + sorted_f1[model_row][0] + '/' + str(sorted_f1[model_row][1]) + '/unformatted_eval_test_result.tsv'
    print(to_read_string)     
    particular_model_df = pd.read_csv(to_read_string, sep='\t')
    
list_of_df.append(particular_model_df)

../15_epochs_small_model/eval_testing/saved_eval_test_result_5/bert-base-multilingual-uncased/0/unformatted_eval_test_result.tsv
../15_epochs_small_model/eval_testing/saved_eval_test_result_5/bert-base-multilingual-uncased/2/unformatted_eval_test_result.tsv
../15_epochs_small_model/eval_testing/saved_eval_test_result_5/dccuchile/bert-base-spanish-wwm-cased/0/unformatted_eval_test_result.tsv


In [55]:
# TAKING THE MAJORITY OF DATA

majority_original_tweet_id_list = []
majority_original_sentence_list = []
majority_original_label_list = []
majority_original_numbered_label_list = []

majority_predicted_number_results = []
majority_predicted_results = []


for index, row in original_df.iterrows(): 
    # getting the original values in the tweet
    original_tweet_id = row['tweet_id']
    original_sentence = row['tweet_text']
    original_label = row['label']
    
    # transferring the labels over to final list
    majority_original_tweet_id_list.append(original_tweet_id)
    majority_original_sentence_list.append(original_sentence)
    majority_original_label_list.append(original_label)
    
    specific_row_results = []
    # go through every models' row of data 
    
    for model_row in range(len(list_of_df)):
            # print(list_of_df[model_num][rnd_num])
        particular_row_df = list_of_df[model_row]
        row = particular_row_df.loc[(particular_row_df['tweet_id'] == original_tweet_id) & (particular_row_df['text'] == original_sentence)]
            
        prediction = labels_to_ids[row['label'].values[0]]
        specific_row_results.append(prediction)

    specific_row_results = Counter(specific_row_results)
    specific_row_results = specific_row_results.most_common(1)[0][0]
    majority_predicted_results.append(ids_to_labels[specific_row_results])
    

In [56]:
# Calculating sklearn metrics

majority_original_numbered_label_list = [labels_to_ids[label] for label in majority_original_label_list]
majority_predicted_number_results = [labels_to_ids[label] for label in majority_predicted_results]

accuracy_result = accuracy_score(majority_original_numbered_label_list, majority_predicted_number_results)
f1_result = f1_score(majority_original_numbered_label_list, majority_predicted_number_results, labels=[0], average=None)[0]
precision_result = precision_score(majority_original_numbered_label_list, majority_predicted_number_results, labels=[0], average=None)[0]
recall_result = recall_score(majority_original_numbered_label_list, majority_predicted_number_results, labels=[0], average=None)[0]

confusion_matrix_result = confusion_matrix(majority_original_numbered_label_list, majority_predicted_number_results)
classification_result = classification_report(majority_original_numbered_label_list, majority_predicted_number_results, output_dict=True)

print("ACCURACY:", accuracy_result)
print("F1:", f1_result)
print("PRECISION:", precision_result)
print("RECALL", recall_result)

# Saving results to file
os.makedirs('../15_epochs_small_model/eval_testing/eval_validation_statistics_top_5/majority_ensemble', exist_ok=True)

with open('../15_epochs_small_model/eval_testing/eval_validation_statistics_top_5/majority_ensemble/majority_ensemble_valid_stats.txt', 'w') as file:
        file.write("Accuracy: " + str(accuracy_result) + "\n")
        file.write("F1 Score (of self_reports): " + str(f1_result) + "\n")
        file.write("Precison Score (of self_reports): " + str(precision_result) + "\n")
        file.write("Recall result (of self_reports): " + str(recall_result) + "\n")


cm_df = pd.DataFrame(confusion_matrix_result)
cr_df = pd.DataFrame(classification_result).transpose()

cm_df.to_csv('../15_epochs_small_model/eval_testing/eval_validation_statistics_top_5/majority_ensemble/majority_confusion_matrix.tsv', sep='\t')
cr_df.to_csv('../15_epochs_small_model/eval_testing/eval_validation_statistics_top_5/majority_ensemble/majority_classification_report.tsv', sep='\t')




ACCURACY: 0.8334328358208956
F1: 0.7358288770053475
PRECISION: 0.655862726406101
RECALL 0.8380024360535931


In [57]:
# Saving it as a dataframe
unformatted_majority_prediction_data = pd.DataFrame(zip(majority_original_tweet_id_list, majority_original_sentence_list, majority_original_label_list, majority_predicted_results), columns=['tweet_id', 'text', 'Orig', 'label'])
formatted_majority_prediction_data = unformatted_majority_prediction_data.drop(columns = ['Orig'])

# Saving it as a tsv file
os.makedirs('../15_epochs_small_model/eval_testing/eval_validation_statistics_top_5/', exist_ok=True)
unformatted_majority_prediction_data.to_csv('../15_epochs_small_model/eval_testing/eval_validation_statistics_top_5/majority_ensemble/unformatted_majority_data.tsv', sep='\t', index=False)
formatted_majority_prediction_data.to_csv('../15_epochs_small_model/eval_testing/eval_validation_statistics_top_5/majority_ensemble/formatted_majority_data.tsv', sep='\t', index=False)