Leung Wai Liu <br>
JPMC-SMM4H <br>
July 14, 2022 <br>
Task 2b SEPARATE Majority Ensembling

In [1]:
import pandas as pd
import numpy as np
from collections import Counter
from labels_to_ids import task7_labels_to_ids
from sklearn.metrics import accuracy_score, classification_report, f1_score, precision_score, recall_score, confusion_matrix
from training_code import calculate_overall_performance_metrics
import os

In [2]:
# Run configuration: which models and rounds to ensemble, and where their inputs live

models = ['bert-large-uncased', 'roberta-large']
n_models = len(models)
n_rnds = 5

# Root folder of the saved predictions (one sub-folder per model, then per round)
epoch_string = '../15_epochs_large_model/eval_testing/saved_eval_test_result_2b'

# Reference data that the predictions are matched against and scored on
original_df = pd.read_csv('../Datasets/dev.tsv', sep='\t')
n_rows = len(original_df)

# Label -> id table from the project, plus its inverse (id -> label)
labels_to_ids = task7_labels_to_ids
ids_to_labels = {label_id: label for label, label_id in labels_to_ids.items()}



In [3]:
# Retrieving all the predictions from the saved evaluation runs.
#
# list_of_df is an object-dtype table with one row per round and one column per
# model; each cell holds the predictions DataFrame read for that (round, model)
# pair, so later cells can fetch it with list_of_df.at[rnd, model].
list_of_df = pd.DataFrame(index=range(n_rnds), columns=models)

# Columns the majority-vote step reads from every predictions file
required_prediction_columns = {'id', 'text', 'Premise'}

for model in models:
    for rnd in range(n_rnds):
        to_read_string = f'{epoch_string}/{model}/{rnd}/unformatted_eval_test_result.tsv'

        particular_model_df = pd.read_csv(to_read_string, sep='\t')

        # Fail here, naming the file, rather than later with an opaque lookup error
        missing_columns = required_prediction_columns - set(particular_model_df.columns)
        if missing_columns:
            raise KeyError(f'{to_read_string} is missing column(s): {sorted(missing_columns)}')

        list_of_df.at[rnd, model] = particular_model_df

list_of_df

Unnamed: 0,bert-large-uncased,roberta-large
0,id ...,id ...
1,id ...,id ...
2,id ...,id ...
3,id ...,id ...
4,id ...,id ...


In [4]:
# TAKING THE MAJORITY OF DATA
#
# For every example in original_df and every model, gather that model's
# prediction from each of the n_rnds runs and keep the most frequent one.


def _first_prediction_by_key(predictions_df):
    """Map (id, text) -> predicted 'Premise' for one run, keeping the first row per key."""
    lookup = {}
    keyed_rows = zip(predictions_df['id'].tolist(),
                     predictions_df['text'].tolist(),
                     predictions_df['Premise'].values)
    for tweet_id, text, prediction in keyed_rows:
        # setdefault keeps the earliest row when an (id, text) pair repeats
        lookup.setdefault((tweet_id, text), prediction)
    return lookup


def _majority_vote(votes):
    """Return the most frequent vote; on a tie, the vote encountered first wins."""
    return Counter(votes).most_common(1)[0][0]


# Original values in dataset order; the metrics cell zips these with the predictions
majority_original_tweet_id_list = original_df['id'].tolist()
majority_original_sentence_list = original_df['Tweet'].tolist()
majority_original_claim_list = original_df['Claim'].tolist()
majority_original_label_list = original_df['Premise'].tolist()

majority_predicted_number_results = pd.DataFrame(index=range(n_rows), columns=models)
majority_predicted_results = pd.DataFrame(index=range(n_rows), columns=models)

# Index every run once up front instead of scanning a whole predictions frame
# for each (example, model, round) combination.
prediction_lookups = {
    model: [_first_prediction_by_key(list_of_df.at[rnd_num, model]) for rnd_num in range(n_rnds)]
    for model in models
}

examples = zip(original_df.index, majority_original_tweet_id_list, majority_original_sentence_list)
for index, original_tweet_id, original_sentence in examples:
    # A prediction belongs to this example when both its id and its text match
    example_key = (original_tweet_id, original_sentence)

    for model in models:
        votes = []
        for rnd_num, lookup in enumerate(prediction_lookups[model]):
            if example_key not in lookup:
                raise LookupError(
                    f'No prediction for tweet id {original_tweet_id!r} '
                    f'from model {model!r}, round {rnd_num}'
                )
            votes.append(lookup[example_key])

        majority_label = _majority_vote(votes)

        # Both tables receive the same value: the winning prediction is stored
        # as-is, with no id-to-label conversion applied in this cell.
        majority_predicted_results.at[index, model] = majority_label
        majority_predicted_number_results.at[index, model] = majority_label
    

In [5]:
# Inspect the majority-vote predictions: one row per example, one column per model
majority_predicted_number_results

Unnamed: 0,bert-large-uncased,roberta-large
0,0,0
1,0,0
2,1,1
3,0,0
4,0,0
...,...,...
595,0,0
596,1,1
597,1,1
598,0,0


In [6]:
# Calculating sklearn metrics
#
# One column per model. 'f1' is the unweighted mean of the three F1 scores
# returned by calculate_overall_performance_metrics, 'accuracy' comes from
# sklearn, and 'precision' / 'recall' each hold the three returned values as a
# list in the order [fm, saho, sc].

sep_majority_metrics = pd.DataFrame(index=['f1', 'accuracy', 'precision', 'recall'], columns=models)

for model in models:
    sep_maj_predicted_number_results = majority_predicted_results[model].tolist()

    # 'Orig' carries the reference label and 'Premise' the ensembled prediction
    num_overall_prediction_data = pd.DataFrame(
        zip(
            majority_original_tweet_id_list,
            majority_original_sentence_list,
            majority_original_claim_list,
            majority_original_label_list,
            sep_maj_predicted_number_results,
        ),
        columns=['tweet_id', 'text', 'Claim', 'Orig', 'Premise'],
    )

    (fm_f1_score, fm_precision, fm_recall,
     saho_f1_score, saho_precision, saho_recall,
     sc_f1_score, sc_precision, sc_recall) = calculate_overall_performance_metrics(num_overall_prediction_data)

    accuracy_result = accuracy_score(majority_original_label_list, sep_maj_predicted_number_results)

    net_f1 = (1.0/3.0) * (fm_f1_score + saho_f1_score + sc_f1_score)

    sep_majority_metrics.at['f1', model] = net_f1
    sep_majority_metrics.at['accuracy', model] = accuracy_result
    sep_majority_metrics.at['precision', model] = [fm_precision, saho_precision, sc_precision]
    sep_majority_metrics.at['recall', model] = [fm_recall, saho_recall, sc_recall]

print(sep_majority_metrics)

# Saving results to file
# The output folder is derived from epoch_string so the statistics land next to
# the predictions they were computed from. The path was previously hard-coded to
# '../15_epochs_small_model/...', although the predictions are read from the
# large-model directory.
majority_stats_dir = os.path.join(
    os.path.dirname(epoch_string), 'eval_validation_statistics', 'majority_ensemble'
)
os.makedirs(majority_stats_dir, exist_ok=True)

sep_majority_metrics.to_csv(os.path.join(majority_stats_dir, 'sep_majority_metrics.tsv'), sep='\t')





Running performance metrics
Finished running performance metrics
Running performance metrics
Finished running performance metrics
                                          bert-large-uncased  \
f1                                                  0.815692   
accuracy                                            0.831667   
precision  [0.8083333333333333, 0.7929704476314646, 0.842...   
recall     [0.8151374370886566, 0.7961234350977378, 0.845...   

                                               roberta-large  
f1                                                  0.805488  
accuracy                                            0.823333  
precision  [0.808234845596645, 0.8058516544525871, 0.8038...  
recall     [0.8130081300813008, 0.8024928618493301, 0.806...  


In [7]:
# # Saving it as a dataframe

# for model in models:
#     majority_predicted_results = [ids_to_labels[id] for id in majority_predicted_number_results[model].tolist()]
#     unformatted_majority_prediction_data = pd.DataFrame(zip(majority_original_tweet_id_list, majority_original_sentence_list, majority_original_label_list, majority_predicted_results), columns=['tweet_id', 'text', 'Orig', 'label'])
#     formatted_majority_prediction_data = unformatted_majority_prediction_data.drop(columns = ['text', 'Orig'])

#     to_save_location = '../15_epochs_small_model/eval_testing/sep_majority_results/' + model + '/'
#     os.makedirs(to_save_location, exist_ok=True)
#     unformatted_to_save_location = to_save_location + 'unformatted_sep_majority.tsv'
#     formatted_to_save_location = to_save_location + 'formatted_sep_majority.tsv'
    
#     unformatted_majority_prediction_data.to_csv(unformatted_to_save_location, sep='\t', index=False)
#     formatted_majority_prediction_data.to_csv(formatted_to_save_location, sep='\t', index=False)
