### Import Library

In [1]:
import pandas as pd
import numpy as np
import os

# string processing
import re
from keras.utils.np_utils import to_categorical
from sklearn import feature_extraction, model_selection, naive_bayes, pipeline, manifold, preprocessing, feature_selection, metrics


### Import true label

In [2]:
# import the true label

df_merge_quality = pd.read_csv('data/US_patent_abstract_5000_2015_with_title_1_5y.csv')

In [3]:
y_test_true = df_merge_quality['quality_rank'][4000:].values
print(y_test_true.shape)
y_test_true[:10]

(1000,)


array([0, 1, 0, 0, 0, 1, 1, 1, 1, 1])

In [4]:
# double check the label distribution
# Validation set

count = len(y_test_true)
positive = sum(y_test_true)
negative = sum(x == 0 for x in y_test_true)
    
print(f'Total Sample Count = {count} \nPositive Label Count = {positive} \nNegative Label Count = {negative} \nPositive Class Ratio = {positive/count}  \nNegative Class Ratio = {negative/count}')


Total Sample Count = 1000 
Positive Label Count = 405 
Negative Label Count = 595 
Positive Class Ratio = 0.405  
Negative Class Ratio = 0.595


### Create Majority Class Prediction file as one baseline to add into the pool

In [5]:
mc_class = np.zeros(1000)
mc_class
mc_class.shape

(1000,)

In [9]:
np.savetxt("Prediction_Output/Majority_Class_prediction.csv", mc_class)

### Import the prediction output files
#### Let's first look at couple example of the prediction output file

In [10]:
# Option 1: Use .read.csv
test_read1 = pd.read_csv("Prediction_Output/BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv", 
                        names = ['Class 0', 'Class 1'], 
                        sep=" ")
test_read1

Unnamed: 0,Class 0,Class 1
0,0.847997,0.130727
1,0.332160,0.667886
2,0.236171,0.788574
3,0.887799,0.081256
4,0.828767,0.121260
...,...,...
995,0.540695,0.398545
996,0.932154,0.059405
997,0.195182,0.780047
998,0.534664,0.487115


In [11]:
# Option 2: Use .readlines
temp_pred = []
with open("Prediction_Output/BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv") as f1:
    for line in f1.readlines():
        print([float(x) for x in line.split()])

[0.8479969501495361, 0.13072672486305237]
[0.3321598172187805, 0.6678864359855652]
[0.2361709177494049, 0.7885740995407104]
[0.8877992033958435, 0.08125635981559753]
[0.8287674188613892, 0.12126028537750244]
[0.6365858316421509, 0.30468297004699707]
[0.7011922597885132, 0.24463146924972534]
[0.8038750290870667, 0.13103100657463074]
[0.8003423810005188, 0.19075867533683777]
[0.9636895656585693, 0.02832663059234619]
[0.8826712965965271, 0.09366616606712341]
[0.738400399684906, 0.2574821710586548]
[0.7997338175773621, 0.19859188795089722]
[0.9297205209732056, 0.05143982172012329]
[0.788860023021698, 0.21694540977478027]
[0.8776333332061768, 0.13374078273773193]
[0.8845688104629517, 0.0818154513835907]
[0.8796502351760864, 0.11024996638298035]
[0.9105886220932007, 0.10028195381164551]
[0.7579448223114014, 0.25525808334350586]
[0.35311049222946167, 0.6041271090507507]
[0.8072425723075867, 0.19061025977134705]
[0.5108556747436523, 0.4793073534965515]
[0.41507914662361145, 0.6663478016853333]

In [12]:
# We can get the predicted class this way
temp_pred2 = []
with open("Prediction_Output/BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv") as f1:
    for line in f1.readlines():
        temp_pred2.append(np.argmax([float(x) for x in line.split()]))
temp_pred2

[0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,


In [13]:
# We can get the predicted class this way
temp_pred2 = []
with open('Prediction_Output/BERT_CNN_5yr_claims_last_510_words_dev_prob.csv') as f1:
    for line in f1.readlines():
        temp_pred2.append(np.argmax([float(x) for x in line.split()]))
sum(temp_pred2)


33

### Create a list holding all filenames to generate combinations pair later

In [15]:
# https://stackoverflow.com/questions/10377998/how-can-i-iterate-over-files-in-a-given-directory

import os

directory = "Prediction_Output"

file_name = []

for filename in os.listdir(directory):
    if filename.endswith(".csv") and not('50k' in filename): # not include 50k version for now
        file_name.append(os.path.join(directory, filename))

file_name

['Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
 'Prediction_Output/BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv',
 'Prediction_Output/HATT.csv',
 'Prediction_Output/Distil-BERT_5yr_claims_dev_prob-Copy1.csv',
 'Prediction_Output/BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv',
 'Prediction_Output/Majority_Class_prediction.csv',
 'Prediction_Output/CNN_5yr_claims_dev_prob-Copy1.csv',
 'Prediction_Output/BERT_CNN_5yr_claims_last_510_words_dev_prob.csv',
 'Prediction_Output/BERT_fine_tune_5yr_abstract_title_dev_prob-Copy1.csv',
 'Prediction_Output/BERT_CNN_5yr_claims_dev_prob-Copy1.csv',
 'Prediction_Output/HATT_abst.csv',
 'Prediction_Output/BERT_fine_tune_5yr_claims_dev_prob_last_n_words.csv',
 'Prediction_Output/LSTM.csv',
 'Prediction_Output/TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv',
 'Prediction_Output/CNN_5yr_abstract_title_dev_prob-Copy1.csv',
 'Prediction_Output/LSTM_abst.csv']

### Side-Task: For summary purpose, show the metrics for all files

In [16]:
# define a helper function for below to clean up filename
def clean_filename(filepath):
    '''
    filepah: to pass in string
    '''
    
    if '/' in filepath: # to show only the filename not the path
        file_name = filepath.split('/')[-1]
    else:
        file_name = filepath
    
    return file_name

In [20]:
summary_df = pd.DataFrame(columns = ['Model', 'Accuracy', 'Precision', 'Recall','F1','AUC'])

for file in file_name:
    temp_pred_result = []
    with open(file) as f1:
        for line in f1.readlines():
            temp_pred_result.append(np.argmax([float(x) for x in line.split()]))
    #print(file)
    
    accuracy = metrics.accuracy_score(y_test_true, temp_pred_result)
    precision = metrics.precision_score(y_test_true, temp_pred_result)
    recall = metrics.recall_score(y_test_true, temp_pred_result)
    f1 = metrics.f1_score(y_test_true, temp_pred_result)
    auc = metrics.roc_auc_score(y_test_true, temp_pred_result)

    row_to_add = {'Model': clean_filename(file), 'Accuracy': round(accuracy,3), 'Precision':round(precision,3), 'Recall': round(recall,3) ,'F1': round(f1,3) ,'AUC': round(auc, 3) }
    
    summary_df = summary_df.append(row_to_add, ignore_index = True)
    

  _warn_prf(average, modifier, msg_start, len(result))


In [21]:
summary_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1,AUC
0,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,0.6,0.532,0.104,0.174,0.521
1,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,0.603,0.531,0.17,0.258,0.534
2,HATT.csv,0.699,0.693,0.462,0.554,0.661
3,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,0.627,0.603,0.232,0.335,0.564
4,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,0.628,0.577,0.306,0.4,0.577
5,Majority_Class_prediction.csv,0.595,0.0,0.0,0.0,0.5
6,CNN_5yr_claims_dev_prob-Copy1.csv,0.625,0.549,0.417,0.474,0.592
7,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.606,0.667,0.054,0.1,0.518
8,BERT_fine_tune_5yr_abstract_title_dev_prob-Cop...,0.616,0.538,0.363,0.434,0.576
9,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,0.604,0.514,0.412,0.458,0.573


In [22]:
print("Rank by Accuracy")
summary_df.sort_values(by='Accuracy', ascending = False)


Rank by Accuracy


Unnamed: 0,Model,Accuracy,Precision,Recall,F1,AUC
2,HATT.csv,0.699,0.693,0.462,0.554,0.661
11,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.689,0.636,0.543,0.586,0.666
12,LSTM.csv,0.681,0.623,0.538,0.577,0.658
4,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,0.628,0.577,0.306,0.4,0.577
3,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,0.627,0.603,0.232,0.335,0.564
6,CNN_5yr_claims_dev_prob-Copy1.csv,0.625,0.549,0.417,0.474,0.592
13,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.618,0.632,0.136,0.224,0.541
8,BERT_fine_tune_5yr_abstract_title_dev_prob-Cop...,0.616,0.538,0.363,0.434,0.576
7,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.606,0.667,0.054,0.1,0.518
9,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,0.604,0.514,0.412,0.458,0.573


In [23]:
print("Rank by Precision")
summary_df.sort_values(by='Precision', ascending = False)


Rank by Precision


Unnamed: 0,Model,Accuracy,Precision,Recall,F1,AUC
2,HATT.csv,0.699,0.693,0.462,0.554,0.661
7,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.606,0.667,0.054,0.1,0.518
11,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.689,0.636,0.543,0.586,0.666
13,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.618,0.632,0.136,0.224,0.541
12,LSTM.csv,0.681,0.623,0.538,0.577,0.658
3,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,0.627,0.603,0.232,0.335,0.564
4,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,0.628,0.577,0.306,0.4,0.577
6,CNN_5yr_claims_dev_prob-Copy1.csv,0.625,0.549,0.417,0.474,0.592
8,BERT_fine_tune_5yr_abstract_title_dev_prob-Cop...,0.616,0.538,0.363,0.434,0.576
0,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,0.6,0.532,0.104,0.174,0.521


### Ensemble Strategy: 
The strategy of ensemble is to average the predicted probability for both Class 0 and Class 1 from  two different models, then take argmax to determine the class by the larger average probability.

### Start with 2 models combination:

In [27]:
test_222 = ['Accuracy', 'Precision', 'Recall','F1','AUC']

In [28]:
model_n + test_222

['Model 0',
 'Model 1',
 'Model 2',
 'Accuracy',
 'Precision',
 'Recall',
 'F1',
 'AUC']

In [29]:
model_n = []
for i in range(3):
    model_n.append('Model ' + str(i + 1))
model_n

['Model 1', 'Model 2', 'Model 3']

In [None]:
[model_n.append(i+1) for i in range(n)]

In [33]:
for i, x in enumerate(pair_iterator_2):
    print(i)
    print(x)

0
('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv', 'Prediction_Output/BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv')
1
('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv', 'Prediction_Output/HATT.csv')
2
('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv', 'Prediction_Output/Distil-BERT_5yr_claims_dev_prob-Copy1.csv')
3
('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv', 'Prediction_Output/BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv')
4
('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv', 'Prediction_Output/Majority_Class_prediction.csv')
5
('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv', 'Prediction_Output/CNN_5yr_claims_dev_prob-Copy1.csv')
6
('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv', 'Prediction_Output/BERT_CNN_5yr_claims_last_510_words_dev_prob.csv')
7
('Prediction_Output/TFIDF_NaiveBayes_5yr_ab

In [43]:
from itertools import combinations


def ensemble_result(file_name, n = 2):
    '''
    file_name is a list of filepath+filename which created in above cell
    n is the number of models we want to combine at one time to assess the model performance.
    '''
    
    pair_iterator_2 = list(combinations(file_name, n))

    # for this purpose, since I need to average two files together, I feel it's easier to use pandas dataframe and use vectorized operation

    df_ensemble_compare_2 = pd.DataFrame(columns = [model_n.append(i+1) for i in range(n)] + ['Accuracy', 'Precision', 'Recall','F1','AUC'])
    
    
    
    for i, files in enumerate(pair_iterator_2):
        temp_pred_result = []
        dict_dataframe = dict()
        
        for file in files:
        # temporary hold of data from two files
            dict_dataframe[i] = pd.read_csv(file, 
                                names = ['Class 0', 'Class 1'], 
                                sep=" ")

        # average two files
        counter = 0
        for i, k in enumerate(dict_dataframe):
            if counter == 0:
                df_combine = k
                counter += 1
            else:
                df_combine = df_combine + k
        
        df_combine = df_combine / n
        
        # compute the new predicted output
        for i in df_combine.itertuples():
            temp_pred_result.append(np.argmax([i._1, i._2]))
        temp_pred_result

        accuracy = metrics.accuracy_score(y_test_true, temp_pred_result)
        precision = metrics.precision_score(y_test_true, temp_pred_result)
        recall = metrics.recall_score(y_test_true, temp_pred_result)
        f1 = metrics.f1_score(y_test_true, temp_pred_result)
        auc = metrics.roc_auc_score(y_test_true, temp_pred_result)


        row_to_add = {'Model 1': clean_filename(files[0]), 'Model 2': clean_filename(files[1]), 'Accuracy': accuracy, 'Precision':precision, 'Recall': recall,'F1': f1,'AUC': auc}

        df_ensemble_compare_2 = df_ensemble_compare_2.append(row_to_add, ignore_index = True)
        
    return df_combine

In [44]:
temp_test = ensemble_result(file_name, 2)
temp_test

AttributeError: 'float' object has no attribute 'itertuples'

In [42]:
temp_test = ensemble_result(file_name, 2)
temp_test

{119:       Class 0   Class 1
 0    0.701989  0.298636
 1    0.513898  0.487066
 2    0.238923  0.757815
 3    0.699248  0.300325
 4    0.896259  0.101306
 ..        ...       ...
 995  0.261843  0.734841
 996  0.589194  0.413243
 997  0.284094  0.713892
 998  0.203889  0.792860
 999  0.458762  0.540405
 
 [1000 rows x 2 columns]}

In [31]:

# To generate all combinatition of pairs of models
# https://www.geeksforgeeks.org/itertools-combinations-module-python-print-possible-combinations/
from itertools import combinations

pair_iterator_2 = list(combinations(file_name, 2))

In [16]:
len(pair_iterator_2)

105

In [17]:
pair_iterator_2

[('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv'),
 ('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/HATT.csv'),
 ('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/Distil-BERT_5yr_claims_dev_prob-Copy1.csv'),
 ('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv'),
 ('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/CNN_5yr_claims_dev_prob-Copy1.csv'),
 ('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/BERT_CNN_5yr_claims_last_510_words_dev_prob.csv'),
 ('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/BERT_fine_tune_5yr_abstract_title_dev_prob-Copy1.csv'),
 ('Predi

In [18]:
# To show all rows (optional)
pd.set_option('display.max_rows', 100)
#pd.reset_option('display.max_rows') # back to default

In [19]:
# for this purpose, since I need to average two files together, I feel it's easier to use pandas dataframe and use vectorized operation

df_ensemble_compare_2 = pd.DataFrame(columns = ['Model 1', 'Model 2', 'Accuracy', 'Precision', 'Recall','F1','AUC'])

for files in pair_iterator_2:
    temp_pred_result = []
    
    
    # temporary hold of data from two files
    df_file_1 = pd.read_csv(files[0], 
                            names = ['Class 0', 'Class 1'], 
                            sep=" ")

    df_file_2 = pd.read_csv(files[1], 
                            names = ['Class 0', 'Class 1'], 
                            sep=" ")
    
    # average two files
    #df_combine = df_file_1 * 0.5 + df_file_2 * 0.5
    df_combine = (df_file_1 + df_file_2) / 2

    # compute the new predicted output
    for i in df_combine.itertuples():
        temp_pred_result.append(np.argmax([i._1, i._2]))
    temp_pred_result
    
    accuracy = metrics.accuracy_score(y_test_true, temp_pred_result)
    precision = metrics.precision_score(y_test_true, temp_pred_result)
    recall = metrics.recall_score(y_test_true, temp_pred_result)
    f1 = metrics.f1_score(y_test_true, temp_pred_result)
    auc = metrics.roc_auc_score(y_test_true, temp_pred_result)
    
    
    row_to_add = {'Model 1': clean_filename(files[0]), 'Model 2': clean_filename(files[1]), 'Accuracy': accuracy, 'Precision':precision, 'Recall': recall,'F1': f1,'AUC': auc}
    
    df_ensemble_compare_2 = df_ensemble_compare_2.append(row_to_add, ignore_index = True)
    
df_ensemble_compare_2

Unnamed: 0,Model 1,Model 2,Accuracy,Precision,Recall,F1,AUC
0,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,0.608,0.571429,0.128395,0.209677,0.531424
1,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,0.698,0.784530,0.350617,0.484642,0.642536
2,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,0.618,0.666667,0.113580,0.194093,0.537462
3,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,0.628,0.627907,0.200000,0.303371,0.559664
4,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,CNN_5yr_claims_dev_prob-Copy1.csv,0.639,0.582707,0.382716,0.461997,0.598081
...,...,...,...,...,...,...,...
100,LSTM.csv,CNN_5yr_abstract_title_dev_prob-Copy1.csv,0.646,0.596226,0.390123,0.471642,0.605146
101,LSTM.csv,LSTM_abst.csv,0.670,0.633452,0.439506,0.518950,0.633198
102,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,CNN_5yr_abstract_title_dev_prob-Copy1.csv,0.614,0.546341,0.276543,0.367213,0.560120
103,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,LSTM_abst.csv,0.616,0.551724,0.276543,0.368421,0.561801


In [20]:
print("Rank by Accuracy")
df_ensemble_compare_2.sort_values(by = "Accuracy", ascending = False)

Rank by Accuracy


Unnamed: 0,Model 1,Model 2,Accuracy,Precision,Recall,F1,AUC
1,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,0.698,0.784530,0.350617,0.484642,0.642536
34,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.697,0.675862,0.483951,0.564029,0.662984
36,HATT.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.691,0.758065,0.348148,0.477157,0.636259
28,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,0.691,0.695122,0.422222,0.525346,0.648086
35,HATT.csv,LSTM.csv,0.690,0.644377,0.523457,0.577657,0.663409
...,...,...,...,...,...,...,...
8,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT_abst.csv,0.600,0.512438,0.254321,0.339934,0.544808
74,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.599,0.611111,0.027160,0.052009,0.507698
5,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.597,0.571429,0.019753,0.038186,0.504835
94,HATT_abst.csv,LSTM_abst.csv,0.596,0.501650,0.375309,0.429379,0.560764


In [21]:
print("Rank by Precision")
df_ensemble_compare_2.sort_values(by = "Precision", ascending = False)

Rank by Precision


Unnamed: 0,Model 1,Model 2,Accuracy,Precision,Recall,F1,AUC
30,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.669,0.798387,0.244444,0.374291,0.601214
1,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,0.698,0.784530,0.350617,0.484642,0.642536
9,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.668,0.774436,0.254321,0.382900,0.601950
72,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.625,0.758621,0.108642,0.190065,0.542556
36,HATT.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.691,0.758065,0.348148,0.477157,0.636259
...,...,...,...,...,...,...,...
12,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,CNN_5yr_abstract_title_dev_prob-Copy1.csv,0.603,0.519048,0.269136,0.354472,0.549694
89,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,LSTM_abst.csv,0.605,0.517857,0.358025,0.423358,0.565567
8,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT_abst.csv,0.600,0.512438,0.254321,0.339934,0.544808
94,HATT_abst.csv,LSTM_abst.csv,0.596,0.501650,0.375309,0.429379,0.560764


### Continue with 3 models combinations:

In [22]:

# To generate all combinatition of pairs of models
# https://www.geeksforgeeks.org/itertools-combinations-module-python-print-possible-combinations/
from itertools import combinations

pair_iterator_3 = list(combinations(file_name, 3))

In [23]:
len(pair_iterator_3)

455

In [25]:
# for this purpose, since I need to average two files together, I feel it's easier to use pandas dataframe and use vectorized operation

df_ensemble_compare_3 = pd.DataFrame(columns = ['Model 1', 'Model 2', 'Model 3', 'Accuracy', 'Precision', 'Recall','F1','AUC'])

for files in pair_iterator_3:
    temp_pred_result = []
    
    
    # temporary hold of data from two files
    df_file_1 = pd.read_csv(files[0], 
                            names = ['Class 0', 'Class 1'], 
                            sep=" ")

    df_file_2 = pd.read_csv(files[1], 
                            names = ['Class 0', 'Class 1'], 
                            sep=" ")
    
    df_file_3 = pd.read_csv(files[2], 
                            names = ['Class 0', 'Class 1'], 
                            sep=" ")
    
    # average two files
    #df_combine = df_file_1 * 0.5 + df_file_2 * 0.5
    df_combine = (df_file_1 + df_file_2 + df_file_3) / 3

    # compute the new predicted output
    for i in df_combine.itertuples():
        temp_pred_result.append(np.argmax([i._1, i._2]))
    temp_pred_result
    
    accuracy = metrics.accuracy_score(y_test_true, temp_pred_result)
    precision = metrics.precision_score(y_test_true, temp_pred_result)
    recall = metrics.recall_score(y_test_true, temp_pred_result)
    f1 = metrics.f1_score(y_test_true, temp_pred_result)
    auc = metrics.roc_auc_score(y_test_true, temp_pred_result)
    
    
    row_to_add = {'Model 1': clean_filename(files[0]), 'Model 2': clean_filename(files[1]), 'Model 3': clean_filename(files[2]), 'Accuracy': accuracy, 'Precision':precision, 'Recall': recall,'F1': f1,'AUC': auc}
    
    df_ensemble_compare_3 = df_ensemble_compare_3.append(row_to_add, ignore_index = True)
    
df_ensemble_compare_3

Unnamed: 0,Model 1,Model 2,Model 3,Accuracy,Precision,Recall,F1,AUC
0,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,0.661,0.742647,0.249383,0.373383,0.595280
1,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,0.618,0.661972,0.116049,0.197479,0.537857
2,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,0.625,0.644231,0.165432,0.263261,0.551624
3,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,CNN_5yr_claims_dev_prob-Copy1.csv,0.638,0.618785,0.276543,0.382253,0.580288
4,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.603,0.653846,0.041975,0.078886,0.513425
...,...,...,...,...,...,...,...,...
450,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,CNN_5yr_abstract_title_dev_prob-Copy1.csv,LSTM_abst.csv,0.637,0.588235,0.345679,0.435459,0.590487
451,LSTM.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,CNN_5yr_abstract_title_dev_prob-Copy1.csv,0.650,0.632850,0.323457,0.428105,0.597863
452,LSTM.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,LSTM_abst.csv,0.669,0.671296,0.358025,0.466989,0.619348
453,LSTM.csv,CNN_5yr_abstract_title_dev_prob-Copy1.csv,LSTM_abst.csv,0.646,0.607595,0.355556,0.448598,0.599627


In [26]:
print("Rank by Accuracy")
df_ensemble_compare_3.sort_values(by = "Accuracy", ascending = False)

Rank by Accuracy


Unnamed: 0,Model 1,Model 2,Model 3,Accuracy,Precision,Recall,F1,AUC
20,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.712,0.785366,0.397531,0.527869,0.661791
226,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.707,0.771845,0.392593,0.520458,0.656800
99,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,LSTM.csv,0.699,0.722222,0.417284,0.528951,0.654020
185,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.698,0.703557,0.439506,0.541033,0.656728
21,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,LSTM.csv,0.697,0.705645,0.432099,0.535988,0.654705
...,...,...,...,...,...,...,...,...
4,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.603,0.653846,0.041975,0.078886,0.513425
443,HATT_abst.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,LSTM_abst.csv,0.602,0.515982,0.279012,0.362179,0.550431
27,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.602,0.629630,0.041975,0.078704,0.512584
80,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT_abst.csv,LSTM_abst.csv,0.600,0.510823,0.291358,0.371069,0.550721


In [27]:
print("Rank by Precision")
df_ensemble_compare_3.sort_values(by = "Precision", ascending = False)

Rank by Precision


Unnamed: 0,Model 1,Model 2,Model 3,Accuracy,Precision,Recall,F1,AUC
390,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.622,0.829268,0.083951,0.152466,0.536093
204,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.651,0.825581,0.175309,0.289206,0.575049
16,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.646,0.822785,0.160494,0.268595,0.568482
58,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.615,0.812500,0.064198,0.118993,0.527057
136,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.621,0.809524,0.083951,0.152125,0.535253
...,...,...,...,...,...,...,...,...
158,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT_abst.csv,LSTM_abst.csv,0.607,0.528571,0.274074,0.360976,0.553844
424,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,HATT_abst.csv,LSTM_abst.csv,0.610,0.526316,0.370370,0.434783,0.571740
438,HATT_abst.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM_abst.csv,0.609,0.524138,0.375309,0.437410,0.571688
443,HATT_abst.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,LSTM_abst.csv,0.602,0.515982,0.279012,0.362179,0.550431


### Continue with 4 models combinations:

In [28]:

# To generate all combinatition of pairs of models
# https://www.geeksforgeeks.org/itertools-combinations-module-python-print-possible-combinations/
from itertools import combinations

pair_iterator_4 = list(combinations(file_name, 4))

In [29]:
len(pair_iterator_4)

1365

In [30]:
pair_iterator_4

[('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/HATT.csv',
  'Prediction_Output/Distil-BERT_5yr_claims_dev_prob-Copy1.csv'),
 ('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/HATT.csv',
  'Prediction_Output/BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv'),
 ('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/HATT.csv',
  'Prediction_Output/CNN_5yr_claims_dev_prob-Copy1.csv'),
 ('Prediction_Output/TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv',
  'Prediction_Output/HATT.csv',
  'Prediction_Output/BERT_CNN_5yr_claims_last_510_words_dev_prob.csv'),
 ('Prediction_O

In [31]:
# for this purpose, since I need to average two files together, I feel it's easier to use pandas dataframe and use vectorized operation

df_ensemble_compare_4 = pd.DataFrame(columns = ['Model 1', 'Model 2', 'Model 3', 'Model 4', 'Accuracy', 'Precision', 'Recall','F1','AUC'])

for files in pair_iterator_4:
    temp_pred_result = []
    
    
    # temporary hold of data from two files
    df_file_1 = pd.read_csv(files[0], 
                            names = ['Class 0', 'Class 1'], 
                            sep=" ")

    df_file_2 = pd.read_csv(files[1], 
                            names = ['Class 0', 'Class 1'], 
                            sep=" ")
    
    df_file_3 = pd.read_csv(files[2], 
                            names = ['Class 0', 'Class 1'], 
                            sep=" ")
    
    df_file_4 = pd.read_csv(files[3], 
                            names = ['Class 0', 'Class 1'], 
                            sep=" ")
    
    # average two files
    #df_combine = df_file_1 * 0.5 + df_file_2 * 0.5
    df_combine = (df_file_1 + df_file_2 + df_file_3 + df_file_4) / 4

    # compute the new predicted output
    for i in df_combine.itertuples():
        temp_pred_result.append(np.argmax([i._1, i._2]))
    temp_pred_result
    
    accuracy = metrics.accuracy_score(y_test_true, temp_pred_result)
    precision = metrics.precision_score(y_test_true, temp_pred_result)
    recall = metrics.recall_score(y_test_true, temp_pred_result)
    f1 = metrics.f1_score(y_test_true, temp_pred_result)
    auc = metrics.roc_auc_score(y_test_true, temp_pred_result)
    
    
    row_to_add = {'Model 1': clean_filename(files[0]), 'Model 2': clean_filename(files[1]), 'Model 3': clean_filename(files[2]), 'Model 4': clean_filename(files[3]), 'Accuracy': accuracy, 'Precision':precision, 'Recall': recall,'F1': f1,'AUC': auc}
    
    df_ensemble_compare_4 = df_ensemble_compare_4.append(row_to_add, ignore_index = True)
    
df_ensemble_compare_4

Unnamed: 0,Model 1,Model 2,Model 3,Model 4,Accuracy,Precision,Recall,F1,AUC
0,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,0.656,0.756303,0.222222,0.343511,0.586741
1,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,0.649,0.701493,0.232099,0.348794,0.582436
2,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,CNN_5yr_claims_dev_prob-Copy1.csv,0.673,0.689320,0.350617,0.464812,0.621527
3,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.628,0.761905,0.118519,0.205128,0.546654
4,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,BERT_fine_tune_5yr_abstract_title_dev_prob-Cop...,0.654,0.683230,0.271605,0.388693,0.592945
...,...,...,...,...,...,...,...,...,...
1360,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,CNN_5yr_abstract_title_dev_prob-Copy1.csv,0.666,0.668246,0.348148,0.457792,0.615251
1361,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,LSTM_abst.csv,0.685,0.706422,0.380247,0.494382,0.636342
1362,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,CNN_5yr_abstract_title_dev_prob-Copy1.csv,LSTM_abst.csv,0.655,0.623967,0.372840,0.466770,0.609949
1363,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,CNN_5yr_abstract_title_dev_prob-Copy1.csv,LSTM_abst.csv,0.637,0.610526,0.286420,0.389916,0.581025


In [32]:
print("Rank by Accuracy")
df_ensemble_compare_4.sort_values(by = "Accuracy", ascending = False)

Rank by Accuracy


Unnamed: 0,Model 1,Model 2,Model 3,Model 4,Accuracy,Precision,Recall,F1,AUC
420,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,0.715,0.752101,0.441975,0.556765,0.671408
95,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,LSTM.csv,0.704,0.746606,0.407407,0.527157,0.656645
134,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,0.702,0.720165,0.432099,0.540123,0.658907
381,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,LSTM.csv,0.701,0.747664,0.395062,0.516963,0.652153
84,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.701,0.791209,0.355556,0.490630,0.645845
...,...,...,...,...,...,...,...,...,...
592,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,LSTM_abst.csv,0.608,0.600000,0.096296,0.165957,0.526299
24,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.606,0.622222,0.069136,0.124444,0.520282
168,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.606,0.703704,0.046914,0.087963,0.516734
47,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.605,0.678571,0.046914,0.087760,0.515894


In [33]:
print("Rank by Precision")
df_ensemble_compare_4.sort_values(by = "Precision", ascending = False)

Rank by Precision


Unnamed: 0,Model 1,Model 2,Model 3,Model 4,Accuracy,Precision,Recall,F1,AUC
111,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.665,0.830189,0.217284,0.344423,0.593516
805,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.665,0.824074,0.219753,0.346979,0.593910
166,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.620,0.820513,0.079012,0.144144,0.533624
302,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,LSTM.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.641,0.819444,0.145679,0.247379,0.561915
113,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.637,0.818182,0.133333,0.229299,0.556583
...,...,...,...,...,...,...,...,...,...
619,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,HATT_abst.csv,LSTM_abst.csv,0.616,0.548837,0.291358,0.380645,0.564166
1319,BERT_fine_tune_5yr_abstract_title_dev_prob-Cop...,HATT_abst.csv,CNN_5yr_abstract_title_dev_prob-Copy1.csv,LSTM_abst.csv,0.617,0.548246,0.308642,0.394945,0.567766
1124,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,HATT_abst.csv,LSTM_abst.csv,0.616,0.542510,0.330864,0.411043,0.570474
1338,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,HATT_abst.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,LSTM_abst.csv,0.613,0.540179,0.298765,0.384738,0.562828


### Now, let's merge the Ensemble models with the original single models and rank them altogether for comparison purpose

In [34]:
# Create a copy to preserve the original table
summary_df_copy = summary_df.copy()

# Rename the column name to align both tables
summary_df_copy = summary_df_copy.rename(columns={"Model": "Model 1"})

# Concatenate both tables (this merge based on the column header)
df_all = pd.concat([df_ensemble_compare_4, df_ensemble_compare_3, df_ensemble_compare_2 , summary_df_copy])
df_all

Unnamed: 0,Model 1,Model 2,Model 3,Model 4,Accuracy,Precision,Recall,F1,AUC
0,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,0.656,0.756303,0.222222,0.343511,0.586741
1,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,0.649,0.701493,0.232099,0.348794,0.582436
2,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,CNN_5yr_claims_dev_prob-Copy1.csv,0.673,0.689320,0.350617,0.464812,0.621527
3,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,0.628,0.761905,0.118519,0.205128,0.546654
4,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,BERT_fine_tune_5yr_abstract_title_dev_prob-Cop...,0.654,0.683230,0.271605,0.388693,0.592945
...,...,...,...,...,...,...,...,...,...
10,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,,,,0.689,0.635838,0.543210,0.585885,0.665723
11,LSTM.csv,,,,0.681,0.622857,0.538272,0.577483,0.658211
12,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,,,0.618,0.632184,0.135802,0.223577,0.541010
13,CNN_5yr_abstract_title_dev_prob-Copy1.csv,,,,0.589,0.488550,0.316049,0.383808,0.545420


In [35]:
# Now we can sort/rank them again
print("Rank by Accuracy")
df_all.sort_values(by = "Accuracy", ascending = False)


Rank by Accuracy


Unnamed: 0,Model 1,Model 2,Model 3,Model 4,Accuracy,Precision,Recall,F1,AUC
420,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,0.715,0.752101,0.441975,0.556765,0.671408
20,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,,0.712,0.785366,0.397531,0.527869,0.661791
226,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,0.707,0.771845,0.392593,0.520458,0.656800
95,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob-Copy1.csv,LSTM.csv,0.704,0.746606,0.407407,0.527157,0.656645
134,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,0.702,0.720165,0.432099,0.540123,0.658907
...,...,...,...,...,...,...,...,...,...
94,HATT_abst.csv,LSTM_abst.csv,,,0.596,0.501650,0.375309,0.429379,0.560764
14,LSTM_abst.csv,,,,0.594,0.498471,0.402469,0.445355,0.563419
84,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,HATT_abst.csv,,,0.593,0.496855,0.390123,0.437068,0.560608
13,CNN_5yr_abstract_title_dev_prob-Copy1.csv,,,,0.589,0.488550,0.316049,0.383808,0.545420


In [36]:
print("Rank by Precision")
df_all.sort_values(by = "Precision", ascending = False)


Rank by Precision


Unnamed: 0,Model 1,Model 2,Model 3,Model 4,Accuracy,Precision,Recall,F1,AUC
111,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,0.665,0.830189,0.217284,0.344423,0.593516
390,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,0.622,0.829268,0.083951,0.152466,0.536093
204,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,0.651,0.825581,0.175309,0.289206,0.575049
805,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,0.665,0.824074,0.219753,0.346979,0.593910
16,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,HATT.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,,0.646,0.822785,0.160494,0.268595,0.568482
...,...,...,...,...,...,...,...,...,...
94,HATT_abst.csv,LSTM_abst.csv,,,0.596,0.501650,0.375309,0.429379,0.560764
14,LSTM_abst.csv,,,,0.594,0.498471,0.402469,0.445355,0.563419
84,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,HATT_abst.csv,,,0.593,0.496855,0.390123,0.437068,0.560608
13,CNN_5yr_abstract_title_dev_prob-Copy1.csv,,,,0.589,0.488550,0.316049,0.383808,0.545420


In [37]:
print("Rank by Recall")
df_all.sort_values(by = "Recall", ascending = False)


Rank by Recall


Unnamed: 0,Model 1,Model 2,Model 3,Model 4,Accuracy,Precision,Recall,F1,AUC
10,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,,,,0.689,0.635838,0.543210,0.585885,0.665723
11,LSTM.csv,,,,0.681,0.622857,0.538272,0.577483,0.658211
95,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,,,0.686,0.635821,0.525926,0.575676,0.660442
35,HATT.csv,LSTM.csv,,,0.690,0.644377,0.523457,0.577657,0.663409
216,HATT.csv,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,LSTM.csv,,0.695,0.657233,0.516049,0.578147,0.666428
...,...,...,...,...,...,...,...,...,...
4,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,,0.603,0.653846,0.041975,0.078886,0.513425
138,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,0.604,0.680000,0.041975,0.079070,0.514265
74,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,,0.599,0.611111,0.027160,0.052009,0.507698
60,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,0.597,0.562500,0.022222,0.042755,0.505229


In [38]:
print("Rank by F1")
df_all.sort_values(by = "F1", ascending = False)


Rank by F1


Unnamed: 0,Model 1,Model 2,Model 3,Model 4,Accuracy,Precision,Recall,F1,AUC
10,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,,,,0.689,0.635838,0.543210,0.585885,0.665723
216,HATT.csv,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,LSTM.csv,,0.695,0.657233,0.516049,0.578147,0.666428
35,HATT.csv,LSTM.csv,,,0.690,0.644377,0.523457,0.577657,0.663409
11,LSTM.csv,,,,0.681,0.622857,0.538272,0.577483,0.658211
95,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,,,0.686,0.635821,0.525926,0.575676,0.660442
...,...,...,...,...,...,...,...,...,...
4,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,,0.603,0.653846,0.041975,0.078886,0.513425
27,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,,0.602,0.629630,0.041975,0.078704,0.512584
74,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,,0.599,0.611111,0.027160,0.052009,0.507698
60,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,0.597,0.562500,0.022222,0.042755,0.505229


In [39]:
print("Rank by AUC")
df_all.sort_values(by = "AUC", ascending = False)


Rank by AUC


Unnamed: 0,Model 1,Model 2,Model 3,Model 4,Accuracy,Precision,Recall,F1,AUC
420,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,HATT.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,0.715,0.752101,0.441975,0.556765,0.671408
216,HATT.csv,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,LSTM.csv,,0.695,0.657233,0.516049,0.578147,0.666428
10,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,,,,0.689,0.635838,0.543210,0.585885,0.665723
35,HATT.csv,LSTM.csv,,,0.690,0.644377,0.523457,0.577657,0.663409
840,HATT.csv,BERT_CNN_5yr_claims_dev_prob-Copy1.csv,BERT_fine_tune_5yr_claims_dev_prob_last_n_word...,LSTM.csv,0.695,0.665563,0.496296,0.568600,0.663274
...,...,...,...,...,...,...,...,...,...
4,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_abstract_title_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,,0.603,0.653846,0.041975,0.078886,0.513425
27,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,Distil-BERT_5yr_claims_dev_prob-Copy1.csv,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,,0.602,0.629630,0.041975,0.078704,0.512584
74,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,,0.599,0.611111,0.027160,0.052009,0.507698
60,TFIDF_NaiveBayes_5yr_abstract_title_dev_prob-C...,BERT_CNN_5yr_claims_last_510_words_dev_prob.csv,TFIDF_NaiveBayes_5yr_claims_dev_prob-Copy1.csv,,0.597,0.562500,0.022222,0.042755,0.505229
