In [3]:
import pandas as pd
import os

from sklearn import metrics

In [8]:
# Model identifiers to evaluate. Each name is passed to load_predictions,
# which uses it to build the names of the CSV files it reads.
MODEL_NAMES = [
    "bert-base-multilingual-uncased-sentiment",
    "distilbert-base-multilingual-cased-sentiments-student",
    "FinancialBERT-Sentiment-Analysis",
    "twitter-roberta-base-sentiment-latest",
    "twitter-roberta-base-sentiment",
]

def load_predictions(model_name, results_dir="model_testing_results"):
    """Load the two result CSV files stored for ``model_name``.

    Parameters
    ----------
    model_name : str
        Name inserted into the file names ``submit_<model_name>.csv`` and
        ``validation_set_<model_name>.csv``.
    results_dir : str, optional
        Directory that contains both files. Defaults to
        ``"model_testing_results"``.

    Returns
    -------
    tuple
        ``(true_values, predicted_values)`` as pandas DataFrames, each indexed
        by the first column of its file. In ``predicted_values`` every ``0``
        in the ``Prediction`` column has been replaced by ``-1``.

    Raises
    ------
    FileNotFoundError
        Raised by ``pandas.read_csv`` if either file does not exist.
    """
    # NOTE(review): the "submit_" file is bound to `true_values` and the
    # "validation_set_" file to `predicted_values`; from the file names this
    # may be the other way round -- confirm against how the files are written.
    true_path = os.path.join(results_dir, "submit_" + model_name + ".csv")
    pred_path = os.path.join(results_dir, "validation_set_" + model_name + ".csv")

    # Default header handling: the first line of this file is the header row.
    true_values = pd.read_csv(true_path, index_col=0)

    # `names` is supplied without `header`, so pandas treats the first line of
    # this file as data rather than as a header row.
    predicted_values = pd.read_csv(pred_path, index_col=0, names=['Id', 'Prediction'])

    # Recode 0 -> -1 in the Prediction column only. Assigning through a bare
    # boolean row mask would overwrite every column of the matching rows.
    zero_mask = predicted_values['Prediction'] == 0
    predicted_values.loc[zero_mask, 'Prediction'] = -1
    return true_values, predicted_values

def calculate_metrics(true, pred):
    """Compute the F1 score and accuracy of ``pred`` against ``true``.

    Both arguments must expose a ``.values`` attribute (for example pandas
    objects). Only the underlying arrays are handed to scikit-learn, so the
    comparison is positional: index labels are not used to align rows.

    Both metrics are computed with scikit-learn's default arguments.

    Returns
    -------
    tuple
        ``(f1, accuracy)`` -- note that F1 comes first.
    """
    y_true, y_pred = true.values, pred.values
    accuracy_value = metrics.accuracy_score(y_true, y_pred)
    f1_value = metrics.f1_score(y_true, y_pred)
    return f1_value, accuracy_value

In [9]:
# For each configured model: load its two result files, compute the metrics,
# and print a three-line summary with both values rounded to 4 decimals.
for model in MODEL_NAMES:
    true, pred = load_predictions(model)
    f1, accuracy = calculate_metrics(true, pred)

    # Adjacent f-string literals are concatenated into a single message.
    print(
        f"Model: {model},\n"
        f" \t F1 score : {round(f1,4)},\n"
        f" \t accuracy : {round(accuracy,4)}"
    )

Model: bert-base-multilingual-uncased-sentiment,
 	 F1 score : 0.8264,
 	 accuracy : 0.8277
Model: distilbert-base-multilingual-cased-sentiments-student,
 	 F1 score : 0.8254,
 	 accuracy : 0.8267
Model: FinancialBERT-Sentiment-Analysis,
 	 F1 score : 0.817,
 	 accuracy : 0.821
Model: twitter-roberta-base-sentiment-latest,
 	 F1 score : 0.8687,
 	 accuracy : 0.8694
Model: twitter-roberta-base-sentiment,
 	 F1 score : 0.8582,
 	 accuracy : 0.8612
