# Access Each Model's 10-Fold Predictions

In [79]:
import os
import pandas as pd

# Function to read and process CSV files
def read_and_process_csv(folder_path, model_name, split_ratio):
    """Gather the trailing rows of every fold's prediction CSV into one frame.

    Every sub-directory of ``folder_path`` is scanned for the files
    ``probsandlab_0_bert.csv`` ... ``probsandlab_9_bert.csv`` (the ``_bert``
    suffix is used for all models, regardless of ``model_name``). From each
    file found, the first ``int(len(df) * split_ratio)`` rows are dropped and
    the remainder is kept.
    NOTE(review): presumably each CSV stacks one block of predictions per
    training epoch and only the last epoch is wanted -- confirm.

    Args:
        folder_path: Directory that contains one sub-directory per fold.
        model_name: New name given to the first column of the result.
        split_ratio: Fraction of leading rows to discard from every CSV.

    Returns:
        A DataFrame with a fresh 0..n-1 index whose first column is renamed
        to ``model_name``.

    Raises:
        ValueError: If no matching CSV file exists under ``folder_path``.
    """
    dfs = []

    # os.listdir() yields entries in arbitrary order. The caller concatenates
    # the per-model results column-wise by position, so the folds must be
    # visited in a deterministic order for rows to line up across models.
    for fold_folder in sorted(os.listdir(folder_path)):
        fold_path = os.path.join(folder_path, fold_folder)
        if not os.path.isdir(fold_path):
            continue

        for i in range(10):
            csv_file = os.path.join(fold_path, f"probsandlab_{i}_bert.csv")
            if not os.path.isfile(csv_file):
                continue

            df = pd.read_csv(csv_file)
            # int() truncates, so a fractional cut-off rounds down.
            length = int(len(df) * split_ratio)
            dfs.append(df.iloc[length:])

    if not dfs:
        # pd.concat([]) would raise a ValueError as well, but without saying
        # which folder was empty.
        raise ValueError(
            f"No 'probsandlab_<i>_bert.csv' files found under {folder_path!r}"
        )

    combined_df = pd.concat(dfs, ignore_index=True)
    return combined_df.rename(columns={combined_df.columns[0]: model_name})

# Per model: (folder holding the fold results, fraction of leading rows to drop
# from each CSV). ALBERT was trained for 3 epochs instead of 2, which is why
# its split ratio is 2/3 rather than 0.5.
model_info = dict(
    bert=("LRADAMWDROPOUT_BERT_fold_results", 0.5),
    finbert=("LRADAMWDROPOUT_finBERT_fold_results", 0.5),
    albert=("LRADAMWDROPOUT_alBERT_fold_results", 2/3),
    distilbert=("LRADAMWDROPOUT_distilbert_fold_results", 0.5),
    electra=("LRADAMWDROPOUT_electra_fold_results", 0.5),
    roberta=("LRADAMWDROPOUT_roberta_fold_results", 0.5),
)

# One processed DataFrame per model, keyed by model name
model_dataframes = {}

# Load every model's fold predictions in the order declared above
for model_name in model_info:
    folder_path, split_ratio = model_info[model_name]
    model_dataframes[model_name] = read_and_process_csv(folder_path, model_name, split_ratio)

# Place the per-model frames side by side (column-wise)
combined_df = pd.concat([model_dataframes[name] for name in model_info], axis=1)
combined_df

Unnamed: 0,bert,Label,finbert,Label.1,albert,Label.2,distilbert,Label.3,electra,Label.4,roberta,Label.5
0,0.019252,0,0.015983,0,0.077024,0,0.081898,0,0.042294,0,0.206115,0
1,0.195125,0,0.171317,0,0.150486,0,0.146320,0,0.130800,0,0.126675,0
2,0.413737,1,0.333470,1,0.342290,1,0.243358,1,0.307695,1,0.264530,1
3,0.030721,0,0.031983,0,0.046123,0,0.075985,0,0.058509,0,0.034376,0
4,0.123381,0,0.079091,0,0.088136,0,0.106967,0,0.063211,0,0.043188,0
...,...,...,...,...,...,...,...,...,...,...,...,...
8717,0.960492,1,1.043077,1,0.966354,1,0.903408,1,1.073371,1,0.926987,1
8718,0.385009,1,0.273092,1,0.380588,1,0.240789,1,0.368065,1,0.595387,1
8719,0.964928,1,0.980346,1,0.885620,1,0.847621,1,0.913632,1,0.964808,1
8720,0.400460,0,0.465044,0,0.383342,0,0.347424,0,0.640280,0,0.307042,0


# Determine Each Model's Individual Predictions in a [model]_preds Column

In [81]:
import pandas as pd

# (name of the new prediction column, position of the probability column it is derived from)
model_columns = [('bert_preds', 0), ('finbert_preds', 2), ('albert_preds', 4),
                 ('distilbert_preds', 6), ('electra_preds', 8), ('roberta_preds', 10)]

# Threshold each probability column at 0.5 (strictly greater -> 1, otherwise 0)
for model_col, col_index in model_columns:
    probabilities = combined_df.iloc[:, col_index]
    combined_df[model_col] = probabilities.gt(0.5).astype('int64')

# Average the six probability columns row by row, then threshold the average the same way
prob_positions = [position for _, position in model_columns]
combined_df['ave_probs'] = combined_df.iloc[:, prob_positions].mean(axis=1)
combined_df['vote_preds'] = combined_df['ave_probs'].gt(0.5).astype('int64')
combined_df

Unnamed: 0,bert,Label,finbert,Label.1,albert,Label.2,distilbert,Label.3,electra,Label.4,roberta,Label.5,bert_preds,finbert_preds,albert_preds,distilbert_preds,electra_preds,roberta_preds,ave_probs,vote_preds
0,0.019252,0,0.015983,0,0.077024,0,0.081898,0,0.042294,0,0.206115,0,0,0,0,0,0,0,0.073761,0
1,0.195125,0,0.171317,0,0.150486,0,0.146320,0,0.130800,0,0.126675,0,0,0,0,0,0,0,0.153454,0
2,0.413737,1,0.333470,1,0.342290,1,0.243358,1,0.307695,1,0.264530,1,0,0,0,0,0,0,0.317513,0
3,0.030721,0,0.031983,0,0.046123,0,0.075985,0,0.058509,0,0.034376,0,0,0,0,0,0,0,0.046283,0
4,0.123381,0,0.079091,0,0.088136,0,0.106967,0,0.063211,0,0.043188,0,0,0,0,0,0,0,0.083995,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8717,0.960492,1,1.043077,1,0.966354,1,0.903408,1,1.073371,1,0.926987,1,1,1,1,1,1,1,0.978948,1
8718,0.385009,1,0.273092,1,0.380588,1,0.240789,1,0.368065,1,0.595387,1,0,0,0,0,0,1,0.373822,0
8719,0.964928,1,0.980346,1,0.885620,1,0.847621,1,0.913632,1,0.964808,1,1,1,1,1,1,1,0.926159,1
8720,0.400460,0,0.465044,0,0.383342,0,0.347424,0,0.640280,0,0.307042,0,0,0,0,0,1,0,0.423932,0


# Perform Hard Voting with the four models, and evaluate all the models

In [87]:
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)

# Ground-truth labels: the second column of combined_df, cast to int
labels = combined_df.iloc[:, 1].astype(int)

model_columns = ['finbert_preds', 'bert_preds', 'electra_preds', 'roberta_preds']  # four models for hard voting
additional_columns = ['albert_preds', 'distilbert_preds']


def _score_predictions(y_true, y_pred):
    """Return the summary metrics reported in the table for one prediction vector."""
    return {
        'Macro F1 Score': f1_score(y_true, y_pred, average='macro'),
        'Micro F1 Score': f1_score(y_true, y_pred, average='micro'),
        'Weighted F1 Score': f1_score(y_true, y_pred, average='weighted'),
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred),
        'Recall': recall_score(y_true, y_pred),
    }


# Per-model metrics, keyed by prediction column name. This must start empty:
# seeding it from `preds` before any prediction exists raises NameError.
metrics = {}
classification_reports = []

# Score the voting members first, then the additional models, so that
# classification_reports keeps that order.
for column in model_columns + additional_columns:
    preds = (combined_df[column] > 0.5).astype(int)
    classification_reports.append(classification_report(labels, preds, output_dict=True))
    metrics[column] = _score_predictions(labels, preds)

# Hard voting over the four selected models only. The mean must be strictly
# greater than 0.5, so a 2-2 tie is resolved to class 0.
vote_preds = (combined_df[model_columns].mean(axis=1) > 0.5).astype(int)
vote_metrics = _score_predictions(labels, vote_preds)

# From here on model_columns lists all six individual models
model_columns = model_columns + additional_columns

# Create the summary table: one row per model plus the voting ensemble
summary_data = {'Model': model_columns + ['Our Model']}
for metric_name in vote_metrics:
    summary_data[metric_name] = (
        [metrics[model][metric_name] for model in model_columns]
        + [vote_metrics[metric_name]]
    )

summary_table = pd.DataFrame(summary_data).sort_values(by='Macro F1 Score', ascending=False)

# Display the summary table
print("Summary Table:")
summary_table

Summary Table:


Unnamed: 0,Model,Macro F1 Score,Micro F1 Score,Weighted F1 Score,Accuracy,Precision,Recall
6,Our Model,0.766231,0.766682,0.766723,0.766682,0.752806,0.759152
2,electra_preds,0.761113,0.761179,0.761303,0.761179,0.733785,0.782033
3,roberta_preds,0.759112,0.759115,0.759151,0.759115,0.725831,0.793834
1,bert_preds,0.758197,0.758427,0.758554,0.758427,0.737735,0.76421
0,finbert_preds,0.756629,0.756707,0.756838,0.756707,0.72995,0.776012
4,albert_preds,0.753847,0.754529,0.754468,0.754529,0.744588,0.737235
5,distilbert_preds,0.752415,0.752809,0.752888,0.752809,0.736381,0.748796


In [88]:
# Initialize dictionaries to store per-label metrics for each model
f1_per_label = {}
precision_per_label = {}
recall_per_label = {}

model_columns = ['finbert_preds', 'bert_preds', 'electra_preds', 'roberta_preds']  # four models for hard voting
additional_columns = ['albert_preds', 'distilbert_preds']

# The per-class entries of the report dict are looked up by str(label).
# Derive the keys from the labels actually present instead of relying on a
# `num_labels` variable that is not defined in this notebook.
label_keys = [str(label) for label in sorted(labels.unique())]


def _store_per_label_metrics(name, y_pred):
    """Record the per-label precision, recall and F1 of `y_pred` under `name`."""
    report = classification_report(labels, y_pred, output_dict=True, zero_division=1)
    f1_per_label[name] = {key: report[key]['f1-score'] for key in label_keys}
    precision_per_label[name] = {key: report[key]['precision'] for key in label_keys}
    recall_per_label[name] = {key: report[key]['recall'] for key in label_keys}


for column in model_columns:
    preds = (combined_df[column] > 0.5).astype(int)
    _store_per_label_metrics(column, preds)

# Calculate predictions and metrics for the voting model (four voters; a 2-2
# tie has mean 0.5, which is not > 0.5, so it resolves to class 0)
vote_preds = (combined_df[model_columns].mean(axis=1) > 0.5).astype(int)
_store_per_label_metrics('vote_preds', vote_preds)

for column in additional_columns:
    preds = (combined_df[column] > 0.5).astype(int)
    _store_per_label_metrics(column, preds)

# From here on model_columns lists all six individual models
model_columns = model_columns + additional_columns

# Create DataFrames for per-label metrics (columns: models, rows: labels)
f1_per_label_df = pd.DataFrame(f1_per_label)
precision_per_label_df = pd.DataFrame(precision_per_label)
recall_per_label_df = pd.DataFrame(recall_per_label)

# Create a combined DataFrame: one row per method, and for every label a
# Precision_<label>, Recall_<label>, F1_<label> column triple
method_keys = model_columns + ['vote_preds']
combined_metrics = {'Method': model_columns + ['Our Model']}
for key in label_keys:
    combined_metrics[f'Precision_{key}'] = [precision_per_label_df[method][key] for method in method_keys]
    combined_metrics[f'Recall_{key}'] = [recall_per_label_df[method][key] for method in method_keys]
    combined_metrics[f'F1_{key}'] = [f1_per_label_df[method][key] for method in method_keys]
combined_metrics_df = pd.DataFrame(combined_metrics)

# Display the combined DataFrame
combined_metrics_df

Unnamed: 0,Method,Precision_0,Recall_0,F1_0,Precision_1,Recall_1,F1_1
0,finbert_preds,0.784123,0.739168,0.760982,0.72995,0.776012,0.752276
1,bert_preds,0.778557,0.753173,0.765655,0.737735,0.76421,0.750739
2,electra_preds,0.789388,0.742232,0.765084,0.733785,0.782033,0.757141
3,roberta_preds,0.795264,0.727571,0.759913,0.725831,0.793834,0.758311
4,albert_preds,0.763392,0.770241,0.766801,0.744588,0.737235,0.740893
5,distilbert_preds,0.768222,0.756455,0.762293,0.736381,0.748796,0.742536
6,Our Model,0.779493,0.773523,0.776496,0.752806,0.759152,0.755966
