In [1]:
import pandas as pd
import os
import numpy as np
import scipy.stats


In [2]:
def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and its two-sided Student-t confidence interval.

    Parameters
    ----------
    data : array-like
        Numeric observations; converted to a float NumPy array.
    confidence : float, default 0.95
        Confidence level of the interval.

    Returns
    -------
    tuple
        ``(mean, lower_bound, upper_bound, half_width)`` where
        ``lower_bound = mean - half_width`` and
        ``upper_bound = mean + half_width``.
    """
    samples = 1.0 * np.array(data)
    degrees_of_freedom = len(samples) - 1

    center = np.mean(samples)
    std_error = scipy.stats.sem(samples)

    # Critical t value for a two-sided interval at the requested level.
    t_critical = scipy.stats.t.ppf((1 + confidence) / 2., degrees_of_freedom)
    half_width = std_error * t_critical

    lower_bound = center - half_width
    upper_bound = center + half_width
    return center, lower_bound, upper_bound, half_width

In [3]:
# Inspect the results directory to see which workbooks and sub-folders exist.
# `os` is already imported in the notebook's import cell, so it is not
# re-imported here.
directory = '../result_xls/'
os.listdir(directory)

['Pretrained_BERTEmbd_with_NN.xlsx',
 'BERT_base_uncased.xlsx',
 'confusion_matrix_plots',
 'training_val_error_plots',
 'mdl_compr_ttest_result',
 'consolidated_result',
 'Pretrained_BERTEmbd_with_XGB.xlsx']

In [4]:
# Aggregate the per-run results stored in every Excel workbook of `directory`
# into one summary table: one row per (workbook, EvalSet) group, holding the
# mean of each metric and the half-width of its 95% confidence interval.

# Directory path containing the Excel files
directory = '../result_xls/'

# Metrics summarised for every group.
metric_columns = ['Accuracy', 'M-Precision', 'M-Recall', 'M-F1-Score',
                  'W-Precision', 'W-Recall', 'W-F1-Score', 'Runtime']

# Output layout: identifiers first, then all means, then all CI half-widths.
summary_columns = (['Model', 'EvalSet']
                   + metric_columns
                   + [metric + '_CI' for metric in metric_columns])

# One dict per (workbook, EvalSet) group; converted to a DataFrame once at the
# end instead of growing a frame with repeated pd.concat calls.
summary_rows = []

# Iterate over each file in the directory
for filename in os.listdir(directory):
    # Sub-directories and non-Excel files are skipped.
    if not filename.endswith('.xlsx'):
        continue

    # Build the path from `directory` so the location is defined in one place.
    file_path = os.path.join(directory, filename)

    # Read the Excel file into a DataFrame
    runs = pd.read_excel(file_path)

    # sort=False keeps the evaluation sets in order of first appearance.
    # Rows with a missing EvalSet are dropped by groupby rather than producing
    # an empty group.
    for evalset, group in runs.groupby('EvalSet', sort=False):
        row = {
            'Model': group['Model'].iloc[0],
            'EvalSet': group['EvalSet'].iloc[0],
        }

        for metric in metric_columns:
            # Compute the interval once and reuse both the mean and half-width.
            mean, lower, upper, half_width = mean_confidence_interval(
                group[metric], confidence=0.95)
            row[metric] = round(mean, 3)
            row[metric + '_CI'] = round(half_width, 3)

        summary_rows.append(row)

# Passing `columns` fixes the column order and also yields a well-formed empty
# frame when no workbook was found.
concatenated_df = pd.DataFrame(summary_rows, columns=summary_columns)

# Display the consolidated DataFrame
display(concatenated_df)


Unnamed: 0,Model,EvalSet,Accuracy,M-Precision,M-Recall,M-F1-Score,W-Precision,W-Recall,W-F1-Score,Runtime,Accuracy_CI,M-Precision_CI,M-Recall_CI,M-F1-Score_CI,W-Precision_CI,W-Recall_CI,W-F1-Score_CI,Runtime_CI
0,Pretrained_BERTEmbd_with_NN,training,0.953,0.955,0.952,0.952,0.954,0.953,0.952,29.579,0.005,0.004,0.006,0.006,0.003,0.005,0.006,0.326
1,Pretrained_BERTEmbd_with_NN,validation,0.954,0.957,0.954,0.955,0.957,0.954,0.955,29.579,0.005,0.005,0.006,0.006,0.005,0.005,0.006,0.326
2,Pretrained_BERTEmbd_with_NN,test,0.953,0.955,0.953,0.953,0.955,0.953,0.952,29.579,0.006,0.003,0.006,0.006,0.003,0.006,0.006,0.326
3,BERT_base_uncased,training,0.953,0.957,0.953,0.953,0.957,0.953,0.953,176.357,0.003,0.002,0.004,0.003,0.003,0.003,0.003,17.977
4,BERT_base_uncased,validation,0.95,0.955,0.951,0.95,0.956,0.95,0.95,176.357,0.009,0.006,0.01,0.01,0.006,0.009,0.01,17.977
5,BERT_base_uncased,test,0.952,0.957,0.951,0.952,0.957,0.952,0.952,176.357,0.003,0.002,0.004,0.003,0.003,0.003,0.003,17.977
6,Pretrained_BERTEmbd_with_XGB,training,0.966,0.966,0.966,0.966,0.966,0.966,0.966,66.885,0.001,0.001,0.001,0.001,0.001,0.001,0.001,2.182
7,Pretrained_BERTEmbd_with_XGB,validation,0.953,0.953,0.953,0.953,0.953,0.953,0.953,66.885,0.005,0.005,0.005,0.005,0.005,0.005,0.005,2.182
8,Pretrained_BERTEmbd_with_XGB,test,0.952,0.953,0.952,0.953,0.953,0.952,0.952,66.885,0.002,0.002,0.003,0.003,0.003,0.002,0.002,2.182


In [5]:

# Write the consolidated summary table to an Excel workbook, without the
# DataFrame index column.
consolidated_report_path = '../result_xls/consolidated_result/All_Scenario_Consolidated_Classification_Report.xlsx'
concatenated_df.to_excel(consolidated_report_path, index=False)
