In [1]:
from scipy.stats import ttest_rel
import pandas as pd
import os
import numpy as np
from scipy.stats import t #fetch t values for t-table

In [2]:
# Set display options to show all columns and unlimited column width
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)



directory='../result_xls/'
# Filter files with .xlsx extension
xlsx_files = [file for file in os.listdir(directory) if file.endswith('.xlsx')]


# Read each Excel file into a DataFrame
dataframes = []
for file in xlsx_files:
    file_path = os.path.join(directory, file)
    df = pd.read_excel(file_path)
    # Extract file name without extension
    model_name = os.path.splitext(file)[0]
    # Update "Model" column with file name
    df['Model'] = model_name
    dataframes.append(df)

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(dataframes)
combined_df=combined_df[['Model','EvalSet', 'M-Precision', 'M-Recall', 'M-F1-Score']]
combined_df
combined_df = combined_df.reset_index(drop=True)

filtered_df = combined_df[combined_df['EvalSet'] == 'test'].reset_index(drop=True)
print("Models Performance on Test data Table: ")
display(filtered_df)


def calculate_stddev(row, samples=5):
    deviations = row[1:-1] - row[-1]  # Subtract mean from each value
    squared_deviations = deviations ** 2  # Square the deviations
    sum_squared_deviations = np.sum(squared_deviations)  # Calculate mean of squared deviations
    stddev = np.sqrt(sum_squared_deviations/(samples-1))
    
    return stddev

# Define a function to calculate t-statistics
def calculate_t_stats(row, samples=5):
    t_stats = (row['Mean'] * np.sqrt(samples))/row['StdDev']
    return t_stats


p_val=0.05
samples=5
degrees_of_freedom= samples-1

# Calculate the t-value
t_tbl_scr = round(t.ppf(1 - p_val/2, degrees_of_freedom),3)
print('t-table score', t_tbl_scr)
# https://towardsdatascience.com/paired-t-test-to-evaluate-machine-learning-classifiers-1f395a6c93fa


###############################
## PAIRED TTEST on M-F1 Score
###############################

sample =list([1,100, 500, 1000, 1500])
filtered_df1 = filtered_df[['Model', 'M-F1-Score']].copy()
no_of_mdls = len(filtered_df1.Model.unique())
no_of_mdls

filtered_df1['sample_seed'] = sample * no_of_mdls
filtered_df1

pvt_filtered_df1 = filtered_df1.pivot(index='Model', columns='sample_seed', values='M-F1-Score')
pvt_filtered_df1 = pvt_filtered_df1 .reset_index()
pvt_filtered_df1

column_mapping = {1: 'S1', 100: 'S100', 500: 'S500', 1000: 'S1000', 1500: 'S1500'}
pvt_filtered_df2 = pvt_filtered_df1.rename(columns=column_mapping).rename_axis(columns=None)
pvt_filtered_df2



temp=pvt_filtered_df2.iloc[:, 1:].mul(100)
temp1= pd.concat([pvt_filtered_df2['Model'], temp], axis=1)


# Set the 'Model' column as the index
temp1.set_index('Model', inplace=True)

# Select the row for subtraction
subtract_from = temp1.loc['Pretrained_BERTEmbd_with_XGB']


# Subtract the selected row from all other rows
df_subtracted = temp1.subtract(subtract_from).abs()

# Reset the index
df_subtracted.reset_index(inplace=True)

# Print the modified DataFrame
print()
print('M-F-1 Difference from Base model Table:')
display(df_subtracted)


###################################
## Accept or Reject Null Hypothesis
###################################

# Calculate Mean
df_subtracted['Mean'] = df_subtracted.iloc[:, 1:].sum(axis=1) / 5


# Apply the function to each row and assign the results to the 'StdDev' column
df_subtracted['StdDev'] = df_subtracted.apply(calculate_stddev, axis=1)


# Apply the function to each row and assign the results to the 't_stats' column
df_subtracted['t_stats'] = df_subtracted.apply(calculate_t_stats, axis=1)


# # import numpy as np

df_subtracted['HYPOTHESIS_CHECK'] = np.where(df_subtracted['t_stats'].isna(), np.nan, 
                                             np.where((-2.776 <= df_subtracted['t_stats']) & 
                                                      (df_subtracted['t_stats'] <= 2.776), 
                                                      'Accept_Null', 'Reject_Null'))


df_subtracted = df_subtracted[['Model', 't_stats', 'HYPOTHESIS_CHECK']]
# #Accept NULL = No significance difference between two classifiers. 
display(df_subtracted)

Models Performance on Test data Table: 


Unnamed: 0,Model,EvalSet,M-Precision,M-Recall,M-F1-Score
0,Pretrained_BERTEmbd_with_NN,test,0.959,0.958,0.959
1,Pretrained_BERTEmbd_with_NN,test,0.952,0.946,0.946
2,Pretrained_BERTEmbd_with_NN,test,0.953,0.952,0.951
3,Pretrained_BERTEmbd_with_NN,test,0.955,0.953,0.954
4,Pretrained_BERTEmbd_with_NN,test,0.956,0.955,0.955
5,BERT_base_uncased,test,0.959,0.954,0.954
6,BERT_base_uncased,test,0.959,0.953,0.954
7,BERT_base_uncased,test,0.956,0.947,0.948
8,BERT_base_uncased,test,0.955,0.953,0.954
9,BERT_base_uncased,test,0.958,0.95,0.951


t-table score 2.776

M-F-1 Difference from Base model Table:


Unnamed: 0,Model,S1,S100,S500,S1000,S1500
0,BERT_base_uncased,0.2,0.1,0.5,0.3,0.0
1,Pretrained_BERTEmbd_with_NN,0.3,0.7,0.2,0.3,0.4
2,Pretrained_BERTEmbd_with_XGB,0.0,0.0,0.0,0.0,0.0


  t_stats = (row['Mean'] * np.sqrt(samples))/row['StdDev']


Unnamed: 0,Model,t_stats,HYPOTHESIS_CHECK
0,BERT_base_uncased,2.557448,Accept_Null
1,Pretrained_BERTEmbd_with_NN,4.41741,Reject_Null
2,Pretrained_BERTEmbd_with_XGB,,


In [3]:
df_subtracted.to_excel(f'../result_xls/mdl_compr_ttest_result/model_comparison_ttest.xlsx',
                       index=False)