In [None]:
#Code Written by Sai Sukheshwar Boganadula and Bala Subramanyam Pavan Kumar Kasturi

In [3]:
import time
import numpy as np
import math
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score
from scipy.stats import rankdata, chi2

In [4]:
link = 'https://archive.ics.uci.edu/static/public/94/data.csv'
data = pd.read_csv(link, sep=',')

In [5]:
data

Unnamed: 0,word_freq_make,word_freq_address,word_freq_all,word_freq_3d,word_freq_our,word_freq_over,word_freq_remove,word_freq_internet,word_freq_order,word_freq_mail,...,char_freq_;,char_freq_(,char_freq_[,char_freq_!,char_freq_$,char_freq_#,capital_run_length_average,capital_run_length_longest,capital_run_length_total,Class
0,0.00,0.64,0.64,0.0,0.32,0.00,0.00,0.00,0.00,0.00,...,0.000,0.000,0.0,0.778,0.000,0.000,3.756,61,278,1
1,0.21,0.28,0.50,0.0,0.14,0.28,0.21,0.07,0.00,0.94,...,0.000,0.132,0.0,0.372,0.180,0.048,5.114,101,1028,1
2,0.06,0.00,0.71,0.0,1.23,0.19,0.19,0.12,0.64,0.25,...,0.010,0.143,0.0,0.276,0.184,0.010,9.821,485,2259,1
3,0.00,0.00,0.00,0.0,0.63,0.00,0.31,0.63,0.31,0.63,...,0.000,0.137,0.0,0.137,0.000,0.000,3.537,40,191,1
4,0.00,0.00,0.00,0.0,0.63,0.00,0.31,0.63,0.31,0.63,...,0.000,0.135,0.0,0.135,0.000,0.000,3.537,40,191,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4596,0.31,0.00,0.62,0.0,0.00,0.31,0.00,0.00,0.00,0.00,...,0.000,0.232,0.0,0.000,0.000,0.000,1.142,3,88,0
4597,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,...,0.000,0.000,0.0,0.353,0.000,0.000,1.555,4,14,0
4598,0.30,0.00,0.30,0.0,0.00,0.00,0.00,0.00,0.00,0.00,...,0.102,0.718,0.0,0.000,0.000,0.000,1.404,6,118,0
4599,0.96,0.00,0.00,0.0,0.32,0.00,0.00,0.00,0.00,0.00,...,0.000,0.057,0.0,0.000,0.000,0.000,1.147,5,78,0


In [6]:


X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


algorithms = {
    'Logistic Regression': LogisticRegression(max_iter=5000),
    'Random Forest': RandomForestClassifier(),
    'Support Vector Machine': SVC()
}


accuracy_data = []
f1_data = []
training_time_data = []



In [7]:

n_splits = 10
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

for name, clf in algorithms.items():
    for fold, (train_idx, test_idx) in enumerate(skf.split(X, y), 1):
        X_train_fold, X_test_fold = X[train_idx], X[test_idx]
        y_train_fold, y_test_fold = y[train_idx], y[test_idx]

        start_time = time.time()
        clf.fit(X_train_fold, y_train_fold)
        training_time_fold = time.time() - start_time

        y_pred = clf.predict(X_test_fold)
        accuracy_fold = accuracy_score(y_test_fold, y_pred)
        f1_fold = f1_score(y_test_fold, y_pred)

        accuracy_data.append([fold, name, accuracy_fold])
        f1_data.append([fold, name, f1_fold])
        training_time_data.append([fold, name, training_time_fold])


columns_acc = ['Fold', 'Classifier', 'Accuracy']
columns_f1 = ['Fold', 'Classifier', 'F1-Score']
columns_training_time = ['Fold', 'Classifier', 'Training-Time']

df_accuracy = pd.DataFrame(accuracy_data, columns=columns_acc)
df_f1 = pd.DataFrame(f1_data, columns=columns_f1)
df_training_time = pd.DataFrame(training_time_data, columns=columns_training_time)



In [8]:
def display_results_with_ranks_and_stats(df_pivot, metric, invert=False):
    print(f"\nTable - {metric}")
    header = "Fold"
    for classifier in df_pivot.columns:
        header += f"\t{classifier}"
    print(header)

    ranked_values = np.zeros_like(df_pivot.values)
    for i in range(n_splits):
        if invert:
            ranked_values[i, :] = rankdata(df_pivot.values[i, :])
        else:
            ranked_values[i, :] = rankdata(-df_pivot.values[i, :])

    for i in range(n_splits):
        row = f"{i + 1}"
        for classifier in df_pivot.columns:
            rank = int(ranked_values[i, df_pivot.columns.get_loc(classifier)])
            value = df_pivot.at[i + 1, classifier]
            row += f"\t{value:.6f}({rank})"
        print(row)

    avg_ranks = np.mean(ranked_values, axis=0)
    avg_std_row = "Average Rank"
    for avg_rank in avg_ranks:
        avg_std_row += f"\t{avg_rank:.2f}"
    print(avg_std_row)

    avg = df_pivot.mean(axis=0)
    std = df_pivot.std(axis=0)
    avg_row = "Average"
    for classifier in df_pivot.columns:
        avg_row += f"\t{avg[classifier]:.6f}"
    print(avg_row)

    std_row = "Standard Deviation"
    for classifier in df_pivot.columns:
        std_row += f"\t{std[classifier]:.6f}"
    print(std_row)

In [9]:
df_accuracy_pivot = df_accuracy.pivot(index='Fold', columns='Classifier', values='Accuracy')
df_f1_pivot = df_f1.pivot(index='Fold', columns='Classifier', values='F1-Score')
df_training_time_pivot = df_training_time.pivot(index='Fold', columns='Classifier', values='Training-Time')

In [10]:
def calculate_ranks_and_critical_difference(df_pivot):
    ranked_values = np.zeros_like(df_pivot.values)
    for i in range(n_splits):
        ranked_values[i, :] = rankdata(-df_pivot.values[i, :])

    avg_ranks = np.mean(ranked_values, axis=0)
    k=3
    n=10
    qα=2.343
    CD=qα*(math.sqrt(k*(k+1)/(6*n)))
    critical_difference = CD  # Corrected critical difference value
    return avg_ranks, critical_difference

In [11]:
display_results_with_ranks_and_stats(df_accuracy_pivot, 'Accuracy')
display_results_with_ranks_and_stats(df_f1_pivot, 'F1 Score')
display_results_with_ranks_and_stats(df_training_time_pivot, 'Training Time', invert=True)


Table - Accuracy
Fold	Logistic Regression	Random Forest	Support Vector Machine
1	0.915401(2)	0.947939(1)	0.715835(3)
2	0.923913(2)	0.954348(1)	0.719565(3)
3	0.932609(2)	0.960870(1)	0.734783(3)
4	0.936957(2)	0.963043(1)	0.710870(3)
5	0.913043(2)	0.943478(1)	0.719565(3)
6	0.936957(2)	0.958696(1)	0.739130(3)
7	0.934783(2)	0.963043(1)	0.693478(3)
8	0.932609(2)	0.954348(1)	0.706522(3)
9	0.939130(2)	0.947826(1)	0.702174(3)
10	0.915217(2)	0.950000(1)	0.706522(3)
Average Rank	2.00	1.00	3.00
Average	0.928062	0.954359	0.714844
Standard Deviation	0.010194	0.006939	0.014159

Table - F1 Score
Fold	Logistic Regression	Random Forest	Support Vector Machine
1	0.891967(2)	0.933333(1)	0.552901(3)
2	0.901961(2)	0.941176(1)	0.559727(3)
3	0.913165(2)	0.950276(1)	0.561151(3)
4	0.918768(2)	0.952646(1)	0.572347(3)
5	0.890110(2)	0.928177(1)	0.565657(3)
6	0.920110(2)	0.947368(1)	0.565217(3)
7	0.915730(2)	0.952113(1)	0.546624(3)
8	0.912676(2)	0.940845(1)	0.565916(3)
9	0.918605(2)	0.932203(1)	0.538721(3)
10	0.890

In [12]:
avg_ranks_acc, crit_diff_acc = calculate_ranks_and_critical_difference(df_accuracy_pivot)
avg_ranks_f1, crit_diff_f1 = calculate_ranks_and_critical_difference(df_f1_pivot)
avg_ranks_time, crit_diff_time = calculate_ranks_and_critical_difference(df_training_time_pivot)

In [13]:
df_accuracy_ranks = df_accuracy_pivot.rank(axis=1, method='average', ascending=False)
df_f1_ranks = df_f1_pivot.rank(axis=1, method='average', ascending=False)
df_training_time_ranks = df_training_time_pivot.rank(axis=1, method='average', ascending=True)  # Lower training time is better

In [14]:
n=len(df_accuracy_ranks) #folds=10
k=df_accuracy_ranks.shape[1]      #models=3
Rbar=(k+1)/2          #Average rank = 2

print(f"n = {n}")
print(f"k = {k}")
print(f"Rbar = {Rbar}")

n = 10
k = 3
Rbar = 2.0


In [15]:
def friedman(n,k,Rbar,mean_rank_list,rank_table):
  #According to text book Friedman Statistic = First term/ Second Term

  #Calculating first term (Read report or text book)
  inner_part_1=0
  for i in range(k):
    inner_part_1 += (mean_rank_list[i]-2)**2
  first_term= n * inner_part_1

  #Calcing second term
  inner_part_2 = 0
  for i in range(n):
    for j in range (k):
      inner_part_2 += (rank_table.iloc[i,j]-Rbar)**2
  second_term = inner_part_2 / (n *(k - 1))

  friedman_statistic = first_term / second_term

  return friedman_statistic

CV = 6.20 #Critical value

def checkhypothesis(friedman_statistic, metric, CV):
  if (friedman_statistic > CV):
    print(f"As Friedman statistic for {metric}: {friedman_statistic}  > Critical Value:{CV}")
    print(f"According to Friedman's test, Null Hypothesis is False. There is a significant difference between {metric} scores of the machine learning algorithms.")
    print(" ")
    print("Hence performing Nemenyi test to find performance differences between classifiers")
    return False
  elif(friedman_statistic < CV):
    print(f"As Friedman statistic for {metric}: {friedman_statistic} < Critical Value:{CV}")
    print(f"According to Friedman's test, Null Hypothesis is True. There is no significant difference between {metric} scores of the machine learning algorithms.")
    print(" ")
    print("Nemenyi tests not needed")
    return True

In [16]:
qα=2.343
CD=qα*(math.sqrt(k*(k+1)/(6*n)))
print(f"Critical Difference is {CD}")

Critical Difference is 1.0478214542564015


In [17]:
def Nemenyitest(metric, clf_names, rank_mean_list):
  CD = 1.0478214542564015
  for i in range(k):
    for j in range(i + 1, k):
      difference = abs(rank_mean_list[i] - rank_mean_list[j])
      if difference > CD:
        if(rank_mean_list[i] < rank_mean_list[j]):
          print(f"{clf_names[j]} better than {clf_names[i] } in {metric} scores with a difference of {round(difference,3)}")
        else:
          print(f"{clf_names[i]} better than {clf_names[j] } in {metric} scores with a difference of {round(difference,3)}")
      else:
        print(f"{clf_names[j]}, {clf_names[i] } perform equally well in {metric} scores performance")

In [18]:
def mean_rank_list(table):
  clfnames=['Logistic Regression', 'Random Forest', 'Support Vector Machine']
  rank_mean_list=[table[clfnames[i]].mean() for i in range (0,3)]
  return rank_mean_list



In [19]:
acc_rank_mean_list=mean_rank_list(df_accuracy_ranks)
f1_rank_mean_list=mean_rank_list(df_f1_ranks)
time_rank_mean_list=mean_rank_list(df_training_time_ranks)

In [20]:
metrics=["accuracy", "f1_score", "Training Time"]
mean_rank_lists=[acc_rank_mean_list, f1_rank_mean_list, time_rank_mean_list]
rank_table_lists=[df_accuracy_ranks, df_f1_ranks, df_training_time_ranks]

#test pipeline:
#perform friedman test
#check hypothesis
#if null hypothesis false, perform nemenyi test
#show differences

def test_pipeline(metrics, mean_rank_lists, rank_table_lists, clf_names, n,k,Rbar,CV):
  for i in range (0,3):
    friedman_statistic_value = friedman(n,k,Rbar,mean_rank_lists[i],rank_table_lists[i])
    print(f"Performing Friedman's test for **{metrics[i]}**")
    print(" ")
    if(checkhypothesis(friedman_statistic_value, metrics[i], CV)==False):
      print("")
      Nemenyitest(metrics[i], clf_names, mean_rank_lists[i])
      print("-----------------------------------------------------------------------------------------------------------------------------------------------------------")
      print("")


In [21]:
clfnames=['Logistic Regression', 'Random Forest', 'Support Vector Machine']
test_pipeline(metrics, mean_rank_lists, rank_table_lists, clfnames, n,k,Rbar,CV)

Performing Friedman's test for **accuracy**
 
As Friedman statistic for accuracy: 20.0  > Critical Value:6.2
According to Friedman's test, Null Hypothesis is False. There is a significant difference between accuracy scores of the machine learning algorithms.
 
Hence performing Nemenyi test to find performance differences between classifiers

Random Forest, Logistic Regression perform equally well in accuracy scores performance
Support Vector Machine, Logistic Regression perform equally well in accuracy scores performance
Support Vector Machine better than Random Forest in accuracy scores with a difference of 2.0
-----------------------------------------------------------------------------------------------------------------------------------------------------------

Performing Friedman's test for **f1_score**
 
As Friedman statistic for f1_score: 20.0  > Critical Value:6.2
According to Friedman's test, Null Hypothesis is False. There is a significant difference between f1_score scores 