In [54]:
#importing necessary libraries

import numpy as np
import pandas as pd
import statistics
import math
from time import time
from sklearn.metrics import accuracy_score,f1_score
from sklearn.tree import  DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB 
from sklearn.model_selection import train_test_split, StratifiedKFold

In [55]:
#Loading spambase dataset in dat_file.
dat_file = pd.read_csv('spambase.csv')

In [56]:
# Splitting output variable from rest data
x = dat_file.drop(columns = "spam")
y = dat_file["spam"]
#x = dat_file.iloc[:,:-1]
#y = dat_file.iloc[:,-1]

In [57]:
#initialization of models 
tree = DecisionTreeClassifier()
logreg = LogisticRegression(max_iter=5000)
Bayes = GaussianNB()

#lists of metric measures over ten skfolds.
TT_tree=[]
TT_logreg=[]
TT_Bayes=[]

Acc_tree=[]
Acc_logreg=[]
Acc_Bayes=[]

F1_tree=[]
F1_logreg=[]
F1_Bayes=[]

In [58]:
#initializing stratifiedkfold and shuffle is set to true for randomizing the samples of each fold for each class.
skf = StratifiedKFold(n_splits=10, shuffle=True)
skf.get_n_splits(x, y)    

10

In [59]:
#running 10 fold cross validation
for train_index, test_index in skf.split(x, y):
    #print("Train:", train_index, "Test:", test_index)
    x_train = x.iloc[train_index, :]
    y_train = y[train_index]
    x_test = x.iloc[test_index, :]
    y_test = y[test_index]
    
    # Evaluating model's training time,accuracy and f1_score of current fold.
    #Decision tree
    begin=time()                             
    tree.fit(x_train,y_train)
    end=time()                              
    t1=round(end-begin,4)                  
    y_pred=tree.predict(x_test)       
    acc1=accuracy_score(y_test, y_pred)  
    f1_1=f1_score(y_test, y_pred)  
    
    # logistic regression
    begin=time()
    logreg.fit(x_train,y_train)
    end=time()
    t2=round(end-begin,4)
    y_pred=logreg.predict(x_test)
    acc2=accuracy_score(y_test, y_pred)
    f1_2=f1_score(y_test, y_pred)
    
    #Bayes
    begin=time()
    Bayes.fit(x_train,y_train)
    end=time()
    t3=round(end-begin,4)
    y_pred=Bayes.predict(x_test)
    acc3=accuracy_score(y_test, y_pred)
    f1_3=f1_score(y_test, y_pred)
    
    
    #loading the output in list of metric measures.
    TT_tree.append(t1)
    Acc_tree.append(acc1)
    F1_tree.append(f1_1)
    
    TT_logreg.append(t2)
    Acc_logreg.append(acc2)
    F1_logreg.append(f1_2)

    TT_Bayes.append(t3)
    Acc_Bayes.append(acc3)
    F1_Bayes.append(f1_3)
    
    

In [60]:
#lists of metric measures are converted to numpy arrays.
TT_tree=np.array(TT_tree)
Acc_tree=np.array(Acc_tree)
F1_tree=np.array(F1_tree)

TT_logreg=np.array(TT_logreg)
Acc_logreg=np.array(Acc_logreg)
F1_logreg=np.array(F1_logreg)

TT_Bayes=np.array(TT_Bayes)
Acc_Bayes=np.array(Acc_Bayes)
F1_Bayes=np.array(F1_Bayes)


In [61]:
#calculating mean and standard deviation of metric measures using numpy.
avg=np.mean(TT_tree)
stddev=np.std(TT_tree)

avg=np.mean(Acc_tree)
stddev=np.std(Acc_tree)

avg=np.mean(F1_tree)
stddev=np.std(F1_tree)

avg=np.mean(TT_logreg)
stddev=np.std(TT_logreg)

avg=np.mean(Acc_logreg)
stddev=np.std(Acc_logreg)

avg=np.mean(F1_logreg)
stddev=np.std(F1_logreg)

avg=np.mean(TT_Bayes)
stddev=np.std(TT_Bayes)

avg=np.mean(Acc_Bayes)
stddev=np.std(Acc_Bayes)

avg=np.mean(F1_Bayes)
stddev=np.std(F1_Bayes)

In [62]:
#appending avg and standard deviation values to numpy arrays.
TT_tree=np.append(TT_tree,avg)
TT_tree=np.append(TT_tree,stddev)

Acc_tree=np.append(Acc_tree,avg)
Acc_tree=np.append(Acc_tree,stddev)

F1_tree=np.append(F1_tree,avg)
F1_tree=np.append(F1_tree,stddev)

TT_logreg=np.append(TT_logreg,avg)
TT_logreg=np.append(TT_logreg,stddev)

Acc_logreg=np.append(Acc_logreg,avg)
Acc_logreg=np.append(Acc_logreg,stddev)

F1_logreg=np.append(F1_logreg,avg)
F1_logreg=np.append(F1_logreg,stddev)

TT_Bayes=np.append(TT_Bayes,avg)
TT_Bayes=np.append(TT_Bayes,stddev)

Acc_Bayes=np.append(Acc_Bayes,avg)
Acc_Bayes=np.append(Acc_Bayes,stddev)

F1_Bayes=np.append(F1_Bayes,avg)
F1_Bayes=np.append(F1_Bayes,stddev)

In [63]:
#Creating dictionaries to store metric measures of decision tree,logistic regression,Naive Bayes.

TT_dict={'Decision Tree': TT_tree ,
         'logistic regression': TT_logreg,
         'Bayes': TT_Bayes }

Acc_dict={'Decision Tree': Acc_tree ,
          'logistic regression': Acc_logreg,
          'Bayes': Acc_Bayes }

F1_dict={'Decision Tree': F1_tree ,
         'logistic regression': F1_logreg, 
         'Bayes':F1_Bayes }


In [64]:
index_array = np.arange(1,11)
index_array = index_array.astype(str)
index_array = np.append(index_array,('avg','std_dev'))

#making dataframes from the recorded metrics and making a rank based version of these dataframes.

Acc_df = pd.DataFrame(Acc_dict, index = index_array)
TT_df = pd.DataFrame(TT_dict, index = index_array)
F1_df = pd.DataFrame(F1_dict, index = index_array)

print(F1_df)
print(Acc_df)
print(TT_df)


         Decision Tree  logistic regression     Bayes
1             0.876712             0.917127  0.788155
2             0.910995             0.901961  0.791762
3             0.882192             0.932584  0.802752
4             0.901961             0.905556  0.769231
5             0.895604             0.899713  0.820276
6             0.900000             0.901408  0.825472
7             0.916890             0.885057  0.790698
8             0.885870             0.929972  0.801865
9             0.871935             0.857143  0.807339
10            0.898630             0.924791  0.806452
avg           0.800400             0.800400  0.800400
std_dev       0.015512             0.015512  0.015512
         Decision Tree  logistic regression     Bayes
1             0.902386             0.934924  0.798265
2             0.926087             0.923913  0.802174
3             0.906522             0.947826  0.813043
4             0.923913             0.926087  0.778261
5             0.917391      

In [65]:
# Conducting friedman test and nemeyi test for F1 score of selected models.
F1_df = F1_df.rank(axis=1,method='dense',ascending=False)
F1_df = F1_df.drop(['avg','std_dev'])
F1_df.loc['avg_rank']=F1_df.mean()
print(F1_df)
avg_rank_acc = F1_df.mean(axis=1)[-1]

c1 = F1_df['Decision Tree'][-1]
c2 = F1_df['logistic regression'][-1]
c3 = F1_df['Bayes'][-1]

c = np.array([c1,c2,c3])
c = c - avg_rank_acc
sum_square = 10 * np.sum(np.square(c))

#sum_square = 10*((c1-avg_rank_acc)**2 + (c2-avg_rank_acc)**2 + (c3-avg_rank_acc)**2)
F1_df = F1_df.drop(['avg_rank'])
acc_np = F1_df.to_numpy()
sum_square2 = np.sum(np.square(acc_np-2))/20
f_stat = sum_square/sum_square2


print('friedman statistic : '+str(f_stat))

crit_val = 7.8 #for k=3, n = 10 and alpha = 0.05

if f_stat>crit_val:
    print("There  is  statistically  significant difference in behaviour between the three models. Null Hypothesis is rejected")
else:
    print("There is no difference in behaviour between three models")

q_alpha = 2.343
k = 3
n = 10
alpha = 0.05

crit_diff = q_alpha*math.sqrt(k*(k+1)/(6*n))

print("The critical difference is :" + str(crit_diff))
if abs(c1-c2) > crit_diff:
    print("The performance of Decision Tree and Logistic Regression is not equivalent.")

    
if abs(c1-c3) > crit_diff:
    print("The performance of Decision Tree and Bayes is not equivalent.")


if abs(c2-c3) > crit_diff:
    print("The performance of Bayes and Logistic Regression is not equivalent.")

          Decision Tree  logistic regression  Bayes
1                   2.0                  1.0    3.0
2                   1.0                  2.0    3.0
3                   2.0                  1.0    3.0
4                   2.0                  1.0    3.0
5                   2.0                  1.0    3.0
6                   2.0                  1.0    3.0
7                   1.0                  2.0    3.0
8                   2.0                  1.0    3.0
9                   1.0                  2.0    3.0
10                  2.0                  1.0    3.0
avg_rank            1.7                  1.3    3.0
friedman statistic : 15.8
There  is  statistically  significant difference in behaviour between the three models. Null Hypothesis is rejected
The critical difference is :1.0478214542564015
The performance of Decision Tree and Bayes is not equivalent.
The performance of Bayes and Logistic Regression is not equivalent.


In [66]:
  
# Conducting friedman test and nemeyi test for Accuracy of selected models 
#Friedman test
Acc_df = Acc_df.rank(axis=1,method='dense',ascending=False)
Acc_df = Acc_df.drop(['avg','std_dev'])
Acc_df.loc['avg_rank']=Acc_df.mean()
print(Acc_df)
avg_rank_acc = Acc_df.mean(axis=1)[-1]


a1 = Acc_df['Decision Tree'][-1]
a2 = Acc_df['logistic regression'][-1]
a3 = Acc_df['Bayes'][-1]


#sum of squared difference
a = np.array([a1,a2,a3])
a = a - avg_rank_acc
sum_square = 10 * np.sum(np.square(a))
#sum_square = 10*((a1-avg_rank_acc)**2 + (a2-avg_rank_acc)**2 + (a3-avg_rank_acc)**2)


Acc_df = Acc_df.drop(['avg_rank'])
acc_np = Acc_df.to_numpy() 
sum_square2 = np.sum(np.square(acc_np-2))/20

f_stat = sum_square/sum_square2


print('friedman statistic : '+str(f_stat))

crit_val = 7.8 #for k=3, n = 10 and alpha = 0.05

if f_stat > crit_val:
    print("There  is  statistically  significant difference in behaviour between the three models. Null Hypothesis is rejected")
else:
    print("There is no difference in behaviour between three models")

    
# Nemeyi test
q_alpha = 2.343 #from text book
k = 3
n = 10
alpha = 0.05

crit_diff = q_alpha * math.sqrt(k*(k+1)/(6*n))

print("The critical difference is :" + str(crit_diff))
if abs(a1-a2)>crit_diff:
    print("The performance of Decision Tree and Logistic Regression is not equivalent.")
    
if abs(a1-a3)>crit_diff:
    print("The performance of Decision Tree and Bayes is not equivalent.")

if abs(a2-a3)>crit_diff:
    print("The performance of Bayes and Logistic Regression is not equivalent.")
    

          Decision Tree  logistic regression  Bayes
1                   2.0                  1.0    3.0
2                   1.0                  2.0    3.0
3                   2.0                  1.0    3.0
4                   2.0                  1.0    3.0
5                   2.0                  1.0    3.0
6                   2.0                  1.0    3.0
7                   1.0                  2.0    3.0
8                   2.0                  1.0    3.0
9                   1.0                  2.0    3.0
10                  2.0                  1.0    3.0
avg_rank            1.7                  1.3    3.0
friedman statistic : 15.8
There  is  statistically  significant difference in behaviour between the three models. Null Hypothesis is rejected
The critical difference is :1.0478214542564015
The performance of Decision Tree and Bayes is not equivalent.
The performance of Bayes and Logistic Regression is not equivalent.


In [67]:
    
# Conducting friedman test and nemeyi test for training time of selected models 
TT_df = TT_df.rank(axis=1,method='dense',ascending=True)
TT_df = TT_df.drop(['avg','std_dev'])
TT_df.loc['avg_rank']=TT_df.mean()
print(TT_df)
avg_rank_acc = TT_df.mean(axis=1)[-1]


b1 = TT_df['Decision Tree'][-1]
b2 = TT_df['logistic regression'][-1]
b3 = TT_df['Bayes'][-1]

b = np.array([b1,b2,b3])
b = b - avg_rank_acc
sum_square = 10 * np.sum(np.square(b))
#sum_square = 10*((b1-avg_rank_acc)**2 + (b2-avg_rank_acc)**2 + (b3-avg_rank_acc)**2)

TT_df = TT_df.drop(['avg_rank'])
acc_np = TT_df.to_numpy()
sum_square2 = np.sum(np.square(acc_np-2))/20
f_stat = sum_square/sum_square2


print('friedman statistic : '+str(f_stat))

crit_val = 7.8 #for k=3, n = 10 and alpha = 0.05

if f_stat > crit_val:
    print("There  is  statistically  significant difference in behaviour between the three models. Null Hypothesis is rejected")
else:
    print("There is no difference in behaviour between three models")

# Nemeyi test
q_alpha = 2.343
k = 3
n = 10
alpha = 0.05

crit_diff = q_alpha * math.sqrt(k*(k+1)/(6*n))

print("The critical difference is :" + str(crit_diff))
if abs(b1-b2)>crit_diff:
    print("The performance of Decision Tree and Logistic Regression is not equivalent.")

if abs(b1-b3)>crit_diff:
    print("The performance of Decision Tree and Bayes is not equivalent.")

if abs(b2-b3)>crit_diff:
    print("The performance of Bayes and Logistic Regression is not equivalent.")

          Decision Tree  logistic regression  Bayes
1                   2.0                  3.0    1.0
2                   2.0                  3.0    1.0
3                   2.0                  3.0    1.0
4                   2.0                  3.0    1.0
5                   2.0                  3.0    1.0
6                   2.0                  3.0    1.0
7                   2.0                  3.0    1.0
8                   2.0                  3.0    1.0
9                   2.0                  3.0    1.0
10                  2.0                  3.0    1.0
avg_rank            2.0                  3.0    1.0
friedman statistic : 20.0
There  is  statistically  significant difference in behaviour between the three models. Null Hypothesis is rejected
The critical difference is :1.0478214542564015
The performance of Bayes and Logistic Regression is not equivalent.


In [53]:
dat_file.describe()

Unnamed: 0,word_freq_make,word_freq_address,word_freq_all,word_freq_3d,word_freq_our,word_freq_over,word_freq_remove,word_freq_internet,word_freq_order,word_freq_mail,...,char_freq_;,char_freq_(,char_freq_[,char_freq_!,char_freq_$,char_freq_#,capital_run_length_average,capital_run_length_longest,capital_run_length_total,spam
count,4601.0,4601.0,4601.0,4601.0,4601.0,4601.0,4601.0,4601.0,4601.0,4601.0,...,4601.0,4601.0,4601.0,4601.0,4601.0,4601.0,4601.0,4601.0,4601.0,4601.0
mean,0.104553,0.213015,0.280656,0.064769,0.312223,0.095901,0.114208,0.105295,0.090067,0.239413,...,0.038575,0.13903,0.016976,0.269071,0.075811,0.044238,5.191515,52.172789,283.289285,0.394045
std,0.305358,1.290575,0.504143,1.392893,0.672513,0.273824,0.391441,0.401071,0.278616,0.644755,...,0.243471,0.270355,0.109394,0.815672,0.245882,0.429342,31.729449,194.89131,606.347851,0.488698
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.588,6.0,35.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.065,0.0,0.0,0.0,0.0,2.276,15.0,95.0,0.0
75%,0.0,0.0,0.42,0.0,0.38,0.0,0.0,0.0,0.0,0.16,...,0.0,0.188,0.0,0.315,0.052,0.0,3.706,43.0,266.0,1.0
max,4.54,14.28,5.1,43.0,10.0,5.88,7.27,11.11,5.26,18.18,...,4.385,9.752,4.081,32.478,6.003,19.829,1102.5,9989.0,15841.0,1.0
