# MODEL EVALUATION + FRIEDMAN TEST

### Requirements

In [184]:
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from sklearn import metrics, datasets
from itertools import combinations
import pingouin as pg
import scikit_posthocs as sp

### Leave one out cross valiadation

In [185]:
def apply_LeaveOneOut(X,y):
    cv = LeaveOneOut()
    # enumerate splits
    y_true, y_pred = list(), list()
    for train_ix, test_ix in cv.split(X):
        # split data
        X_train, X_test = X[train_ix, :], X[test_ix, :]
        y_train, y_test = y[train_ix], y[test_ix]
        # fit model
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)
        # evaluate model
        yhat = model.predict(X_test)
        # store
        y_true.append(y_test[0])
        y_pred.append(yhat[0])
    # calculate accuracy
    acc = accuracy_score(y_true, y_pred)
    print('Accuracy Score: %.3f' % acc)
    return acc,y_true,y_pred

### Calculate metrics

In [186]:
def perf_measure(y_actual, y_hat):
    TP = 0
    FP = 0
    TN = 0
    FN = 0
    
    for i in range(len(y_hat)): 
        if y_actual[i]==y_hat[i]==1:
           TP += 1
        if y_hat[i]==1 and y_actual[i]!=y_hat[i]:
           FP += 1
        if y_actual[i]==y_hat[i]==0:
           TN += 1
        if y_hat[i]==0 and y_actual[i]!=y_hat[i]:
           FN += 1
    print ('True Positive :',TP)
    print ('True Negative :',TN)  
    print ('False Positive :',FP)  
    print ('False Negative :',FN)
    
    return TP, FP, TN, FN 

### All merged

In [187]:
X,y = make_blobs(n_samples=100, random_state=42)
acc,y_true,y_pred = apply_LeaveOneOut(X,y)
TP,FP,TN,FN = perf_measure(y_true,y_pred)

Accuracy Score: 1.000
True Positive : 33
True Negative : 34
False Positive : 0
False Negative : 0


### Friedman Test

In [188]:
def friedman_test(algo_performance,a_list):
    for alpha in a_list:
        results = pg.friedman(data=algo_performance)
        q, p = results['Q'].values.tolist()[0], results['p-unc'].values.tolist()[0]
        print(f"Q={round(q,3)}, p={p}")
        # interpret
        print ('With alpha = ',alpha)
        if p > alpha:
            # They dont have statistically significant differences between them
            print('Test Result: Same distributions\n')
        else:
            # They have statistically significant differences between them
            print('Test Result: Different distributions\n')

In [189]:
names=['C4.5','1-NN','NaiveBayes','Kernel','CN2']
a_list = [0.001,0.005,0.01,0.05,0.1]
algo_performance = pd.read_csv('algo_performance.csv',sep=',',index_col=0)
algo_performance = algo_performance.reset_index(level=0)
print ('FRIEDMAN TEST:')
friedman_test(algo_performance,a_list)

FRIEDMAN TEST:
Q=39.913, p=4.512033059024903e-08
With alpha =  0.001
Test Result: Different distributions

Q=39.913, p=4.512033059024903e-08
With alpha =  0.005
Test Result: Different distributions

Q=39.913, p=4.512033059024903e-08
With alpha =  0.01
Test Result: Different distributions

Q=39.913, p=4.512033059024903e-08
With alpha =  0.05
Test Result: Different distributions

Q=39.913, p=4.512033059024903e-08
With alpha =  0.1
Test Result: Different distributions



### Nemenyi Test

In [190]:
print('\nNEMENYI TEST:') 
print('ALGORITHMS ADJACENCY MATRIX OF P VALUES')
results = sp.posthoc_nemenyi(np.array(algo_performance.T)).values.tolist()
results = pd.DataFrame(results,columns=names,index=names)
print(results)


NEMENYI TEST:
ALGORITHMS ADJACENCY MATRIX OF P VALUES
                C4.5      1-NN  NaiveBayes    Kernel       CN2
C4.5        1.000000  0.450299    0.979836  0.001110  0.727846
1-NN        0.450299  1.000000    0.809691  0.237537  0.993329
NaiveBayes  0.979836  0.809691    1.000000  0.011000  0.963403
Kernel      0.001110  0.237537    0.011000  1.000000  0.089031
CN2         0.727846  0.993329    0.963403  0.089031  1.000000


Είναι προφανές πως οι αλγόριθμοι έχουν μεταξύ τους στατιστικά σημαντικές διαφορές.
Αυτό αποδείχθηκε από το Friedman test για όλα τα alpha που δοκιμάστηκαν.
Το παραπάνω συμπέρασμα αποδείχθηκε επίσης από το Nemenyi Test. Όπου είναι ξεκάθαρο πως τα παρακάτω ζευγάρια αλγορίθμων έχουν μεταξύ τους στατιστικά σημαντικές διαφορές (a = 0.01)
1) Kernel - C4.5

2) NaiveBayes - Kernel

3) Kernel & CN2

### Pairwise Friedman Test of Algorithms (a = 0.01)

Παραπάνω αποδείξαμε πριν με το  Nemenyi Test oτι κάποια συγκεκριμένα ζευγάρια αλγορίθμων έχουν μεταξύ τους σημαντικές στατιστικές διαφορές.
Παρακάτω αποδεικνύουμε την ενλόγω υπόθεση και με το Friedman Test.

In [191]:
def friedman_test_sep(algo_performance,all_pairs):
    for names in all_pairs:
        results = pg.friedman(data=algo_performance[list(names)])
        q, p = results['Q'].values.tolist()[0], results['p-unc'].values.tolist()[0]
        print('Algorithms = ', names[0], '&', names[1])
        print(f"Q={round(q,3)}, p={p}")
        # interpret
        if p > 0.001:
            # They dont have statistically significant differences between them
            print('Test Result: Same distributions\n')
        else:
            # They have statistically significant differences between them
            print('Test Result: Different distributions\n')

In [192]:
all_pairs = list(combinations(names, 2))
friedman_test_sep(algo_performance,all_pairs)

Algorithms =  C4.5 & 1-NN
Q=7.759, p=0.005345676872654246
Test Result: Same distributions

Algorithms =  C4.5 & NaiveBayes
Q=0.133, p=0.7150006546880892
Test Result: Same distributions

Algorithms =  C4.5 & Kernel
Q=19.2, p=1.1771339097615013e-05
Test Result: Different distributions

Algorithms =  C4.5 & CN2
Q=9.966, p=0.0015949936169505205
Test Result: Same distributions

Algorithms =  1-NN & NaiveBayes
Q=4.8, p=0.028459736916310617
Test Result: Same distributions

Algorithms =  1-NN & Kernel
Q=8.533, p=0.003487004892141391
Test Result: Same distributions

Algorithms =  1-NN & CN2
Q=0.533, p=0.4652088184521417
Test Result: Same distributions

Algorithms =  NaiveBayes & Kernel
Q=13.333, p=0.00026072963285531705
Test Result: Different distributions

Algorithms =  NaiveBayes & CN2
Q=7.0, p=0.008150971593502695
Test Result: Same distributions

Algorithms =  Kernel & CN2
Q=13.333, p=0.00026072963285531705
Test Result: Different distributions

