# Comparação do desempenho dos classificadores por meio de métodos estatísticos

## Importando bibliotecas e resultados da validação cruzada

In [20]:
from scipy.stats import friedmanchisquare
import pandas as pd
import numpy as np
import scikit_posthocs as sp

knn = pd.read_excel('../results/metrics_cv_wine_KNN.xlsx')
lvq = pd.read_excel('../results/metrics_cv_wine_LVQ.xlsx')
ad = pd.read_excel('../results/metrics_cv_wine_DTR.xlsx')
svm = pd.read_excel('../results/metrics_cv_wine_SVC.xlsx')
mlp = pd.read_excel('../results/metrics_cv_wine_MLP.xlsx')

Lista contendo as métricas que foram computadas:

In [23]:
metrics = knn.columns

print(metrics)

Index(['train_accuracy', 'test_accuracy', 'f1_score', 'precision', 'recall'], dtype='object')


In [25]:
knn.describe()

Unnamed: 0,train_accuracy,test_accuracy,f1_score,precision,recall
count,10.0,10.0,10.0,10.0,10.0
mean,1.0,0.665383,0.658328,0.660058,0.665383
std,0.0,0.012441,0.011402,0.015126,0.012441
min,1.0,0.64,0.635532,0.635671,0.64
25%,1.0,0.661017,0.655333,0.654079,0.661017
50%,1.0,0.665385,0.658562,0.656018,0.665385
75%,1.0,0.668464,0.661204,0.663641,0.668464
max,1.0,0.686154,0.67983,0.68789,0.686154


In [27]:
lvq.describe()

Unnamed: 0,train_accuracy,test_accuracy,f1_score,precision,recall
count,10.0,10.0,10.0,10.0,10.0
mean,0.466625,0.461903,0.329856,0.281051,0.461903
std,0.04875,0.042758,0.104898,0.146397,0.042758
min,0.436389,0.435385,0.264124,0.18956,0.435385
25%,0.436463,0.436923,0.265709,0.190902,0.436923
50%,0.436549,0.43726,0.266056,0.191196,0.43726
75%,0.503848,0.480354,0.406081,0.387106,0.480354
max,0.546092,0.550769,0.517512,0.512919,0.550769


In [29]:
ad.describe()

Unnamed: 0,train_accuracy,test_accuracy,f1_score,precision,recall
count,10.0,10.0,10.0,10.0,10.0
mean,0.593744,0.542097,0.519369,0.51707,0.542097
std,0.005117,0.022504,0.023634,0.024586,0.022504
min,0.587652,0.516179,0.490454,0.483972,0.516179
25%,0.589277,0.526538,0.502276,0.497454,0.526538
50%,0.593416,0.537341,0.511168,0.517237,0.537341
75%,0.597871,0.551538,0.533201,0.529509,0.551538
max,0.602257,0.586154,0.561809,0.561117,0.586154


In [31]:
svm.describe()

Unnamed: 0,train_accuracy,test_accuracy,f1_score,precision,recall
count,10.0,10.0,10.0,10.0,10.0
mean,0.537787,0.534246,0.462984,0.414596,0.534246
std,0.002373,0.015984,0.013881,0.012667,0.015984
min,0.534975,0.508475,0.440601,0.392618,0.508475
25%,0.53583,0.530227,0.45968,0.41013,0.530227
50%,0.537797,0.535385,0.46459,0.416784,0.535385
75%,0.539228,0.542692,0.470003,0.421668,0.542692
max,0.542066,0.557781,0.483408,0.432666,0.557781


## Teste de Friedman

In [34]:
for metric in metrics:
    statistic, p_value = friedmanchisquare(knn[metric], ad[metric], svm[metric])
    print(f"Estatística: {statistic}, p-value: {p_value}")

    if p_value < 0.05:
        print(f"Para a métrica {metric} temos diferença estatística significativa entre os classificadores.\n")
    else:
        print(f"Para a métrica {metric} NÃO temos diferença estatística significativa entre os classificadores.\n")

Estatística: 20.0, p-value: 4.539992976248486e-05
Para a métrica train_accuracy temos diferença estatística significativa entre os classificadores.

Estatística: 15.0, p-value: 0.0005530843701478337
Para a métrica test_accuracy temos diferença estatística significativa entre os classificadores.

Estatística: 20.0, p-value: 4.539992976248486e-05
Para a métrica f1_score temos diferença estatística significativa entre os classificadores.

Estatística: 20.0, p-value: 4.539992976248486e-05
Para a métrica precision temos diferença estatística significativa entre os classificadores.

Estatística: 15.0, p-value: 0.0005530843701478337
Para a métrica recall temos diferença estatística significativa entre os classificadores.



## Teste Post-Hoc: Nemenyi

In [37]:
for metric in metrics:
    nemenyi_groups = np.array([knn[metric], ad[metric], svm[metric], lvq[metric]])
    
    nemenyi_groups = nemenyi_groups.T

    nemenyi_results = sp.posthoc_nemenyi_friedman(nemenyi_groups)
    
    print(f'\nResultados do teste post-hoc de Nemenyi para a métrica \033[1m{metric}\033[0m: \n{nemenyi_results}\n')
    
    for i in range(len(nemenyi_results)):
        for j in range(i + 1, len(nemenyi_results)):
            if nemenyi_results.iloc[i, j] < 0.05:
                print(f"Classificadores {nemenyi_results.index[i]} vs {nemenyi_results.columns[j]} têm diferença significativa\n")


Resultados do teste post-hoc de Nemenyi para a métrica [1mtrain_accuracy[0m: 
          0         1         2         3
0  1.000000  0.307130  0.001572  0.001000
1  0.307130  1.000000  0.225871  0.005517
2  0.001572  0.225871  1.000000  0.507386
3  0.001000  0.005517  0.507386  1.000000

Classificadores 0 vs 2 têm diferença significativa

Classificadores 0 vs 3 têm diferença significativa

Classificadores 1 vs 3 têm diferença significativa


Resultados do teste post-hoc de Nemenyi para a métrica [1mtest_accuracy[0m: 
          0         1         2         3
0  1.000000  0.046280  0.028569  0.001000
1  0.046280  1.000000  0.900000  0.072567
2  0.028569  0.900000  1.000000  0.109694
3  0.001000  0.072567  0.109694  1.000000

Classificadores 0 vs 1 têm diferença significativa

Classificadores 0 vs 2 têm diferença significativa

Classificadores 0 vs 3 têm diferença significativa


Resultados do teste post-hoc de Nemenyi para a métrica [1mf1_score[0m: 
         0         1         2