# Comparação do desempenho dos classificadores por meio de métodos estatísticos

## Importando bibliotecas e resultados da validação cruzada

In [4]:
from scipy.stats import friedmanchisquare
import pandas as pd
import numpy as np
import scikit_posthocs as sp

knn = pd.read_excel('../resultados-cv/metrics_cv_adult_KNN.xlsx')
lvq = pd.read_excel('../resultados-cv/metrics_cv_adult_LVQ.xlsx')
ad = pd.read_excel('../resultados-cv/metrics_cv_adult_DTR.xlsx')
svm = pd.read_excel('../resultados-cv/metrics_cv_adult_SVC.xlsx')

Lista contendo as métricas que foram computadas:

In [5]:
metrics = knn.columns

print(metrics)

Index(['train_accuracy', 'test_accuracy', 'f1_score', 'AUC', 'precision',
       'recall'],
      dtype='object')


In [9]:
knn.describe()

Unnamed: 0,train_accuracy,test_accuracy,f1_score,AUC,precision,recall
count,10.0,10.0,10.0,10.0,10.0,10.0
mean,0.977101,0.860788,0.643807,0.772501,0.664299,0.624652
std,0.000319,0.00419,0.011054,0.007162,0.011806,0.013534
min,0.976541,0.855805,0.625911,0.759709,0.652139,0.598761
25%,0.976961,0.858636,0.637917,0.768371,0.656033,0.61677
50%,0.977104,0.859473,0.642744,0.772896,0.660645,0.626502
75%,0.977295,0.861891,0.646353,0.775332,0.669225,0.633212
max,0.977546,0.870474,0.667468,0.786269,0.691286,0.645236


In [10]:
lvq.describe()

Unnamed: 0,train_accuracy,test_accuracy,f1_score,AUC,precision,recall
count,10.0,10.0,10.0,10.0,10.0,10.0
mean,0.834892,0.834368,0.384643,0.642036,0.485655,0.319948
std,0.025224,0.024934,0.266516,0.099263,0.335438,0.223989
min,0.798453,0.798377,0.0,0.499902,0.0,0.0
25%,0.810612,0.810243,0.128662,0.545409,0.16474,0.102053
50%,0.84806,0.8473,0.526275,0.687924,0.689848,0.420991
75%,0.851918,0.850304,0.560822,0.70953,0.703259,0.472502
max,0.854265,0.856742,0.587629,0.730532,0.709635,0.530233


In [12]:
ad.describe()

Unnamed: 0,train_accuracy,test_accuracy,f1_score,AUC,precision,recall
count,10.0,10.0,10.0,10.0,10.0,10.0
mean,0.849698,0.846977,0.514884,0.682785,0.712502,0.407817
std,0.006661,0.007714,0.049104,0.029392,0.036933,0.068644
min,0.838454,0.831305,0.453212,0.650253,0.653061,0.336174
25%,0.847371,0.844452,0.488886,0.666951,0.700216,0.370643
50%,0.848216,0.846922,0.498487,0.671819,0.709935,0.380472
75%,0.84922,0.851202,0.531853,0.689291,0.730233,0.41615
max,0.861564,0.859885,0.599465,0.734607,0.779174,0.537209


In [13]:
svm.describe()

Unnamed: 0,train_accuracy,test_accuracy,f1_score,AUC,precision,recall
count,10.0,10.0,10.0,10.0,10.0,10.0
mean,0.870472,0.870104,0.641576,0.760578,0.722288,0.577161
std,0.000255,0.002687,0.006929,0.004083,0.010416,0.007799
min,0.870063,0.864856,0.630231,0.755241,0.702188,0.568992
25%,0.870258,0.868661,0.637744,0.757858,0.716662,0.572231
50%,0.870512,0.870708,0.641926,0.760403,0.72338,0.574361
75%,0.87066,0.872113,0.646075,0.762153,0.727883,0.578764
max,0.870878,0.873439,0.651341,0.767652,0.739521,0.592564


## Teste de Friedman

In [7]:
for metric in metrics:
    statistic, p_value = friedmanchisquare(knn[metric], ad[metric], svm[metric])
    print(f"Estatística: {statistic}, p-value: {p_value}")

    if p_value < 0.05:
        print(f"Para a métrica {metric} temos diferença estatística significativa entre os classificadores.\n")
    else:
        print(f"Para a métrica {metric} NÃO temos diferença estatística significativa entre os classificadores.\n")

Estatística: 20.0, p-value: 4.539992976248486e-05
Para a métrica train_accuracy temos diferença estatística significativa entre os classificadores.

Estatística: 18.200000000000017, p-value: 0.0001116658084901137
Para a métrica test_accuracy temos diferença estatística significativa entre os classificadores.

Estatística: 15.0, p-value: 0.0005530843701478337
Para a métrica f1_score temos diferença estatística significativa entre os classificadores.

Estatística: 18.200000000000017, p-value: 0.0001116658084901137
Para a métrica AUC temos diferença estatística significativa entre os classificadores.

Estatística: 11.400000000000006, p-value: 0.003345965457471265
Para a métrica precision temos diferença estatística significativa entre os classificadores.

Estatística: 20.0, p-value: 4.539992976248486e-05
Para a métrica recall temos diferença estatística significativa entre os classificadores.



## Teste Post-Hoc: Nemenyi

In [8]:
for metric in metrics:
    nemenyi_groups = np.array([knn[metric], ad[metric], svm[metric], lvq[metric]])
    
    nemenyi_groups = nemenyi_groups.T

    nemenyi_results = sp.posthoc_nemenyi_friedman(nemenyi_groups)
    
    print(f'\nResultados do teste post-hoc de Nemenyi para a métrica \033[1m{metric}\033[0m: \n{nemenyi_results}\n')
    
    for i in range(len(nemenyi_results)):
        for j in range(i + 1, len(nemenyi_results)):
            if nemenyi_results.iloc[i, j] < 0.05:
                print(f"Classificadores {nemenyi_results.index[i]} vs {nemenyi_results.columns[j]} têm diferença significativa\n")


Resultados do teste post-hoc de Nemenyi para a métrica [1mtrain_accuracy[0m: 
         0         1         2         3
0  1.00000  0.001000  0.307130  0.001000
1  0.00100  1.000000  0.160247  0.701825
2  0.30713  0.160247  1.000000  0.009860
3  0.00100  0.701825  0.009860  1.000000

Classificadores 0 vs 1 têm diferença significativa

Classificadores 0 vs 3 têm diferença significativa

Classificadores 2 vs 3 têm diferença significativa


Resultados do teste post-hoc de Nemenyi para a métrica [1mtest_accuracy[0m: 
          0         1         2         3
0  1.000000  0.225871  0.225871  0.028569
1  0.225871  1.000000  0.001000  0.799047
2  0.225871  0.001000  1.000000  0.001000
3  0.028569  0.799047  0.001000  1.000000

Classificadores 0 vs 3 têm diferença significativa

Classificadores 1 vs 2 têm diferença significativa

Classificadores 2 vs 3 têm diferença significativa


Resultados do teste post-hoc de Nemenyi para a métrica [1mf1_score[0m: 
         0         1        2      