In [1]:
import pandas as pd
import numpy as np

Рассматриваются классификатор C4.5 и три его модификации: с оптимизацией гиперпараметра m, с оптимизацией гиперпараметра cf и с одновременной оптимизацией обоих гиперпараметров. Эти четыре классификатора сравнивались на 14 наборах данных. На каждом наборе данных был посчитан AUC каждого классификатора. Данные записаны в файле AUCs.txt.

Используя критерий знаковых рангов Уилкоксона, проведите попарное сравнение каждого классификатора с каждым. Выберите два классификатора, различие между которыми наиболее статистически значимо.

In [5]:
# Read the tab-separated file with the classifiers' AUC scores.
data = pd.read_csv(
    filepath_or_buffer='AUCs.txt',
    sep='\t',
)

In [12]:
# Preview the first rows to see how the file was parsed.
data.head()

Unnamed: 0.1,Unnamed: 0,C4.5,C4.5+m,C4.5+cf,C4.5+m+cf
0,adult (sample),0.763,0.768,0.771,0.798
1,breast cancer,0.599,0.591,0.59,0.569
2,breast cancer wisconsin,0.954,0.971,0.968,0.967
3,cmc,0.628,0.661,0.654,0.657
4,ionosphere,0.882,0.888,0.886,0.898


In [17]:
# Give the unnamed first column (it holds the dataset names) a meaningful label.
# The renamed frame is reassigned explicitly instead of mutating with
# inplace=True, so the data flow of this step is visible in the cell itself.
data = data.rename(columns = {'Unnamed: 0': 'dataset'})

In [18]:
# Check that the first column is now labelled 'dataset'.
data.head()

Unnamed: 0,dataset,C4.5,C4.5+m,C4.5+cf,C4.5+m+cf
0,adult (sample),0.763,0.768,0.771,0.798
1,breast cancer,0.599,0.591,0.59,0.569
2,breast cancer wisconsin,0.954,0.971,0.968,0.967
3,cmc,0.628,0.661,0.654,0.657
4,ionosphere,0.882,0.888,0.886,0.898


In [65]:
%%time
from scipy import stats
ranked_signs_data = []
for i, lhs_column in enumerate(data.drop(['dataset'], axis = 1).columns):
    for j, rhs_column in enumerate(data.drop(['dataset'], axis = 1).columns):
        if i >= j:
            continue
        stat, p_value = stats.wilcoxon(data[lhs_column], data[rhs_column])
        ranked_signs_data.append([lhs_column, rhs_column, stat, p_value])

CPU times: user 7.87 ms, sys: 469 µs, total: 8.34 ms
Wall time: 8.12 ms


In [66]:
# Collect the pairwise test records into a table, one row per classifier pair.
# The column names are passed to the constructor so that the frame is built
# correctly even when the record list is empty (assigning .columns afterwards
# would fail with a length mismatch in that case).
ranked_signs = pd.DataFrame(
    ranked_signs_data,
    columns = ['classifier_A', 'classifier_B', 'statistic', 'p'],
)
ranked_signs

Unnamed: 0,classifier_A,classifier_B,statistic,p
0,C4.5,C4.5+m,6.5,0.010757
1,C4.5,C4.5+cf,43.0,0.861262
2,C4.5,C4.5+m+cf,11.0,0.015906
3,C4.5+m,C4.5+cf,17.0,0.046333
4,C4.5+m,C4.5+m+cf,22.0,0.327826
5,C4.5+cf,C4.5+m+cf,10.0,0.022909


Сколько статистически значимых на уровне 0.05 различий мы обнаружили?

In [68]:
# Count the pairs whose uncorrected p-value is below the 0.05 level.
ranked_signs['p'].lt(0.05).value_counts()

True     4
False    2
Name: p, dtype: int64

Сравнивая 4 классификатора между собой, мы проверили 6 гипотез. Давайте сделаем поправку на множественную проверку. Начнём с метода Холма. Сколько гипотез можно отвергнуть на уровне значимости 0.05 после поправки этим методом?



In [69]:
# Holm correction of the pairwise p-values at the 0.05 level.
# multipletests is imported from its public module; the sandbox path is not
# part of statsmodels' stable API.
from statsmodels.stats.multitest import multipletests
# reject: boolean flags, p_corrected: adjusted p-values; a1 and a2 are the
# Sidak- and Bonferroni-corrected alpha levels, which are not used below.
reject, p_corrected, a1, a2 = multipletests(ranked_signs.p,
                                            alpha = 0.05,
                                            method = 'holm')

In [70]:
# Attach the adjusted p-values and the rejection flags to the results table.
ranked_signs = ranked_signs.assign(p_corrected = p_corrected, reject = reject)

In [72]:
# Show the results table together with the corrected p-values and reject flags.
ranked_signs

Unnamed: 0,classifier_A,classifier_B,statistic,p,p_corrected,reject
0,C4.5,C4.5+m,6.5,0.010757,0.064543,False
1,C4.5,C4.5+cf,43.0,0.861262,0.861262,False
2,C4.5,C4.5+m+cf,11.0,0.015906,0.079532,False
3,C4.5+m,C4.5+cf,17.0,0.046333,0.138998,False
4,C4.5+m,C4.5+m+cf,22.0,0.327826,0.655651,False
5,C4.5+cf,C4.5+m+cf,10.0,0.022909,0.091636,False


In [73]:
# Count how many hypotheses are rejected / kept after the correction.
ranked_signs['reject'].value_counts()

False    6
Name: reject, dtype: int64

Сколько гипотез можно отвергнуть на уровне значимости 0.05 после поправки методом Бенджамини-Хохберга?

In [74]:
# Benjamini-Hochberg (FDR) correction of the same pairwise p-values.
# multipletests is imported from its public module; the sandbox path is not
# part of statsmodels' stable API.
from statsmodels.stats.multitest import multipletests
# reject: boolean flags, p_corrected: adjusted p-values; a1 and a2 are the
# Sidak- and Bonferroni-corrected alpha levels, which are not used below.
reject, p_corrected, a1, a2 = multipletests(ranked_signs.p,
                                            alpha = 0.05,
                                            method = 'fdr_bh')

In [75]:
# Store the latest adjusted p-values and rejection flags in the results table;
# columns of the same name written by an earlier cell are overwritten.
ranked_signs = ranked_signs.assign(p_corrected = p_corrected, reject = reject)

In [76]:
# Show the results table with the current corrected p-values and reject flags.
ranked_signs

Unnamed: 0,classifier_A,classifier_B,statistic,p,p_corrected,reject
0,C4.5,C4.5+m,6.5,0.010757,0.045818,True
1,C4.5,C4.5+cf,43.0,0.861262,0.861262,False
2,C4.5,C4.5+m+cf,11.0,0.015906,0.045818,True
3,C4.5+m,C4.5+cf,17.0,0.046333,0.069499,False
4,C4.5+m,C4.5+m+cf,22.0,0.327826,0.393391,False
5,C4.5+cf,C4.5+m+cf,10.0,0.022909,0.045818,True


In [77]:
# Count how many hypotheses are rejected / kept after the correction.
ranked_signs['reject'].value_counts()

True     3
False    3
Name: reject, dtype: int64