In [1]:
import numpy as np
import pandas as pd
import scipy.stats as st
from tqdm import tqdm_notebook as tqdm

import seaborn as sns

import matplotlib.pyplot as plt

from permute.core import two_sample

from arch.bootstrap import IIDBootstrap

from statsmodels.stats.diagnostic import kstest_normal

from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_digits
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from sklearn.metrics import roc_auc_score

import itertools

from statsmodels.stats.multitest import multipletests

# Задача 9

In [2]:
def get_scores(model, X_train, y_train, X_test, y_test, metric = roc_auc_score):
    
    a = np.unique(y_train)
    pair = list(itertools.combinations(range(a.shape[0]), 2))
    
    List_of_auc = []
    List_of_acc = []
    
    for i, j in pair:
        indexes = np.where((y_train == i) + (y_train == j))[0]
        X_train_new = X_train[indexes]
        y_train_new = np.array(y_train[indexes])
        indexes = np.where((y_test == i) + (y_test == j))[0]
        X_test_new = X_test[indexes]
        y_test_new = np.array(y_test[indexes])
        
        y_train_new[np.where(y_train_new == i)] = np.zeros_like(y_train_new[np.where(y_train_new == i)])
        y_train_new[np.where(y_train_new == j)] = np.ones_like(y_train_new[np.where(y_train_new == j)])
        y_test_new[np.where(y_test_new == i)] = np.zeros_like(y_test_new[np.where(y_test_new == i)])
        y_test_new[np.where(y_test_new == j)] = np.ones_like(y_test_new[np.where(y_test_new == j)])
        
        model.fit(X_train_new, y_train_new)
        
        y_proba = model.predict_proba(X_test_new)
        y_redict = model.predict(X_test_new)
        
        List_of_auc.append(roc_auc_score(y_test_new, y_proba[:, 1]))
        List_of_acc.append((y_redict == y_test_new).sum()/y_test_new.shape[0])
    
    return List_of_auc, List_of_acc

In [3]:
List_of_method = [LogisticRegression, DecisionTreeClassifier, KNeighborsClassifier, SVC]
List_of_dataset = [load_wine(), load_digits(), load_breast_cancer(), load_iris()]

In [4]:
List_of_data = []

for dataset in List_of_dataset:
    
    X = scale(dataset.data)
    y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.5)
    
    List_of_data.append([X_train, y_train, X_test, y_test])

In [5]:
AUC = dict()
ACC = dict()

In [6]:
AUC['SVC'] = []
AUC['LogisticRegression'] = []
AUC['DecisionTreeClassifier'] = []
AUC['KNeighborsClassifier'] = []

ACC['SVC'] = []
ACC['LogisticRegression'] = []
ACC['DecisionTreeClassifier'] = []
ACC['KNeighborsClassifier'] = []

for X_train, y_train, X_test, y_test in List_of_data:

    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    
    for method in List_of_method:
        if method.__name__ == 'SVC':
            model = method(probability=True)
        else:
            model = method()
        auc, acc = get_scores(model, X_train, y_train, X_test, y_test, metric = roc_auc_score)
        AUC[method.__name__] += auc
        ACC[method.__name__] += acc
        
        
        
AUC['SVC'] = np.hstack(AUC['SVC'])
AUC['LogisticRegression'] = np.hstack(AUC['LogisticRegression'])
AUC['DecisionTreeClassifier'] = np.hstack(AUC['DecisionTreeClassifier'])
AUC['KNeighborsClassifier'] = np.hstack(AUC['KNeighborsClassifier'])

ACC['SVC'] = np.hstack(ACC['SVC'])
ACC['LogisticRegression'] = np.hstack(ACC['LogisticRegression'])
ACC['DecisionTreeClassifier'] = np.hstack(ACC['DecisionTreeClassifier'])
ACC['KNeighborsClassifier'] = np.hstack(ACC['KNeighborsClassifier'])

auc = np.vstack([AUC['SVC'], AUC['LogisticRegression'], AUC['DecisionTreeClassifier'], AUC['KNeighborsClassifier']])
auc = np.transpose(auc)

acc = np.vstack([ACC['SVC'], ACC['LogisticRegression'], ACC['DecisionTreeClassifier'], ACC['KNeighborsClassifier']])
acc = np.transpose(acc)


(89, 13) (89,) (89, 13) (89,)
(898, 64) (898,) (899, 64) (899,)
(284, 30) (284,) (285, 30) (285,)
(75, 4) (75,) (75, 4) (75,)


### Постановка задачи

Пусть имеем 4 разных метода бинарной классификации. Для каждой модели проведено 52 эксперимента для разных выборок. Нужно проверить, есть ли явный лидер среди заданых 4 методов.

Под лидером будем подрозумевать, что он является лучшим классификатором, чем все остальные.

Для каждой модели проверим следующую гипотезу:

$$H_0: m_i > m_j~\forall~j\not=i.$$

В силу того, что у нас 4 модели, воспользуемся множественной поправкой Холма.

### Решение

При помощи перестановочного критери для всех пар $i,j$ проверим гипотезу о том, что $m_i > m_j$ для точности моделей.

In [7]:
ACC_p_values = np.zeros(shape = [4, 4])
for i in tqdm(range(4)):
    for j in tqdm(range(4), leave = False):
        ACC_p_values[i, j] = two_sample(acc[:, i], acc[:, j],  alternative='less')[0]

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))




Получаем следующие $p$-value для перестановочного критерия.

In [8]:
print(ACC_p_values)

[[0.50111 0.03031 1.      0.3837 ]
 [0.97067 0.50083 1.      0.83989]
 [0.      0.      0.49924 0.     ]
 [0.61067 0.16009 1.      0.49816]]


Воспользуемся поправкой Холма на множественное тестирование.

In [9]:
List_of_answer = []
for i in range(4):
    List_of_answer.append(multipletests(ACC_p_values[i], method='holm')[0].any())
    
print('Is best?:')
print(np.array(List_of_answer ) == False)

Is best?:
[ True  True False  True]


Получаем, что все, кроме метода 'DecisionTreeClassifier' могут быть лучшимы.

При помощи перестановочного критери для всех пар $i,j$ проверим гипотезу о том, что $m_i > m_j$ для AUC моделей.

In [10]:
AUC_p_values = np.zeros(shape = [4, 4])
for i in tqdm(range(4)):
    for j in tqdm(range(4), leave = False):
        AUC_p_values[i, j] = two_sample(auc[:, i], auc[:, j],  alternative='less')[0]

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))




Получаем следующие $p$-value для перестановочного критерия.

In [11]:
print(AUC_p_values)

[[0.49937 0.15545 1.      0.9462 ]
 [0.84364 0.50282 1.      0.98741]
 [0.      0.      0.5012  0.     ]
 [0.0531  0.01351 1.      0.50146]]


Воспользуемся поправкой Холма на множественное тестирование.

In [12]:
List_of_answer = []
for i in range(4):
    List_of_answer.append(multipletests(AUC_p_values[i], method='holm')[0].any())
    
print('Is best?:')
print(np.array(List_of_answer ) == False)

Is best?:
[ True  True False  True]


Получаем, что все, кроме метода 'DecisionTreeClassifier' могут быть лучшимы.

### Вывод

Как видно из решения, получаем, что явного лидера у нас нету, но зато есть явный аутсайдер, это метод 'DecisionTreeClassifier'.