In [1]:
from irace2 import irace, norm_sample, truncated_poisson, truncated_skellam

In [2]:
import numpy as np
import random
import scipy.stats as stats
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from tqdm import tqdm
from sklearn import preprocessing
import pandas as pd
from xgboost import XGBRegressor, XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
import statsmodels.stats.weightstats as stats
import scipy.stats as ss
from sklearn.model_selection import train_test_split,StratifiedShuffleSplit,cross_val_score
from scipy.stats import norm, poisson, skellam
from copy import copy, deepcopy

## Get Data

In [3]:
df = pd.read_csv('spect_train.csv')
X = preprocessing.normalize(df.drop(columns=['OVERALL_DIAGNOSIS']).to_numpy())
y = df['OVERALL_DIAGNOSIS'].to_numpy()

## Models

In [4]:
#all the parameters being configures must be set beforehand
models = [LogisticRegression(C=1), 
    RandomForestClassifier(n_estimators=100,max_depth=5),
    XGBClassifier(n_estimators=100,max_depth=6)]


parameters_dict = {
    'LogisticRegression': {'C': lambda loc : norm_sample(loc=loc, scale=1, min= 1e-2),
                            'penalty':['l2'],
                            'solver':['lbfgs','newton-cg','sag']},
    'RandomForestClassifier': {'n_estimators': lambda loc: truncated_skellam(loc, mu1=10, mu2=10, min=1), 
                                'max_depth': lambda loc: truncated_skellam(loc, mu1=1, mu2=1, min=1)},
    'XGBClassifier': {'sample_type': ['uniform','weighted'], 
                        'max_depth': lambda loc: truncated_skellam(loc, mu1=1, mu2=1, min=1)}
}

## Run race

In [5]:
stat_test = ss.ttest_rel #stats.ttest_ind, stats.mannwhitneyu

pop, pop_scores = irace(models, X, y, lambda x: x > 100, stat_test, parameters_dict, pop_size = 20, cv = 10, scoring='f1_macro')


Gen 0

Average scores: 0.7173928571428572
Gen 1

Average scores: 0.7334978354978354
Gen 2

Average scores: 0.7455003607503607
Gen 3

Average scores: 0.7533520923520924
Gen 4

Average scores: 0.7533520923520924
Gen 5

Average scores: 0.7533520923520924
Gen 6

Average scores: 0.7533520923520924
Gen 7

Average scores: 0.7533520923520924
Gen 8

Average scores: 0.7533520923520924
Gen 9

Average scores: 0.7573950216450217
Gen 10

Average scores: 0.7573950216450217
Gen 11

Average scores: 0.7573950216450217
Gen 12

Average scores: 0.7573950216450217
Gen 13

Average scores: 0.7573950216450217
Gen 14

Average scores: 0.7573950216450217
Gen 15

Average scores: 0.7573950216450217
Gen 16

Average scores: 0.7573950216450217
Gen 17

Average scores: 0.7573950216450217
Gen 18

Average scores: 0.763726911976912
Gen 19

Average scores: 0.763726911976912
Gen 20

Average scores: 0.763726911976912
Gen 21

Average scores: 0.763726911976912
Gen 22

Average scores: 0.763726911976912
Gen 23

Average scores: 0.

In [6]:
print('LR')
scores = cross_val_score(LogisticRegression(), X, y, cv=10, scoring='f1')
print(f'{np.mean(scores)} +- {np.std(scores)}')

print('RF')
scores = cross_val_score(RandomForestClassifier(), X, y, cv=10, scoring='f1')    
print(f'{np.mean(scores)} +- {np.std(scores)}')

print('XGBClassifier')
scores = cross_val_score(XGBClassifier(), X, y, cv=10, scoring='f1')    
print(f'{np.mean(scores)} +- {np.std(scores)}')

print()
for i in range(len(pop)):
    print(pop[i])
    scores = cross_val_score(RandomForestClassifier(), X, y, cv=10, scoring='f1') 
    print(f'{np.mean(scores)} +- {np.std(scores)}')

LR
0.5724603174603174 +- 0.22907793582157168
RF
0.7372222222222222 +- 0.18822895588540903
XGBClassifier
0.6780952380952381 +- 0.19253588763451576

RandomForestClassifier(max_depth=5, n_estimators=117)
0.7884126984126983 +- 0.1910095706624568
RandomForestClassifier(max_depth=7, n_estimators=97)
0.7673809523809523 +- 0.1969525478952963
RandomForestClassifier(max_depth=3, n_estimators=113)
0.7312698412698413 +- 0.17159868103170728
RandomForestClassifier(max_depth=10, n_estimators=108)
0.6983333333333334 +- 0.15730529122863626
RandomForestClassifier(max_depth=5, n_estimators=99)
0.720952380952381 +- 0.19827144178083322
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constra