In [5]:
from irace2 import irace, norm_sample, truncated_poisson, truncated_skellam
import numpy as np
import random
import scipy.stats as stats
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from tqdm import tqdm
from sklearn import preprocessing
import pandas as pd
from xgboost import XGBRegressor, XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
import statsmodels.stats.weightstats as stats
import scipy.stats as ss
from sklearn.model_selection import train_test_split,StratifiedShuffleSplit,cross_val_score
from scipy.stats import norm, poisson, skellam
from copy import copy, deepcopy

## Get Data

In [6]:
# Load the SPECT training set and split it into a feature matrix X and a label vector y.
df = pd.read_csv('spect_train.csv')

# Name of the label column; every other column is treated as a feature.
target_column = 'OVERALL_DIAGNOSIS'

y = df[target_column].to_numpy()

# NOTE(review): with its default arguments, sklearn's preprocessing.normalize rescales
# each *row* (sample) to unit L2 norm, not each feature column — confirm that per-sample
# scaling is what is intended here.
feature_matrix = df.drop(columns=[target_column]).to_numpy()
X = preprocessing.normalize(feature_matrix)

## Models

In [7]:
# Base estimators handed to the race. All the parameters being configured must be
# set on the estimator beforehand.
# NOTE(review): 'penalty', 'solver' and 'sample_type' appear in the search space below
# but are not passed explicitly to the constructors here — confirm that relying on the
# estimators' defaults satisfies the requirement above.
models = [
    LogisticRegression(C=1),
    RandomForestClassifier(n_estimators=100, max_depth=5),
    XGBClassifier(n_estimators=100, max_depth=6),
]

# Per-estimator search spaces. Each entry maps a parameter name either to a callable
# that receives `loc` and forwards it to one of the irace2 samplers, or to a list of
# values (presumably the categorical choices — verify against irace2).
logistic_regression_space = {
    'C': lambda loc: norm_sample(loc=loc, scale=1, min=1e-2),
    'penalty': ['l2'],
    'solver': ['lbfgs', 'newton-cg', 'sag'],
}

random_forest_space = {
    'n_estimators': lambda loc: truncated_skellam(loc, mu1=10, mu2=10, min=1),
    'max_depth': lambda loc: truncated_skellam(loc, mu1=1, mu2=1, min=1),
}

# NOTE(review): XGBoost documents 'sample_type' as a DART-booster parameter; no
# `booster` is set on the XGBClassifier above, so confirm this entry has any effect.
xgboost_space = {
    'sample_type': ['uniform', 'weighted'],
    'max_depth': lambda loc: truncated_skellam(loc, mu1=1, mu2=1, min=1),
}

# Search space keyed by the class name of each estimator in `models`.
parameters_dict = {
    'LogisticRegression': logistic_regression_space,
    'RandomForestClassifier': random_forest_space,
    'XGBClassifier': xgboost_space,
}

## Run race

In [8]:
# Seed the global stdlib and NumPy RNGs so that a fresh "Restart & Run All" reproduces
# the same race. Estimators created without an explicit random_state (e.g. the random
# forest) fall back to NumPy's global RNG.
# NOTE(review): this assumes the irace2 samplers also draw from these global RNGs —
# confirm against irace2; if it manages its own generator, seed that as well.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Statistical test passed to the race: scipy's t-test for two related (paired) samples.
# Alternatives from scipy.stats: ss.ttest_ind, ss.mannwhitneyu.
# Use the `ss` alias for these: the name `stats` is rebound to
# statsmodels.stats.weightstats by the imports, which has no mannwhitneyu.
stat_test = ss.ttest_rel

# Run the race over the base estimators and their search spaces. It returns the final
# population and, for each member, its collected scores.
# NOTE(review): `lambda x: x > 100` is presumably the stopping condition — confirm what
# quantity irace passes as `x`, and what `pop_size` and `r` control.
pop, pop_scores = irace(models, X, y, lambda x: x > 100, stat_test, parameters_dict, pop_size = 20, scoring='f1_macro', r=30)

In [None]:
# Baselines: each estimator with its default hyper-parameters, scored with 10-fold
# cross-validated F1 on the full data set.
# NOTE(review): this cell scores with 'f1' while the race was run with
# scoring='f1_macro' — confirm the metric difference is intentional.
baseline_estimators = (
    ('LR', LogisticRegression),
    ('RF', RandomForestClassifier),
    ('XGBClassifier', XGBClassifier),
)
for label, estimator_cls in baseline_estimators:
    print(label)
    # Instantiate right before scoring, one estimator at a time.
    scores = cross_val_score(estimator_cls(), X, y, cv=10, scoring='f1')
    print(f'{np.mean(scores)} +- {np.std(scores)}')

# Same evaluation protocol for every member of the population returned by the race.
print()
for i in range(len(pop)):
    candidate = pop[i]
    print(candidate)
    scores = cross_val_score(candidate, X, y, cv=10, scoring='f1')
    print(f'{np.mean(scores)} +- {np.std(scores)}')

In [None]:
# For every member of the final population, show the member followed by the mean and
# standard deviation of the scores recorded for it during the race.
print()
n_members = len(pop)
for i in range(n_members):
    member = pop[i]
    print(member)
    print(f'{np.mean(pop_scores[i])} +- {np.std(pop_scores[i])}')