## Libraries

In [1]:
from irace2 import irace, dummy_stats_test
from sampling_functions import norm_sample, truncated_poisson, truncated_skellam
import numpy as np
import random
import scipy.stats as stats
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from tqdm import tqdm
from sklearn import preprocessing
import pandas as pd
from xgboost import XGBRegressor, XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
import statsmodels.stats.weightstats as stats
import scipy.stats as ss
from sklearn.model_selection import train_test_split,StratifiedShuffleSplit,cross_val_score
from scipy.stats import norm, poisson, skellam
from copy import copy, deepcopy
import itertools

## Models setup

In [2]:
# Baseline estimators handed to irace. Every numeric hyper-parameter that
# parameters_dict samples for a model must already have an explicit starting
# value on the corresponding estimator below.
models = [
    LogisticRegression(C=1),
    RandomForestClassifier(n_estimators=100, max_depth=5, ccp_alpha=0.0),
    SVC(C=1, coef0=0.0),
    XGBClassifier(n_estimators=100, max_depth=6, subsample=1),
]

# Search space, keyed by estimator class name.
#   * callable entries take a location `loc` and delegate to one of the
#     sampling helpers (norm_sample / truncated_skellam) with fixed spread
#     and bounds;
#   * list entries enumerate the discrete options for that parameter.
# NOTE(review): how irace consumes these entries is not visible here —
# presumably `loc` is the parameter's current value; confirm in irace2.
parameters_dict = {
    "LogisticRegression": {
        "C": lambda loc: norm_sample(loc=loc, scale=1, min=1e-2),
        "penalty": ["l2"],
        "solver": ["lbfgs", "newton-cg", "sag"],
    },
    "SVC": {
        "C": lambda loc: norm_sample(loc=loc, scale=1, min=1e-2),
        "coef0": lambda loc: norm_sample(loc=loc, scale=1, min=1e-2),
        "kernel": ["linear", "poly", "rbf", "sigmoid"],
        "decision_function_shape": ["ovo", "ovr"],
    },
    "RandomForestClassifier": {
        "n_estimators": lambda loc: truncated_skellam(loc, mu1=10, mu2=10, min=1),
        "max_depth": lambda loc: truncated_skellam(loc, mu1=1, mu2=1, min=1),
        "max_features": ["sqrt", "log2", None],
        "ccp_alpha": lambda loc: norm_sample(loc=loc, scale=0.1, min=1e-3),
    },
    "XGBClassifier": {
        "sample_type": ["uniform", "weighted"],
        "max_depth": lambda loc: truncated_skellam(loc, mu1=1, mu2=1, min=1),
        "booster": ["gbtree", "dart"],
        # subsample is the only entry that is also capped from above (max=1).
        "subsample": lambda loc: norm_sample(loc=loc, scale=0.3, min=1e-2, max=1),
    },
}

## Hypothesis tests

In [3]:
# Candidate statistical tests, in the order they enter the factorial design:
# four taken from scipy.stats (imported as `ss`), followed by the
# project-provided dummy_stats_test from irace2.
stat_tests = [
    getattr(ss, test_name)
    for test_name in ("ttest_rel", "ttest_ind", "mannwhitneyu", "wilcoxon")
]
stat_tests.append(dummy_stats_test)

## Irace parameters

In [4]:
# Levels explored for each irace setting; one list per factor.
# NOTE(review): meanings below are inferred from the names — confirm against irace2.
train_test_resampling = [10, 30, 100]  # presumably train/test resampling counts
cv_splits = [10, 30, 100]              # presumably cross-validation split counts
pop_size = [10, 50, 100]               # presumably population sizes
n_gen = [10, 100, 500, 1000]           # presumably generation budgets

In [8]:
# Full factorial design: one (stat_test, cv_splits, pop_size, n_gen) tuple per
# combination, with the right-most factor (n_gen) varying fastest.
# train_test_resampling is not part of this design.
factors = [
    (test, splits, size, generations)
    for test in stat_tests
    for splits in cv_splits
    for size in pop_size
    for generations in n_gen
]

In [10]:
# Run irace once per factor combination and keep every result.
#
# Each element of `factors` is a 4-tuple in the order the lists were combined:
# (stat_test, cv_splits level, pop_size level, n_gen level). The tuple is
# unpacked so that each run actually uses its own settings; previously the
# loop variable was ignored, the whole `pop_size` list was passed as the
# population size, `cv` was fixed to None, and each iteration overwrote the
# previous run's result.
#
# NOTE(review): X, y and scoring are not defined in any visible cell; they
# must exist in the kernel before this cell runs.
# NOTE(review): n_gen is passed as `stop_condition` on the assumption that
# irace accepts a generation budget there — confirm against irace2.irace.
# NOTE(review): the cv_splits level is passed as `cv` — confirm irace expects
# a split count for that argument.
irace_results = []
for stat_test, n_splits, population_size, generations in factors:
    pop, pop_scores = irace(
        models,
        X,
        y,
        generations,
        stat_test,
        parameters_dict,
        population_size,
        scoring,
        cv=n_splits,
        r=100,
    )
    # One record per run so configurations can be compared afterwards.
    irace_results.append(
        {
            "stat_test": getattr(stat_test, "__name__", repr(stat_test)),
            "cv_splits": n_splits,
            "pop_size": population_size,
            "n_gen": generations,
            "pop": pop,
            "pop_scores": pop_scores,
        }
    )

10
10
10
10
10
10
10
10
10
10
10
10
30
30
30
30
30
30
30
30
30
30
30
30
100
100
100
100
100
100
100
100
100
100
100
100
10
10
10
10
10
10
10
10
10
10
10
10
30
30
30
30
30
30
30
30
30
30
30
30
100
100
100
100
100
100
100
100
100
100
100
100
10
10
10
10
10
10
10
10
10
10
10
10
30
30
30
30
30
30
30
30
30
30
30
30
100
100
100
100
100
100
100
100
100
100
100
100
10
10
10
10
10
10
10
10
10
10
10
10
30
30
30
30
30
30
30
30
30
30
30
30
100
100
100
100
100
100
100
100
100
100
100
100
10
10
10
10
10
10
10
10
10
10
10
10
30
30
30
30
30
30
30
30
30
30
30
30
100
100
100
100
100
100
100
100
100
100
100
100
