In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and reload imported modules before each cell
# runs, so edits to imported source files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [3]:
import initdirs

from pdata.ml import run_gscv, gather_best_hyperparams, gather_best_scores_for_all_gscv

In [4]:
# Load the wine dataset and split it into the feature matrix and label vector.
wine_bunch = load_wine()
X = wine_bunch.data
y = wine_bunch.target

In [5]:
# Standardise the features; `scaler` keeps the fitted statistics for later reuse.
# NOTE(review): the scaler is fitted on every sample before the cross-validated
# grid search below, so the held-out part of each CV fold has influenced the
# scaling statistics — consider scaling inside a Pipeline if that matters.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [6]:
# Shuffle features and labels together with a fixed seed so that the row order
# is reproducible between runs.
shuffled_arrays = shuffle(X_scaled, y, random_state=42)
X_shuffled, y_shuffled = shuffled_arrays

In [7]:
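# NOTE: the hold-out split below is disabled; model comparison in this notebook
# relies on the cross-validation performed by the grid search (cv=4 in its output).
#X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=42)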

In [8]:
# Grid-search configuration: each model name maps to an unfitted estimator and
# the hyperparameter grid that will be swept for it.
#
# NOTE(review): `random_state` is swept like a hyperparameter, so each model's
# best score is also the best of four seeds — confirm this is intended.
# NOTE(review): the svm row of the results table shows 16 of 128 grid points
# tied for best; scikit-learn documents SVC's random_state as used only when
# probability=True, so it may just quadruple the grid here — verify.
# NOTE(review): newer scikit-learn releases rename AdaBoost's `base_estimator`
# to `estimator` and deprecate 'SAMME.R' — verify against the installed version.
_seed_values = (1, 4, 42, 100)

setup = {
    'random_forest': {
        'estimator': RandomForestClassifier(),
        'param_grid': {
            'n_estimators': [5, 10, 20],
            'min_samples_split': (2, 4, 8, 16, 32),
            'min_samples_leaf': (1, 2, 4, 8),
            'criterion': ['gini', 'entropy'],
            'random_state': list(_seed_values),
        },
    },
    'adaboost': {
        'estimator': AdaBoostClassifier(),
        'param_grid': {
            # Weak learners: decision stumps and depth-2 trees.
            'base_estimator': [
                DecisionTreeClassifier(max_depth=1),
                DecisionTreeClassifier(max_depth=2),
            ],
            'n_estimators': [10, 25, 50],
            'learning_rate': [0.5, 1.0],
            'algorithm': ['SAMME', 'SAMME.R'],
            'random_state': list(_seed_values),
        },
    },
    'svm': {
        'estimator': SVC(),
        'param_grid': {
            'C': [0.5, 1.0, 1.5, 2.0],
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            'gamma': ['scale', 'auto'],
            'random_state': list(_seed_values),
        },
    },
}

In [9]:
# Run the grid search for every entry in `setup` on the shuffled, scaled data.
# The result displayed in the next cell maps each setup key to a
# GridSearchCV(cv=4) object built from that entry's estimator and param_grid.
# NOTE(review): score_func=None presumably makes run_gscv fall back to the
# estimator's default scorer — confirm in pdata.ml.
gscv_res = run_gscv(X_shuffled, y_shuffled, setup, score_func=None)

In [10]:
# Inspect the result: one GridSearchCV object per model name.
gscv_res

{'random_forest': GridSearchCV(cv=4, estimator=RandomForestClassifier(),
              param_grid={'criterion': ['gini', 'entropy'],
                          'min_samples_leaf': (1, 2, 4, 8),
                          'min_samples_split': (2, 4, 8, 16, 32),
                          'n_estimators': [5, 10, 20],
                          'random_state': [1, 4, 42, 100]}),
 'adaboost': GridSearchCV(cv=4, estimator=AdaBoostClassifier(),
              param_grid={'algorithm': ['SAMME', 'SAMME.R'],
                          'base_estimator': [DecisionTreeClassifier(max_depth=1),
                                             DecisionTreeClassifier(max_depth=2)],
                          'learning_rate': [0.5, 1.0],
                          'n_estimators': [10, 25, 50],
                          'random_state': [1, 4, 42, 100]}),
 'svm': GridSearchCV(cv=4, estimator=SVC(),
              param_grid={'C': [0.5, 1.0, 1.5, 2.0], 'gamma': ['scale', 'auto'],
                          'kernel': ['

In [11]:
# Summarise all searches in one table with a row per model. The displayed
# columns are best_score, n_with_best, grid_size and share_with_best; in the
# output below share_with_best equals n_with_best / grid_size.
gather_best_scores_for_all_gscv(gscv_res)

Unnamed: 0,best_score,n_with_best,grid_size,share_with_best
random_forest,0.994318,1,480,0.002083
adaboost,0.983207,1,96,0.010417
svm,0.983081,16,128,0.125
