In [None]:
from sklearn import datasets
from sklearn.ensemble import (
    RandomForestClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
)
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler
from sklearn.svm import SVC

from pipelinehelper import PipelineHelper


In [None]:

# Load the iris dataset and split the returned bundle into the feature
# matrix (X) and the class labels (y) used by the grid search below.
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [None]:

# Candidate scalers for the first pipeline step. Each entry is addressed in
# the parameter grid by its short name ('std', 'max').
# NOTE(review): optional=True presumably lets the search also try the
# pipeline without any scaler — confirm against the pipelinehelper docs.
scaler_choices = PipelineHelper(
    [
        ('std', StandardScaler()),
        ('max', MaxAbsScaler()),
    ],
    optional=True,
)

# Candidate classifiers for the second pipeline step. Only k-nearest
# neighbours is active; the commented-out entries are alternatives that can
# be re-enabled together with their matching parameter ranges in `params`.
classifier_choices = PipelineHelper(
    [
        # ('svm', SVC()),
        # ('rf', RandomForestClassifier()),
        # ('ada', AdaBoostClassifier()),
        # ('gb', GradientBoostingClassifier()),
        ('knn', KNeighborsClassifier()),
        # ('nb_pipe', Pipeline([
        #     # Naive Bayes needs positive numbers
        #     ('scaler', MinMaxScaler()),
        #     ('nb', MultinomialNB()),
        # ])),
    ]
)

# Two-stage pipeline: one selectable scaler followed by one selectable
# classifier. The step names 'scaler' and 'classifier' are the prefixes used
# by the parameter grid keys.
pipe = Pipeline(
    steps=[
        ('scaler', scaler_choices),
        ('classifier', classifier_choices),
    ]
)

# Search space for the 'scaler' step. Keys follow the pattern
# '<model name>__<parameter>', where the model name is the short name the
# model was registered under in the PipelineHelper.
scaler_space = {
    'std__with_mean': [True, False],
    'std__with_std': [True, False],
    # no params for 'max' leads to using standard params
}

# Search space for the 'classifier' step. Ranges for the disabled
# classifiers are kept as comments so they can be re-enabled alongside the
# corresponding pipeline entries.
classifier_space = {
    # 'svm__C': [0.1, 1.0],
    # 'svm__kernel': ['linear', 'rbf'],
    # 'rf__n_estimators': [10, 20, 50, 100, 150],
    # 'rf__max_features': ['sqrt', 'log2'],
    # 'rf__min_samples_split': [2, 5, 10],
    # 'rf__min_samples_leaf': [1, 2, 4],
    # 'rf__bootstrap': [True, False],
    # 'ada__n_estimators': [10, 20, 40, 100],
    # 'ada__algorithm': ['SAMME', 'SAMME.R'],
    # 'gb__n_estimators': [10, 20, 50, 100],
    # 'gb__criterion': ['friedman_mse', 'squared_error'],
    # 'gb__max_features': ['sqrt', None],
    'knn__n_neighbors': [2, 3, 5, 7, 10],
    'knn__leaf_size': [1, 2, 3, 5],
    'knn__weights': ['uniform', 'distance'],
    'knn__algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    # 'nb_pipe__nb__fit_prior': [True, False],
    # 'nb_pipe__nb__alpha': [0.1, 0.2],
}

# Parameter grid handed to GridSearchCV: each helper's `generate` turns its
# search space into the list of candidate values for that step's
# `selected_model` parameter.
params = {
    'scaler__selected_model': pipe.named_steps['scaler'].generate(scaler_space),
    'classifier__selected_model': pipe.named_steps['classifier'].generate(classifier_space),
}


In [None]:
# Exhaustive cross-validated search over every scaler/classifier combination
# in `params`, scored by accuracy. n_jobs=-1 runs the fits in parallel on all
# available processors; verbose=1 prints a short progress summary.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
)
grid.fit(X, y)


In [None]:
# Report the best parameter combination found by the search, followed by
# its mean cross-validated accuracy.
for best_attr in (grid.best_params_, grid.best_score_):
    print(best_attr)