In [1]:
# from skopt import BayesSearchCV
# from skopt.space import Real, Categorical, Integer

from scipy.stats import uniform as sp_uniform
from scipy.stats import randint as sp_randint

from mlxtend.classifier import StackingCVClassifier, StackingClassifier

from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV

from lightgbm import LGBMClassifier

import numpy as np

In [2]:
iris = load_iris()
X = iris.data
y = iris.target

In [3]:
RANDOM_SEED = 2

In [4]:
lr_pipe = Pipeline([('prep', StandardScaler()),
                    ('lr', LogisticRegression())])
knn_pipe = Pipeline([('prep', StandardScaler()),
                     ('knn', KNeighborsClassifier())])
rf = RandomForestClassifier(random_state=RANDOM_SEED)

In [5]:
np.random.seed(RANDOM_SEED)
sclf1 = StackingCVClassifier(classifiers=[lr_pipe, knn_pipe, rf], 
                             cv=3,
                             use_features_in_secondary=True, 
#                              use_probas=True,
                             meta_classifier=LogisticRegression(random_state=RANDOM_SEED))

sclf2 = StackingCVClassifier(classifiers=[lr_pipe, knn_pipe, rf], 
                             cv=3,
                             use_features_in_secondary=True, 
#                              use_probas=True,
                             meta_classifier=RandomForestClassifier(random_state=RANDOM_SEED))

top_sclf = StackingCVClassifier(classifiers=[sclf1, sclf2], 
                                cv=3,
                                use_features_in_secondary=True, 
#                              use_probas=True,
                                 meta_classifier=RandomForestClassifier(random_state=RANDOM_SEED))

top_sclf2 = StackingCVClassifier(classifiers=[sclf1, sclf2], 
                                 cv=3,
                                 use_features_in_secondary=True,  
#                              use_probas=True,
                                 meta_classifier=LGBMClassifier(seed=RANDOM_SEED))

In [6]:
# instantiate pipeline to select model
pipe = Pipeline([('model', LGBMClassifier())])

In [7]:
top_sclf_search = {'model': [top_sclf],
                   'model__stackingcvclassifier-1__pipeline-1__lr__C': sp_uniform(1e-6, 1e6),
                   'model__stackingcvclassifier-1__pipeline-1__lr__penalty': ['l1', 'l2'],
                   'model__stackingcvclassifier-1__pipeline-2__knn__n_neighbors': sp_randint(1, 20),
                   'model__stackingcvclassifier-1__randomforestclassifier__max_features': sp_randint(1, 5),
                   'model__stackingcvclassifier-1__meta-logisticregression__C': sp_uniform(1e-6, 1e6),
                   'model__stackingcvclassifier-1__meta-logisticregression__penalty': ['l1', 'l2'],

                   'model__stackingcvclassifier-2__pipeline-1__lr__C': sp_uniform(1e-6, 1e6),
                   'model__stackingcvclassifier-2__pipeline-1__lr__penalty': ['l1', 'l2'],
                   'model__stackingcvclassifier-2__pipeline-2__knn__n_neighbors': sp_randint(1, 20),
                   'model__stackingcvclassifier-2__randomforestclassifier__max_features': sp_randint(1, 5),
                   'model__stackingcvclassifier-2__meta-randomforestclassifier__max_features': sp_randint(1, 5),

                   'model__meta-randomforestclassifier__max_features': sp_randint(1, 5)
                 }

top_sclf_search2 = {'model': [top_sclf2],
                   'model__stackingcvclassifier-1__pipeline-1__lr__C': sp_uniform(1e-6, 1e6),
                   'model__stackingcvclassifier-1__pipeline-1__lr__penalty': ['l1', 'l2'],
                   'model__stackingcvclassifier-1__pipeline-2__knn__n_neighbors': sp_randint(1, 20),
                   'model__stackingcvclassifier-1__randomforestclassifier__max_features': sp_randint(1, 5),
                   'model__stackingcvclassifier-1__meta-logisticregression__C': sp_uniform(1e-6, 1e6),
                   'model__stackingcvclassifier-1__meta-logisticregression__penalty': ['l1', 'l2'],

                   'model__stackingcvclassifier-2__pipeline-1__lr__C': sp_uniform(1e-6, 1e6),
                   'model__stackingcvclassifier-2__pipeline-1__lr__penalty': ['l1', 'l2'],
                   'model__stackingcvclassifier-2__pipeline-2__knn__n_neighbors': sp_randint(1, 20),
                   'model__stackingcvclassifier-2__randomforestclassifier__max_features': sp_randint(1, 5),
                   'model__stackingcvclassifier-2__meta-randomforestclassifier__max_features': sp_randint(1, 5),

                    'model__meta-lgbmclassifier__num_leaves': sp_randint(2, 100)
                 }

In [10]:
opt = RandomizedSearchCV(pipe, param_distributions=top_sclf_search, 
                         n_iter=100, n_jobs=-1, random_state=RANDOM_SEED,cv=5)

In [12]:
opt.fit(X, y)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=Pipeline(steps=[('model', LGBMClassifier(boosting_type='gbdt', colsample_bytree=1, learning_rate=0.1,
        max_bin=255, max_depth=-1, min_child_samples=10,
        min_child_weight=5, min_split_gain=0, n_estimators=10, nthread=-1,
        num_leaves=31, objective='binary', reg_alpha=0, reg_lambda=0,
        seed=0, silent=True, subsample=1, subsample_for_bin=50000,
        subsample_freq=1))]),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'model': [StackingCVClassifier(classifiers=[StackingCVClassifier(classifiers=[Pipeline(steps=[('prep', StandardScaler(copy=True, with_mean=True, with_std=True)), ('lr', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, ma...stclassifier__max_features': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f695fdbc0f0>},
          pre_dispatch='2*n_jobs', random_state=2, refit=Tru

In [13]:
opt.best_estimator_

Pipeline(steps=[('model', StackingCVClassifier(classifiers=[StackingCVClassifier(classifiers=[Pipeline(steps=[('prep', StandardScaler(copy=True, with_mean=True, with_std=True)), ('lr', LogisticRegression(C=868916.62643307878, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, m...uffle=True, stratify=True, use_features_in_secondary=True,
           use_probas=False, verbose=0))])

In [14]:
opt.best_params_

{'model': StackingCVClassifier(classifiers=[StackingCVClassifier(classifiers=[Pipeline(steps=[('prep', StandardScaler(copy=True, with_mean=True, with_std=True)), ('lr', LogisticRegression(C=868916.62643307878, class_weight=None, dual=False,
           fit_intercept=True, intercept_scaling=1, max_iter=100,
           multi_class='ov...huffle=True, stratify=True, use_features_in_secondary=True,
            use_probas=False, verbose=0)],
            cv=3,
            meta_classifier=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
             max_depth=None, max_features=1, max_leaf_nodes=None,
             min_impurity_split=1e-07, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=10, n_jobs=1, oob_score=False, random_state=2,
             verbose=0, warm_start=False),
            shuffle=True, stratify=True, use_features_in_secondary=True,
            use_probas=False, verbose=0),
 'model__meta-rando

In [15]:
import pandas as pd
cv_results = pd.DataFrame.from_dict(opt.cv_results_)

In [17]:
pd.options.display.max_columns = None
cv_results.sort_values('mean_test_score', ascending=False)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_model,param_model__meta-randomforestclassifier__max_features,param_model__stackingcvclassifier-1__meta-logisticregression__C,param_model__stackingcvclassifier-1__meta-logisticregression__penalty,param_model__stackingcvclassifier-1__pipeline-1__lr__C,param_model__stackingcvclassifier-1__pipeline-1__lr__penalty,param_model__stackingcvclassifier-1__pipeline-2__knn__n_neighbors,param_model__stackingcvclassifier-1__randomforestclassifier__max_features,param_model__stackingcvclassifier-2__meta-randomforestclassifier__max_features,param_model__stackingcvclassifier-2__pipeline-1__lr__C,param_model__stackingcvclassifier-2__pipeline-1__lr__penalty,param_model__stackingcvclassifier-2__pipeline-2__knn__n_neighbors,param_model__stackingcvclassifier-2__randomforestclassifier__max_features,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,split3_test_score,split3_train_score,split4_test_score,split4_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
99,0.867958,0.006227,0.966667,0.980000,StackingCVClassifier(classifiers=[StackingCVCl...,1,697229,l2,400634,l2,19,3,3,580768,l1,4,3,{'model': StackingCVClassifier(classifiers=[St...,1,0.966667,0.991667,0.966667,0.983333,0.933333,0.991667,0.966667,0.966667,1.0,0.966667,0.112590,0.001698,0.021082,0.011304
12,1.099184,0.007807,0.966667,0.988333,StackingCVClassifier(classifiers=[StackingCVCl...,1,786172,l1,881652,l1,18,4,4,868917,l2,2,1,{'model': StackingCVClassifier(classifiers=[St...,1,0.966667,0.983333,0.966667,0.991667,0.933333,0.991667,0.966667,0.975000,1.0,1.000000,0.087004,0.000174,0.021082,0.008498
58,1.181064,0.011923,0.966667,0.983333,StackingCVClassifier(classifiers=[StackingCVCl...,2,912677,l1,243487,l1,3,1,2,929349,l1,10,4,{'model': StackingCVClassifier(classifiers=[St...,1,0.966667,0.983333,0.966667,0.983333,0.933333,0.983333,0.966667,0.991667,1.0,0.975000,0.099193,0.007331,0.021082,0.005270
47,1.009260,0.009510,0.966667,0.988333,StackingCVClassifier(classifiers=[StackingCVCl...,3,694239,l2,299521,l2,13,1,2,639497,l2,8,1,{'model': StackingCVClassifier(classifiers=[St...,1,0.966667,0.983333,0.966667,0.983333,0.933333,0.991667,0.966667,1.000000,1.0,0.983333,0.072150,0.002169,0.021082,0.006667
40,0.938803,0.007888,0.966667,0.981667,StackingCVClassifier(classifiers=[StackingCVCl...,4,810773,l2,496728,l2,7,2,4,206098,l2,17,3,{'model': StackingCVClassifier(classifiers=[St...,1,0.966667,0.966667,0.966667,0.991667,0.966667,0.966667,0.933333,0.991667,1.0,0.991667,0.012960,0.000477,0.021082,0.012247
39,1.052116,0.012201,0.966667,0.976667,StackingCVClassifier(classifiers=[StackingCVCl...,3,512511,l2,357518,l2,7,1,3,170686,l1,18,1,{'model': StackingCVClassifier(classifiers=[St...,1,0.966667,0.975000,0.966667,0.966667,0.933333,0.983333,0.966667,0.983333,1.0,0.975000,0.066915,0.007429,0.021082,0.006236
66,1.232484,0.008426,0.966667,0.988333,StackingCVClassifier(classifiers=[StackingCVCl...,3,43220.9,l2,953692,l1,15,4,4,772890,l1,3,4,{'model': StackingCVClassifier(classifiers=[St...,1,0.966667,0.983333,0.966667,0.991667,0.966667,0.975000,0.933333,1.000000,1.0,0.991667,0.121839,0.000918,0.021082,0.008498
71,1.045901,0.008265,0.966667,0.976667,StackingCVClassifier(classifiers=[StackingCVCl...,2,663608,l2,745553,l2,12,3,1,209953,l1,11,3,{'model': StackingCVClassifier(classifiers=[St...,1,0.966667,0.975000,0.966667,0.975000,0.966667,0.983333,0.933333,0.983333,1.0,0.966667,0.074911,0.000773,0.021082,0.006236
30,0.981067,0.007553,0.966667,0.995000,StackingCVClassifier(classifiers=[StackingCVCl...,1,651092,l1,246254,l1,14,3,3,120040,l2,17,2,{'model': StackingCVClassifier(classifiers=[St...,1,0.966667,0.991667,1.000000,1.000000,0.933333,0.991667,0.933333,1.000000,1.0,0.991667,0.060560,0.000375,0.029814,0.004082
25,1.238231,0.007963,0.966667,0.998333,StackingCVClassifier(classifiers=[StackingCVCl...,1,506087,l1,780302,l2,9,3,4,876562,l1,18,1,{'model': StackingCVClassifier(classifiers=[St...,1,0.966667,0.991667,0.966667,1.000000,0.933333,1.000000,0.966667,1.000000,1.0,1.000000,0.128548,0.000229,0.021082,0.003333


In [18]:
opt2 = RandomizedSearchCV(pipe, param_distributions=top_sclf_search2, 
                          n_iter=100, n_jobs=-1, random_state=RANDOM_SEED)
opt2.fit(X, y)

opt2.best_params_

{'model': StackingCVClassifier(classifiers=[StackingCVClassifier(classifiers=[Pipeline(steps=[('prep', StandardScaler(copy=True, with_mean=True, with_std=True)), ('lr', LogisticRegression(C=550977.90532541927, class_weight=None, dual=False,
           fit_intercept=True, intercept_scaling=1, max_iter=100,
           multi_class='ov...huffle=True, stratify=True, use_features_in_secondary=True,
            use_probas=False, verbose=0)],
            cv=3,
            meta_classifier=LGBMClassifier(boosting_type='gbdt', colsample_bytree=1, learning_rate=0.1,
         max_bin=255, max_depth=-1, min_child_samples=10,
         min_child_weight=5, min_split_gain=0, n_estimators=10, nthread=-1,
         num_leaves=94, objective='binary', reg_alpha=0, reg_lambda=0,
         seed=2, silent=True, subsample=1, subsample_for_bin=50000,
         subsample_freq=1),
            shuffle=True, stratify=True, use_features_in_secondary=True,
            use_probas=False, verbose=0),
 'model__meta-lgbmclass

In [19]:
cv_results2 = pd.DataFrame(opt2.cv_results_)

cv_results2.sort_values('mean_test_score', ascending=False)


Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_model,param_model__meta-lgbmclassifier__num_leaves,param_model__stackingcvclassifier-1__meta-logisticregression__C,param_model__stackingcvclassifier-1__meta-logisticregression__penalty,param_model__stackingcvclassifier-1__pipeline-1__lr__C,param_model__stackingcvclassifier-1__pipeline-1__lr__penalty,param_model__stackingcvclassifier-1__pipeline-2__knn__n_neighbors,param_model__stackingcvclassifier-1__randomforestclassifier__max_features,param_model__stackingcvclassifier-2__meta-randomforestclassifier__max_features,param_model__stackingcvclassifier-2__pipeline-1__lr__C,param_model__stackingcvclassifier-2__pipeline-1__lr__penalty,param_model__stackingcvclassifier-2__pipeline-2__knn__n_neighbors,param_model__stackingcvclassifier-2__randomforestclassifier__max_features,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
30,2.819135,0.026610,0.960000,0.966627,StackingCVClassifier(classifiers=[StackingCVCl...,39,536343,l1,734685,l1,3,3,4,429589,l1,1,1,{'model': StackingCVClassifier(classifiers=[St...,1,1.000000,0.949495,0.901961,0.979798,0.979167,0.970588,0.231118,0.013869,0.042507,0.012684
64,2.717059,0.022023,0.960000,0.966825,StackingCVClassifier(classifiers=[StackingCVCl...,94,3573.43,l1,209597,l1,15,1,3,539418,l2,15,1,{'model': StackingCVClassifier(classifiers=[St...,1,0.960784,0.949495,0.921569,1.000000,1.000000,0.950980,0.483506,0.003320,0.031849,0.023466
65,2.434446,0.034075,0.960000,0.990097,StackingCVClassifier(classifiers=[StackingCVCl...,28,459472,l2,235437,l2,16,2,2,428205,l1,11,1,{'model': StackingCVClassifier(classifiers=[St...,1,0.980392,0.989899,0.921569,1.000000,0.979167,0.980392,0.286786,0.017384,0.027588,0.008006
75,2.805660,0.025435,0.960000,0.973559,StackingCVClassifier(classifiers=[StackingCVCl...,43,109932,l2,469773,l2,17,4,2,534215,l2,7,1,{'model': StackingCVClassifier(classifiers=[St...,1,1.000000,0.969697,0.882353,1.000000,1.000000,0.950980,0.585048,0.001938,0.055730,0.020198
46,2.957978,0.031321,0.960000,0.980194,StackingCVClassifier(classifiers=[StackingCVCl...,71,572812,l1,667722,l1,1,2,2,384909,l1,13,2,{'model': StackingCVClassifier(classifiers=[St...,1,0.980392,0.979798,0.921569,1.000000,0.979167,0.960784,0.222398,0.014994,0.027588,0.016012
10,2.580122,0.015973,0.960000,0.970093,StackingCVClassifier(classifiers=[StackingCVCl...,94,503207,l2,497074,l2,8,2,2,550978,l2,15,1,{'model': StackingCVClassifier(classifiers=[St...,1,0.960784,0.949495,0.941176,1.000000,0.979167,0.960784,0.316198,0.007296,0.015435,0.021644
69,2.729430,0.039474,0.960000,0.976827,StackingCVClassifier(classifiers=[StackingCVCl...,7,480584,l1,989228,l1,11,3,2,252809,l2,11,2,{'model': StackingCVClassifier(classifiers=[St...,1,0.980392,0.979798,0.921569,0.989899,0.979167,0.960784,0.205980,0.008258,0.027588,0.012070
49,2.289239,0.030899,0.953333,0.976827,StackingCVClassifier(classifiers=[StackingCVCl...,84,386971,l2,50734.6,l2,6,2,1,980164,l2,17,2,{'model': StackingCVClassifier(classifiers=[St...,8,1.000000,0.969697,0.882353,1.000000,0.979167,0.960784,0.231332,0.003330,0.051643,0.016785
79,3.138870,0.030878,0.953333,0.970093,StackingCVClassifier(classifiers=[StackingCVCl...,45,732494,l1,530469,l1,16,1,1,834890,l2,16,1,{'model': StackingCVClassifier(classifiers=[St...,8,0.960784,0.949495,0.921569,1.000000,0.979167,0.960784,0.164379,0.009704,0.023989,0.021644
84,2.582249,0.024142,0.953333,0.966627,StackingCVClassifier(classifiers=[StackingCVCl...,68,903145,l1,956540,l2,2,2,1,503272,l1,18,1,{'model': StackingCVClassifier(classifiers=[St...,8,0.980392,0.929293,0.882353,1.000000,1.000000,0.970588,0.296915,0.019207,0.051564,0.029002
