# Stacking Runs

This notebook runs two stacking ensembles, each built from four base learners (logistic regression, decision tree, multilayer perceptron, and SVM), $n$ times and saves the results of each ensemble to a separate CSV file.

## Imports

In [1]:
# base learners
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# ensemble
from sklearn.ensemble import StackingClassifier
from mlxtend.classifier import StackingCVClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run, get_params

## Data Loading and Configuration

In [2]:
%%time
# Build the Tadpole object that is handed to every run() call below; the cell is
# timed because construction takes a few seconds.
# NOTE(review): presumably this is where the dataset is loaded — confirm in tadpole.py.
tp = Tadpole()

CPU times: user 4.16 s, sys: 324 ms, total: 4.48 s
Wall time: 4.48 s


In [3]:
# number of runs performed for each stacking ensemble (passed to run() as n_runs)
n_runs: int = 30

In [4]:
# Fetch the stored best parameters of every base learner in one pass.
# Order: logistic regression, decision tree, neural network, support vector classifier.
params_lra, params_dtc, params_ann, params_svc = map(
    get_params,
    ('refit_lra', 'refit_dtc', 'refit_ann', 'refit_svc'),
)

In [5]:
# Result files, one per stacking variant.
stk_out, stk_cv_out = (
    './results/refit_stk.csv',     # stacking with best params of base learners
    './results/refit_stk_cv.csv',  # stacking with params from grid search
)

## Stacking Ensemble
### Base Learners

In [6]:
# Base learners configured with the best parameters from the grid search.
# set_params returns the estimator itself, so each learner is built and
# configured in a single expression.
lra = LogisticRegression().set_params(**params_lra)
dtc = DecisionTreeClassifier().set_params(**params_dtc)
mlp = MLPClassifier().set_params(**params_ann)

# probability=True is set at construction; params_svc is applied on top of it.
svm = SVC(probability=True).set_params(**params_svc)

In [7]:
# (name, estimator) pairs handed to the stacking ensemble; the dict keeps each
# name next to its learner and preserves insertion order.
estimators = list({
    'lra': lra,
    'dt': dtc,
    'ann': mlp,
    'svm': svm,
}.items())

In [8]:
# Stack the tuned base learners under a default logistic-regression final estimator.
stk = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
    verbose=0,
)

# Wrap the ensemble in the project's Classifier and execute the repeated runs;
# results are written to stk_out and the value returned by run() is displayed.
stk_clf = Classifier(stk)
run(stk_clf, tp, n_runs=n_runs, output=stk_out)

100%|██████████| 30/30 [02:30<00:00,  5.00s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.889978,0.923466,0.971859,0.978002
1,0.890328,0.871586,0.974175,0.956105
2,0.888067,0.899,0.971815,0.966109
3,0.883882,0.877003,0.973124,0.967882
4,0.897114,0.869197,0.973183,0.970524
5,0.894509,0.888103,0.972527,0.973702
6,0.891749,0.898917,0.971802,0.975088
7,0.89438,0.888103,0.972807,0.968192
8,0.881225,0.880359,0.972143,0.972218
9,0.880843,0.886908,0.967564,0.973656


## Stacking (Grid Search Parameters)

In [9]:
# Base learners with hard-coded hyperparameters (grid-search values, per the
# section title).
clf1 = LogisticRegression(C=10000.0, class_weight='balanced')
clf2 = DecisionTreeClassifier(max_depth=8, class_weight='balanced')
# NOTE(review): scikit-learn documents `learning_rate` as only used when
# solver='sgd'; the solver is left at its default here — confirm that
# 'adaptive' is meant to have an effect.
clf3 = MLPClassifier(hidden_layer_sizes=(50, 50, 50), learning_rate='adaptive')
clf4 = SVC(C=1, class_weight='balanced', probability=True)

# Meta-classifier that is stacked on top of the four base learners.
lr = LogisticRegression(C=2.782559402207126, class_weight='balanced')

In [10]:
# Cross-validated stacking: with use_probas=True the meta-classifier is fed the
# base learners' class probabilities; random_state is fixed at 43.
stk_cv = StackingCVClassifier(
    classifiers=[clf1, clf2, clf3, clf4],
    meta_classifier=lr,
    use_probas=True,
    random_state=43,
)

# Wrap the ensemble in the project's Classifier and execute the repeated runs;
# results are written to stk_cv_out and the value returned by run() is displayed.
stk_cv_clf = Classifier(stk_cv)
run(stk_cv_clf, tp, n_runs=n_runs, output=stk_cv_out)

100%|██████████| 30/30 [01:36<00:00,  3.23s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.910127,0.909565,0.982694,0.978421
1,0.91769,0.881761,0.985643,0.957747
2,0.906507,0.907177,0.982224,0.970152
3,0.913159,0.886663,0.984656,0.966347
4,0.913623,0.879083,0.98189,0.968553
5,0.909514,0.895456,0.980974,0.971802
6,0.905451,0.90903,0.974429,0.973326
7,0.912377,0.90042,0.977006,0.968395
8,0.908252,0.91008,0.977305,0.973745
9,0.901564,0.92285,0.978771,0.977045
