# Stacking Runs

This notebook runs the stacking ensemble $n$ times for each base-learner configuration and saves the results of each configuration to its own CSV file.

## Imports

In [1]:
# base learners
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# ensemble
from sklearn.ensemble import StackingClassifier
from mlxtend.classifier import StackingCVClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run, get_params

## Data Loading

In [2]:
%%time
# Data-loading step: instantiate the project-local Tadpole helper.
# The resulting `tp` object is handed to every run(...) call further down.
tp = Tadpole(debug=False)

CPU times: user 5.06 s, sys: 395 ms, total: 5.45 s
Wall time: 5.47 s


In [3]:
# how many times each stacking ensemble is run (forwarded to run(..., n_runs=...))
n_runs: int = 30

In [4]:
# look up the stored parameter set of every base learner, in the order
# logistic regression, decision tree, neural network, support vector classifier
params_lra, params_dtc, params_ann, params_svc = (
    get_params(learner_key) for learner_key in ('lra', 'dtc', 'ann', 'svc')
)

In [5]:
# CSV destinations for the per-run results:
#   stk_out    -> StackingClassifier run (base learners configured via get_params)
#   stk_cv_out -> StackingCVClassifier run (hyperparameters written out by hand)
stk_out, stk_cv_out = './results/stk.csv', './results/stk_cv.csv'

## Stacking Ensemble
### Base Learners

In [6]:
# Base learners, each configured with the parameter set fetched via get_params.
# set_params returns the estimator itself, so construction and configuration
# can be chained into a single assignment.
lra = LogisticRegression().set_params(**params_lra)
dtc = DecisionTreeClassifier().set_params(**params_dtc)
mlp = MLPClassifier().set_params(**params_ann)

# probability=True is set up front; params_svc is applied on top of it
svm = SVC(probability=True).set_params(**params_svc)

In [7]:
# (name, estimator) pairs handed to StackingClassifier as its base learners;
# dict insertion order fixes the order of the resulting list
estimators = list({
    'lra': lra,
    'dt': dtc,
    'ann': mlp,
    'svm': svm,
}.items())

In [8]:
# scikit-learn stacking: the base learners from `estimators` feed a
# logistic-regression final estimator
stk = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
    verbose=0,
)

# wrap in the project's Classifier and execute n_runs runs, writing to stk_out;
# the call stays last so its return value is rendered as the cell output
stk_clf = Classifier(stk)
run(stk_clf, tp, n_runs=n_runs, output=stk_out)

100%|██████████| 30/30 [05:09<00:00, 10.33s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.889736,0.921757,0.976693,0.978782
1,0.894107,0.851155,0.979037,0.95778
2,0.888628,0.900112,0.977321,0.97141
3,0.89955,0.882728,0.975952,0.967603
4,0.902799,0.872987,0.977426,0.971774
5,0.897686,0.88699,0.976636,0.9764
6,0.886677,0.915867,0.972948,0.976185
7,0.897295,0.878299,0.979672,0.967876
8,0.889983,0.886682,0.974431,0.971576
9,0.886044,0.899081,0.971812,0.978943


## Stacking (Grid Search Parameters)

In [9]:
# Base learners with hyperparameters written out explicitly
# (per the section heading, these values come from a grid search).
clf1 = LogisticRegression(
    class_weight='balanced',
    C=166.81005372000558,
)
clf2 = DecisionTreeClassifier(
    class_weight='balanced',
    max_depth=10,
)
clf3 = MLPClassifier(
    learning_rate='adaptive',
    hidden_layer_sizes=(50, 50, 50),
)
clf4 = SVC(
    probability=True,
    class_weight='balanced',
    C=1000,
)

# meta-classifier used on top of clf1..clf4
lr = LogisticRegression(
    class_weight='balanced',
    C=1291.5496650148827,
)

In [10]:
# mlxtend stacking: clf1..clf4 as first-level classifiers, `lr` as the
# meta-classifier, with a fixed random_state
stk_cv = StackingCVClassifier(
    classifiers=[clf1, clf2, clf3, clf4],
    meta_classifier=lr,
    random_state=43,
    use_probas=True,
)

# wrap in the project's Classifier and execute n_runs runs, writing to stk_cv_out;
# the call stays last so its return value is rendered as the cell output
stk_cv_clf = Classifier(stk_cv)
run(stk_cv_clf, tp, n_runs=n_runs, output=stk_cv_out)

100%|██████████| 30/30 [01:55<00:00,  3.85s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.916722,0.914693,0.98797,0.974259
1,0.928575,0.878116,0.989012,0.955016
2,0.922839,0.904644,0.990924,0.960532
3,0.926382,0.880711,0.990898,0.966862
4,0.930836,0.872246,0.988939,0.965725
5,0.91284,0.896939,0.986114,0.970733
6,0.906357,0.917948,0.967468,0.972462
7,0.92579,0.894633,0.989943,0.962607
8,0.909735,0.90837,0.980317,0.972565
9,0.90799,0.920028,0.98111,0.981665
