# Bagging Runs

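This notebook evaluates a bagging ensemble for each of four base learners (decision tree, support vector classifier, logistic regression and multi-layer perceptron). Each ensemble is run `n_runs` times, and the results for each base learner are saved to a separate CSV file.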

## Imports

In [1]:
# sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run, get_params

## Data Loading

In [2]:
%%time
# Instantiate the project's Tadpole object; it is handed to every run(...) call
# below as the second argument.
# NOTE(review): presumably the dataset is loaded/preprocessed in the constructor
# (this cell is the only timed one) — confirm in tadpole.py.
tp = Tadpole()

Wall time: 4.6 s


In [3]:
# number of runs: forwarded as `n_runs` to every run(...) call below, so it
# applies equally to all four bagging ensembles
n_runs = 30

In [4]:
# Base-learner hyper-parameters, looked up by short key through get_params
# (imported from main). Each result is later unpacked into set_params(**...)
# on the matching estimator.
params_dtc = get_params('dtc')
params_svc = get_params('svc')
params_lra = get_params('lra')
params_ann = get_params('ann')

# Bagging settings, one dict per base learner. Each dict is unpacked into
# BaggingClassifier(...) in the corresponding section below.
params_bag_dtc = {
    'max_samples': 0.9,
    'n_estimators': 100,
}
params_bag_svc = {
    'max_samples': 0.7,
    'n_estimators': 10,
}
params_bag_lra = {
    'max_samples': 0.9,
    'n_estimators': 50,
}
params_bag_ann = {
    'max_samples': 0.7,
    'n_estimators': 10,
}

In [5]:
# output: one CSV path per base learner, passed as `output=` to the matching
# run(...) call below.
# NOTE(review): paths are relative to the notebook's working directory; it is
# not visible here whether run() creates ./results if it is missing — confirm.
dtc_out = './results/bag_dtc.csv'
svc_out = './results/bag_svc.csv'
lra_out = './results/bag_lra.csv'
ann_out = './results/bag_ann.csv'

## Bagging Ensembles

### DTC

In [6]:
# Base learner: a decision tree configured with the hyper-parameters in params_dtc.
dtc = DecisionTreeClassifier()
dtc.set_params(**params_dtc)

# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` (deprecated in scikit-learn 1.2, removed
# in 1.4), but it is the first positional parameter under both names, so this
# call works across releases.
# NOTE(review): random_state is fixed at 0, so the ensemble's own resampling is
# seeded identically on every fit; run-to-run variation presumably comes from
# run() itself — confirm in main.py.
bag_clf = BaggingClassifier(dtc, random_state=0, **params_bag_dtc)

# Wrap the ensemble in the project's Classifier and hand it to run() together
# with the data object, the number of runs and the CSV output path. The call is
# left as the last expression so its return value is displayed by the notebook.
bag_dtc = Classifier(bag_clf)
run(bag_dtc, tp, n_runs=n_runs, output=dtc_out)

100%|██████████████████████████████████████████| 30/30 [00:07<00:00,  3.77it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.856945,0.895437,0.9527,0.961508
1,0.866439,0.850087,0.957587,0.944215
2,0.861661,0.875358,0.957457,0.957867
3,0.872293,0.840571,0.960191,0.942431
4,0.87181,0.842507,0.957702,0.952599
5,0.864987,0.876407,0.956518,0.959086
6,0.866141,0.869261,0.956205,0.959336
7,0.857243,0.87581,0.957659,0.957945
8,0.866548,0.869714,0.954665,0.963973
9,0.862557,0.874842,0.956928,0.951816


### SVC

In [7]:
# Base learner: a support vector classifier created with probability=True and
# then configured with the hyper-parameters in params_svc.
svc = SVC(probability=True)
svc.set_params(**params_svc)

# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` (deprecated in scikit-learn 1.2, removed
# in 1.4), but it is the first positional parameter under both names, so this
# call works across releases.
# NOTE(review): random_state is fixed at 0, so the ensemble's own resampling is
# seeded identically on every fit; run-to-run variation presumably comes from
# run() itself — confirm in main.py.
bag_clf = BaggingClassifier(svc, random_state=0, **params_bag_svc)

# Wrap the ensemble in the project's Classifier and hand it to run() together
# with the data object, the number of runs and the CSV output path. The call is
# left as the last expression so its return value is displayed by the notebook.
bag_svc = Classifier(bag_clf)
run(bag_svc, tp, n_runs=n_runs, output=svc_out)

100%|██████████████████████████████████████████| 30/30 [00:49<00:00,  1.65s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.876054,0.923466,0.96828,0.97215
1,0.89127,0.852802,0.971763,0.955661
2,0.874406,0.91216,0.970919,0.963947
3,0.883361,0.856961,0.970342,0.965777
4,0.887037,0.868249,0.971202,0.966762
5,0.882666,0.887443,0.966933,0.97019
6,0.877156,0.889874,0.968394,0.972584
7,0.891023,0.881718,0.970995,0.966682
8,0.889354,0.883716,0.970364,0.971323
9,0.878583,0.896938,0.966522,0.973026


### LRA

In [8]:
# Base learner: logistic regression configured with the hyper-parameters in
# params_lra.
lra = LogisticRegression()
lra.set_params(**params_lra)

# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` (deprecated in scikit-learn 1.2, removed
# in 1.4), but it is the first positional parameter under both names, so this
# call works across releases.
# NOTE(review): random_state is fixed at 0, so the ensemble's own resampling is
# seeded identically on every fit; run-to-run variation presumably comes from
# run() itself — confirm in main.py.
bag_clf = BaggingClassifier(lra, random_state=0, **params_bag_lra)

# Wrap the ensemble in the project's Classifier and hand it to run() together
# with the data object, the number of runs and the CSV output path. The call is
# left as the last expression so its return value is displayed by the notebook.
bag_lra = Classifier(bag_clf)
run(bag_lra, tp, n_runs=n_runs, output=lra_out)

100%|██████████████████████████████████████████| 30/30 [00:09<00:00,  3.27it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.890143,0.918338,0.966076,0.972312
1,0.895853,0.880875,0.969274,0.956673
2,0.890318,0.922335,0.966995,0.971322
3,0.899941,0.872554,0.967721,0.966697
4,0.895482,0.899226,0.96686,0.970623
5,0.895807,0.894796,0.966865,0.97068
6,0.886564,0.920254,0.965235,0.976019
7,0.892655,0.906001,0.967219,0.967641
8,0.892841,0.891955,0.967316,0.968855
9,0.888979,0.90903,0.964474,0.976066


### ANN

In [9]:
# Base learner: a multi-layer perceptron configured with the hyper-parameters
# in params_ann.
ann = MLPClassifier()
ann.set_params(**params_ann)

# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` (deprecated in scikit-learn 1.2, removed
# in 1.4), but it is the first positional parameter under both names, so this
# call works across releases.
# NOTE(review): random_state is fixed at 0, so the ensemble's own resampling is
# seeded identically on every fit; run-to-run variation presumably comes from
# run() itself — confirm in main.py.
bag_clf = BaggingClassifier(ann, random_state=0, **params_bag_ann)

# Wrap the ensemble in the project's Classifier and hand it to run() together
# with the data object, the number of runs and the CSV output path. The call is
# left as the last expression so its return value is displayed by the notebook.
bag_ann = Classifier(bag_clf)
run(bag_ann, tp, n_runs=n_runs, output=ann_out)

100%|██████████████████████████████████████████| 30/30 [06:40<00:00, 13.34s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.884716,0.910821,0.97891,0.975474
1,0.893129,0.836265,0.983305,0.96029
2,0.892856,0.888064,0.981319,0.968552
3,0.902016,0.866375,0.980658,0.965294
4,0.902,0.865778,0.981006,0.968262
5,0.893443,0.869958,0.980894,0.972937
6,0.900399,0.881081,0.979252,0.975867
7,0.892249,0.848457,0.981058,0.967793
8,0.893114,0.871441,0.981053,0.970628
9,0.892094,0.904209,0.980121,0.976358
