# Bagging Runs

This notebook runs the bagging ensemble with different base learners $n$ times and saves the output to a CSV file separately for each base learner.

## Imports

In [5]:
# sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run, get_params

## Data Loading

In [2]:
%%time
tp = Tadpole()

Wall time: 4.5 s


In [3]:
n_runs = 10 # change to 30 for final evaluation

In [6]:
params_dtc = get_params('dtc')
params_svc = get_params('svc')
params_lra = get_params('lra')
params_ann = get_params('ann')

In [7]:
# dtc_out = './results/bag_dtc.csv'
# svc_out = './results/bag_svc.csv'
# lra_out = './results/bag_lra.csv'
# ann_out = './results/bag_ann.csv'

# Temporary
dtc_out = None
svc_out = None
lra_out = None
ann_out = None

## Bagging Ensembles

**TODO:** grid search on bagging in another notebook

### DTC

In [8]:
dtc = DecisionTreeClassifier()

bag_clf = BaggingClassifier(base_estimator = dtc, 
                            n_estimators = 100, 
                            max_samples = 0.3, 
                            random_state = 0)
bag_dtc = Classifier(bag_clf)
run(bag_dtc, tp, n_runs=n_runs, output=dtc_out)

100%|██████████████████████████████████████████| 10/10 [00:03<00:00,  2.57it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.920742,0.908225,0.99409,0.970838
1,0.925201,0.863183,0.994353,0.960208
2,0.919033,0.916465,0.99365,0.97449
3,0.924367,0.868908,0.994467,0.962951
4,0.932745,0.870906,0.994817,0.964432
5,0.925382,0.887279,0.994356,0.969443
6,0.922637,0.897145,0.9941,0.979154
7,0.921113,0.863058,0.994355,0.970796
8,0.925032,0.884375,0.993882,0.974798
9,0.919702,0.91461,0.99398,0.976776


### SVC

In [9]:
svc = SVC(probability=True)
svc.set_params(**params_svc)

bag_clf = BaggingClassifier(base_estimator = svc, 
                            n_estimators = 100, 
                            max_samples = 0.3, 
                            random_state = 0)
bag_svc = Classifier(bag_clf)
run(bag_svc, tp, n_runs=n_runs, output=svc_out)

100%|██████████████████████████████████████████| 10/10 [00:46<00:00,  4.64s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.867533,0.910696,0.969533,0.973263
1,0.883602,0.85622,0.974299,0.954727
2,0.870472,0.882748,0.971212,0.962843
3,0.875693,0.855252,0.97069,0.966243
4,0.878515,0.869958,0.970704,0.966057
5,0.876595,0.882541,0.96984,0.969606
6,0.875987,0.886682,0.969171,0.971668
7,0.878454,0.858917,0.971357,0.966163
8,0.877156,0.873912,0.970797,0.970827
9,0.872996,0.888844,0.968555,0.976067


### LRA

In [10]:
lra = LogisticRegression()
lra.set_params(**params_lra)

bag_clf = BaggingClassifier(base_estimator = lra, 
                            n_estimators = 100, 
                            max_samples = 0.3, 
                            random_state = 0)
bag_lra = Classifier(bag_clf)
run(bag_lra, tp, n_runs=n_runs, output=lra_out)

100%|██████████████████████████████████████████| 10/10 [00:03<00:00,  3.18it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.781176,0.840034,0.932821,0.943016
1,0.800938,0.773983,0.939154,0.924547
2,0.585378,0.609326,0.933065,0.940464
3,0.795866,0.800654,0.93788,0.929735
4,0.792468,0.807863,0.931925,0.941515
5,0.793266,0.802961,0.934222,0.943338
6,0.801036,0.802551,0.931731,0.942735
7,0.805119,0.752357,0.932164,0.931036
8,0.795398,0.806977,0.933905,0.94127
9,0.74981,0.774005,0.930155,0.937721


### ANN

In [None]:
ann = MLPClassifier()
ann.set_params(**params_ann)

bag_clf = BaggingClassifier(base_estimator = ann, 
                            n_estimators = 100, 
                            max_samples = 0.3, 
                            random_state = 0)
bag_ann = Classifier(bag_clf)
run(bag_ann, tp, n_runs=n_runs, output=ann_out)

 10%|████▎                                      | 1/10 [01:10<10:35, 70.66s/it]