# Base Learner Runs

This notebook runs each base learner `n_runs` times, once with the parameters from the grid search experiment and once with default parameters, and saves the results of every configuration to its own CSV file.

## Imports

In [1]:
# stdlib
import os

# sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run, get_params

## Data Loading

In [2]:
%%time
# Build the Tadpole data object once; every run() call further down receives it
# as its second argument. %%time reports how long the construction takes.
tp = Tadpole()

CPU times: user 4.95 s, sys: 391 ms, total: 5.34 s
Wall time: 5.36 s


In [3]:
# Number of repetitions handed to run() (as n_runs=) for every base learner,
# in both the tuned and the default-parameter sections.
n_runs = 30

In [4]:
# Parameters per base learner, looked up by the learner's short key.
# (Presumably the best settings from the grid search experiment; get_params
# is defined in main.py and not visible here.)
params_dtc, params_svc, params_lra, params_ann = (
    get_params(key) for key in ('dtc', 'svc', 'lra', 'ann')
)

# Default-parameter runs: a separate empty dict for each learner.
params_dtc_def, params_svc_def, params_lra_def, params_ann_def = {}, {}, {}, {}

In [5]:
# Directory that receives one CSV per base learner and parameter set.
RESULTS_DIR = './results'

# Create the directory up front so the notebook also runs on a fresh checkout.
# Whether run() creates missing directories itself is not visible from here;
# exist_ok=True makes this a no-op when the directory is already present.
os.makedirs(RESULTS_DIR, exist_ok=True)

# output (best params) filenames
dtc_out = RESULTS_DIR + '/dtc.csv'
svc_out = RESULTS_DIR + '/svc.csv'
lra_out = RESULTS_DIR + '/lra.csv'
ann_out = RESULTS_DIR + '/ann.csv'

# output (default params) filenames
dtc_out_def = RESULTS_DIR + '/def_dtc.csv'
svc_out_def = RESULTS_DIR + '/def_svc.csv'
lra_out_def = RESULTS_DIR + '/def_lra.csv'
ann_out_def = RESULTS_DIR + '/def_ann.csv'

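**TODO:** Should the classifiers themselves also be given a fixed random state (seed)?

Undecided. Without one, the stochastic learners may produce slightly different numbers each time the notebook is executed.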

## Base Learners: Parameters from Grid Search Experiment
### DTC

In [6]:
# Decision tree paired with params_dtc (from get_params('dtc')). How Classifier
# applies the parameters is defined in classifier.py and not visible here.
dtc = Classifier(DecisionTreeClassifier(), params_dtc)
# Kept as the cell's last expression so that whatever run() returns is displayed.
run(dtc, tp, n_runs=n_runs, output=dtc_out)

100%|██████████| 30/30 [00:00<00:00, 72.12it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.666286,0.650966,0.842262,0.821925
1,0.787329,0.738146,0.905744,0.885722
2,0.70128,0.744083,0.851441,0.859964
3,0.84446,0.843826,0.928899,0.923627
4,0.758249,0.760535,0.892951,0.891104
5,0.814562,0.815195,0.922564,0.925005
6,0.708992,0.71807,0.842015,0.844372
7,0.669512,0.613632,0.836646,0.812738
8,0.586682,0.606072,0.766571,0.763006
9,0.696412,0.687731,0.854536,0.841813


### SVC

In [7]:
# Support vector classifier paired with params_svc (from get_params('svc')).
# probability=True enables scikit-learn's probability estimates for SVC;
# presumably required for the mAUC columns -- TODO confirm in main.py.
svc = Classifier(SVC(probability=True), params_svc)
# Kept as the cell's last expression so that whatever run() returns is displayed.
run(svc, tp, n_runs=n_runs, output=svc_out)

100%|██████████| 30/30 [00:12<00:00,  2.41it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.89712,0.916402,0.966227,0.973028
1,0.902532,0.881761,0.971684,0.954167
2,0.892908,0.906579,0.968911,0.959662
3,0.90129,0.880793,0.970562,0.966664
4,0.902346,0.879083,0.967806,0.967737
5,0.896379,0.880153,0.967678,0.970711
6,0.895714,0.913932,0.963378,0.967101
7,0.899834,0.883634,0.96652,0.960561
8,0.900447,0.89626,0.97073,0.967619
9,0.893691,0.909256,0.965521,0.974655


### LRA

In [8]:
# Logistic regression paired with params_lra (from get_params('lra')). How
# Classifier applies the parameters is defined in classifier.py, not visible here.
lra = Classifier(LogisticRegression(), params_lra)
# Kept as the cell's last expression so that whatever run() returns is displayed.
run(lra, tp, n_runs=n_runs, output=lra_out)

100%|██████████| 30/30 [00:00<00:00, 67.06it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.890812,0.918112,0.966695,0.972436
1,0.897712,0.880875,0.969817,0.956873
2,0.888217,0.926351,0.967176,0.97143
3,0.899215,0.882358,0.967932,0.966673
4,0.895333,0.896919,0.967272,0.970815
5,0.896775,0.894796,0.967329,0.971307
6,0.887661,0.923673,0.965709,0.976834
7,0.895776,0.906598,0.96784,0.967061
8,0.894458,0.893664,0.967801,0.968511
9,0.889721,0.910513,0.964945,0.976248


### ANN

In [9]:
# Multi-layer perceptron paired with params_ann (from get_params('ann')). How
# Classifier applies the parameters is defined in classifier.py, not visible here.
ann = Classifier(MLPClassifier(), params_ann)
# Kept as the cell's last expression so that whatever run() returns is displayed.
run(ann, tp, n_runs=n_runs, output=ann_out)

100%|██████████| 30/30 [00:32<00:00,  1.08s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.889324,0.918256,0.979422,0.976445
1,0.902119,0.860073,0.980387,0.961774
2,0.875041,0.882029,0.976078,0.968955
3,0.890992,0.867487,0.979781,0.963271
4,0.903473,0.876343,0.97873,0.969898
5,0.886878,0.88283,0.97856,0.974669
6,0.887176,0.885097,0.976264,0.975689
7,0.887764,0.850701,0.979929,0.967576
8,0.89784,0.884375,0.97892,0.972196
9,0.870737,0.882357,0.971964,0.97693


## Base Learners: Default Parameters
### DTC

In [10]:
# Decision tree with params_dtc_def, which is an empty dict, so no parameter
# overrides are supplied here.
# NOTE(review): this rebinds `dtc`; the tuned classifier created in the grid
# search section is no longer reachable under that name after this cell runs.
dtc = Classifier(DecisionTreeClassifier(), params_dtc_def)
# Results go to dtc_out_def so the tuned-parameter CSV is not overwritten.
run(dtc, tp, n_runs=n_runs, output=dtc_out_def)

100%|██████████| 30/30 [00:00<00:00, 55.76it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,1.0,0.817894,1.0,0.86342
1,1.0,0.84683,1.0,0.885122
2,1.0,0.867695,1.0,0.900771
3,1.0,0.759566,1.0,0.819675
4,1.0,0.814104,1.0,0.860578
5,1.0,0.76729,1.0,0.825467
6,1.0,0.833236,1.0,0.874927
7,1.0,0.79116,1.0,0.84337
8,1.0,0.834719,1.0,0.876039
9,1.0,0.84955,1.0,0.887162


### SVC

In [11]:
# SVC with params_svc_def, which is an empty dict, so no parameter overrides
# are supplied here; probability=True is still set on the estimator itself.
# NOTE(review): this rebinds `svc`; the tuned classifier created in the grid
# search section is no longer reachable under that name after this cell runs.
svc = Classifier(SVC(probability=True), params_svc_def)
# Results go to svc_out_def so the tuned-parameter CSV is not overwritten.
run(svc, tp, n_runs=n_runs, output=svc_out_def)

100%|██████████| 30/30 [00:07<00:00,  4.29it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.874258,0.887568,0.973571,0.968962
1,0.883995,0.838345,0.976554,0.950847
2,0.883541,0.869775,0.974725,0.962035
3,0.887934,0.85688,0.974199,0.960363
4,0.879463,0.864069,0.974298,0.964596
5,0.881595,0.860299,0.973785,0.963103
6,0.890075,0.857169,0.972306,0.972502
7,0.889102,0.802302,0.975139,0.956837
8,0.889664,0.86312,0.97348,0.9699
9,0.876251,0.877229,0.970799,0.973923


### LRA

In [12]:
# Logistic regression with params_lra_def, which is an empty dict, so no
# parameter overrides are supplied here.
# NOTE(review): this rebinds `lra`; the tuned classifier created in the grid
# search section is no longer reachable under that name after this cell runs.
lra = Classifier(LogisticRegression(), params_lra_def)
# Results go to lra_out_def so the tuned-parameter CSV is not overwritten.
run(lra, tp, n_runs=n_runs, output=lra_out_def)

100%|██████████| 30/30 [00:01<00:00, 19.00it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.802354,0.824813,0.958034,0.967494
1,0.816241,0.772172,0.962118,0.944795
2,0.806483,0.829181,0.960555,0.954671
3,0.812317,0.775239,0.961239,0.950376
4,0.808435,0.809943,0.959566,0.959496
5,0.797911,0.838653,0.959313,0.961266
6,0.812472,0.781605,0.958726,0.959901
7,0.812091,0.772499,0.960753,0.958558
8,0.806128,0.809366,0.959753,0.963492
9,0.799177,0.824134,0.957909,0.966269


### ANN

In [13]:
# Multi-layer perceptron with params_ann_def, which is an empty dict, so no
# parameter overrides are supplied here.
# NOTE(review): this rebinds `ann`; the tuned classifier created in the grid
# search section is no longer reachable under that name after this cell runs.
ann = Classifier(MLPClassifier(), params_ann_def)
# Results go to ann_out_def so the tuned-parameter CSV is not overwritten.
run(ann, tp, n_runs=n_runs, output=ann_out_def)

100%|██████████| 30/30 [00:27<00:00,  1.10it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.874608,0.909935,0.972353,0.975527
1,0.88368,0.839395,0.975088,0.957072
2,0.878249,0.881515,0.972818,0.968322
3,0.888093,0.862359,0.974433,0.966067
4,0.885575,0.868373,0.973507,0.969392
5,0.883171,0.87175,0.972721,0.971204
6,0.87952,0.881081,0.971797,0.974273
7,0.881148,0.873151,0.975014,0.967183
8,0.874366,0.880585,0.973053,0.972546
9,0.8726,0.905919,0.9718,0.979766
