# Base Learner Runs

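This notebook runs each base learner $n$ times (`n_runs`) and saves the results of those runs to a separate CSV file per base learner — once using the parameters from the grid search experiment and once using the default parameters.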

## Imports

In [1]:
# sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run, get_params

## Data Loading

In [2]:
%%time
# Build the project's Tadpole object once; every run(...) call in this notebook receives it as `tp`.
# Construction took ~5.7 s in the recorded run (presumably it loads the dataset — confirm in tadpole.py).
tp = Tadpole()

CPU times: user 5.33 s, sys: 410 ms, total: 5.74 s
Wall time: 5.74 s


In [3]:
# Number of repetitions per base learner; passed as `n_runs` to every run(...) call in this notebook.
n_runs = 30

In [4]:
# Parameter sets looked up per base learner by its short key
# (presumably the best settings from the grid search experiment — confirm in main.py).
params_dtc, params_svc, params_lra, params_ann = (
    get_params(key) for key in ('dtc', 'svc', 'lra', 'ann')
)

# Empty parameter sets for the "default parameters" experiments.
# Four distinct dict objects on purpose, so mutating one can never leak into another.
params_dtc_def, params_svc_def, params_lra_def, params_ann_def = (
    dict(),
    dict(),
    dict(),
    dict(),
)

In [5]:
# CSV destinations for the runs that use the best (grid-searched) parameters
dtc_out, svc_out, lra_out, ann_out = (
    './results/dtc.csv',
    './results/svc.csv',
    './results/lra.csv',
    './results/ann.csv',
)

# CSV destinations for the runs that use the default parameters
dtc_out_def, svc_out_def, lra_out_def, ann_out_def = (
    './results/def_dtc.csv',
    './results/def_svc.csv',
    './results/def_lra.csv',
    './results/def_ann.csv',
)

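**TODO (undecided):** Should we also pass a fixed random state (seed) to the classifiers themselves?

This has not been decided yet. Until it is, any randomness inside the classifiers is not fixed, so the results below may differ between re-runs of this notebook.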

## Base Learners: Parameters from Grid Search Experiment
### DTC

In [6]:
# Decision tree wrapped together with the parameters obtained for 'dtc'.
dtc = Classifier(
    DecisionTreeClassifier(),
    params_dtc,
)
# Repeat the experiment n_runs times; per the notebook intro the results go to dtc_out.
# Kept as the last expression so the cell displays the value run(...) returns.
run(
    dtc,
    tp,
    n_runs=n_runs,
    output=dtc_out,
)

100%|██████████| 30/30 [00:00<00:00, 73.19it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.676373,0.622112,0.84284,0.798982
1,0.697163,0.689234,0.859279,0.837495
2,0.617494,0.6383,0.791474,0.801502
3,0.741932,0.728884,0.876773,0.858609
4,0.717832,0.730343,0.852407,0.863849
5,0.650465,0.67033,0.816837,0.825736
6,0.752543,0.731846,0.891454,0.869768
7,0.856851,0.848107,0.953115,0.955719
8,0.803084,0.804856,0.910809,0.911898
9,0.735548,0.704886,0.869218,0.827869


### SVC

In [7]:
# Support vector classifier wrapped together with the parameters obtained for 'svc'.
# probability=True turns on SVC's probability estimates
# (presumably required for the mAUC columns — confirm in main.py).
svc = Classifier(
    SVC(probability=True),
    params_svc,
)
# Repeat the experiment n_runs times; per the notebook intro the results go to svc_out.
# Kept as the last expression so the cell displays the value run(...) returns.
run(
    svc,
    tp,
    n_runs=n_runs,
    output=svc_out,
)

100%|██████████| 30/30 [00:14<00:00,  2.02it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.89712,0.916402,0.966213,0.973142
1,0.902532,0.881761,0.971582,0.95408
2,0.892908,0.906579,0.9689,0.959984
3,0.90129,0.880793,0.970341,0.966377
4,0.902346,0.879083,0.96784,0.967702
5,0.896379,0.880153,0.967615,0.970337
6,0.895714,0.913932,0.963358,0.966962
7,0.899834,0.883634,0.966444,0.960393
8,0.900447,0.89626,0.970728,0.967453
9,0.893691,0.909256,0.965553,0.974565


### LRA

In [8]:
# Logistic regression wrapped together with the parameters obtained for 'lra'.
lra = Classifier(
    LogisticRegression(),
    params_lra,
)
# Repeat the experiment n_runs times; per the notebook intro the results go to lra_out.
# Kept as the last expression so the cell displays the value run(...) returns.
run(
    lra,
    tp,
    n_runs=n_runs,
    output=lra_out,
)

100%|██████████| 30/30 [00:00<00:00, 46.11it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.890812,0.918112,0.966709,0.972434
1,0.897712,0.880875,0.969845,0.956953
2,0.88779,0.926351,0.967183,0.97142
3,0.899215,0.882358,0.967943,0.966708
4,0.895333,0.896919,0.967267,0.970838
5,0.896775,0.894796,0.967328,0.971317
6,0.887661,0.923673,0.965707,0.976821
7,0.895776,0.906598,0.967838,0.967048
8,0.894458,0.893664,0.967813,0.968546
9,0.889721,0.910513,0.964973,0.976258


### ANN

In [9]:
# Multi-layer perceptron wrapped together with the parameters obtained for 'ann'.
ann = Classifier(
    MLPClassifier(),
    params_ann,
)
# Repeat the experiment n_runs times; per the notebook intro the results go to ann_out.
# Kept as the last expression so the cell displays the value run(...) returns.
run(
    ann,
    tp,
    n_runs=n_runs,
    output=ann_out,
)

100%|██████████| 30/30 [00:49<00:00,  1.64s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.898638,0.913065,0.981447,0.975117
1,0.900991,0.864686,0.97728,0.956458
2,0.892223,0.873709,0.979237,0.968315
3,0.890534,0.872038,0.97768,0.966078
4,0.894823,0.872389,0.977491,0.971079
5,0.904447,0.882089,0.978383,0.974429
6,0.889962,0.898917,0.97518,0.977336
7,0.897624,0.854737,0.978789,0.965227
8,0.853163,0.855068,0.977335,0.974176
9,0.886152,0.870063,0.981013,0.975278


## Base Learners: Default Parameters
### DTC

In [10]:
# Decision tree with the empty parameter set (default-parameters experiment).
# NOTE: this rebinds the name `dtc`.
dtc = Classifier(
    DecisionTreeClassifier(),
    params_dtc_def,
)
# Repeat the experiment n_runs times; per the notebook intro the results go to dtc_out_def.
# Kept as the last expression so the cell displays the value run(...) returns.
run(
    dtc,
    tp,
    n_runs=n_runs,
    output=dtc_out_def,
)

100%|██████████| 30/30 [00:00<00:00, 40.62it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,1.0,0.801767,1.0,0.851325
1,1.0,0.867261,1.0,0.900446
2,1.0,0.861146,1.0,0.895859
3,1.0,0.771903,1.0,0.828927
4,1.0,0.814928,1.0,0.861196
5,1.0,0.784528,1.0,0.838396
6,1.0,0.838962,1.0,0.879221
7,1.0,0.794415,1.0,0.845811
8,1.0,0.847653,1.0,0.88574
9,1.0,0.84642,1.0,0.884815


### SVC

In [11]:
# Support vector classifier with the empty parameter set (default-parameters experiment).
# probability=True turns on SVC's probability estimates
# (presumably required for the mAUC columns — confirm in main.py).
# NOTE: this rebinds the name `svc`.
svc = Classifier(
    SVC(probability=True),
    params_svc_def,
)
# Repeat the experiment n_runs times; per the notebook intro the results go to svc_out_def.
# Kept as the last expression so the cell displays the value run(...) returns.
run(
    svc,
    tp,
    n_runs=n_runs,
    output=svc_out_def,
)

100%|██████████| 30/30 [00:10<00:00,  2.91it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.874258,0.887568,0.97355,0.968913
1,0.883995,0.838345,0.97668,0.950976
2,0.883541,0.869775,0.974789,0.961961
3,0.887934,0.85688,0.974257,0.960422
4,0.879463,0.864069,0.974449,0.964512
5,0.881595,0.860299,0.97375,0.963024
6,0.890075,0.857169,0.972156,0.972661
7,0.889102,0.802302,0.97509,0.956633
8,0.889664,0.86312,0.973492,0.969944
9,0.876251,0.877229,0.970814,0.973951


### LRA

In [12]:
# Logistic regression with the empty parameter set (default-parameters experiment).
# NOTE: this rebinds the name `lra`.
lra = Classifier(
    LogisticRegression(),
    params_lra_def,
)
# Repeat the experiment n_runs times; per the notebook intro the results go to lra_out_def.
# Kept as the last expression so the cell displays the value run(...) returns.
run(
    lra,
    tp,
    n_runs=n_runs,
    output=lra_out_def,
)

100%|██████████| 30/30 [00:02<00:00, 15.00it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.802354,0.824813,0.958034,0.967494
1,0.816241,0.772172,0.962118,0.944795
2,0.806483,0.829181,0.960555,0.954671
3,0.812317,0.775239,0.961239,0.950376
4,0.808435,0.809943,0.959566,0.959496
5,0.797911,0.838653,0.959313,0.961266
6,0.812472,0.781605,0.958726,0.959901
7,0.812091,0.772499,0.960753,0.958558
8,0.806128,0.809366,0.959753,0.963492
9,0.799177,0.824134,0.957909,0.966269


### ANN

In [13]:
# Multi-layer perceptron with the empty parameter set (default-parameters experiment).
# NOTE: this rebinds the name `ann`.
ann = Classifier(
    MLPClassifier(),
    params_ann_def,
)
# Repeat the experiment n_runs times; per the notebook intro the results go to ann_out_def.
# Kept as the last expression so the cell displays the value run(...) returns.
run(
    ann,
    tp,
    n_runs=n_runs,
    output=ann_out_def,
)

100%|██████████| 30/30 [00:46<00:00,  1.55s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.879783,0.912839,0.972549,0.974851
1,0.885792,0.838571,0.975376,0.957095
2,0.882826,0.895869,0.972769,0.968503
3,0.887228,0.860587,0.974524,0.966268
4,0.889236,0.861762,0.97353,0.968957
5,0.882502,0.876281,0.972403,0.972155
6,0.881173,0.882791,0.971747,0.97361
7,0.881055,0.863635,0.97511,0.96723
8,0.870628,0.87486,0.973145,0.972279
9,0.8726,0.9025,0.97122,0.980137
