# Base Learner Runs

This notebook runs each base learner `n_runs` times and saves the per-run results to a separate CSV file for each base learner. (Saving is currently disabled: the output paths below are temporarily set to `None`.)

## Imports

In [1]:
# sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run, get_params

## Data Loading

In [2]:
%%time
# Build the project's Tadpole object; it is handed to every run() call below.
# NOTE(review): presumably this loads and prepares the TADPOLE dataset (the cell
# sits under "Data Loading" and is timed) -- confirm in tadpole.py.
tp = Tadpole()

Wall time: 4.47 s


In [None]:
# Number of repeated runs per base learner; forwarded to run() as n_runs.
# Kept small for quick iteration -- change to 30 for final evaluation.
n_runs = 10

In [6]:
# Look up the parameter settings for each base learner by its short name
# (decision tree, SVC, logistic regression, MLP), in that order.
# NOTE(review): the structure returned by get_params is defined in main.py --
# presumably the tuned / grid-search hyper-parameters; confirm there.
params_dtc, params_svc, params_lra, params_ann = (
    get_params(learner_key) for learner_key in ('dtc', 'svc', 'lra', 'ann')
)

In [7]:
# Output locations for each base learner's results.
#
# Saving is switched off by default (every *_out is None), which matches the
# notebook's current behaviour. Flip SAVE_RESULTS to True to write one CSV per
# learner under RESULTS_DIR instead of commenting/uncommenting paths by hand.
# NOTE(review): assumes run() skips saving when output is None and that
# RESULTS_DIR already exists -- confirm in main.run.
SAVE_RESULTS = False
RESULTS_DIR = './results'

dtc_out, svc_out, lra_out, ann_out = (
    '{}/{}.csv'.format(RESULTS_DIR, learner_key) if SAVE_RESULTS else None
    for learner_key in ('dtc', 'svc', 'lra', 'ann')
)

**TODO:** Decide whether to also pass a random state (seed) to the classifiers themselves, so that individual runs are reproducible.

## Base Learners

### DTC

**NOTE:** DTC with the best params from grid search sometimes performs much worse (i.e., less consistently) than DTC with default params.

In [8]:
# Pair a default-constructed decision tree with the parameters from
# get_params('dtc'), then execute it n_runs times on the Tadpole data.
dtc_estimator = DecisionTreeClassifier()
dtc = Classifier(dtc_estimator, params_dtc)

# Kept as the cell's last expression so the per-run BCA / mAUC table is displayed.
run(dtc, tp, n_runs=n_runs, output=dtc_out)

100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 54.15it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,1.0,0.810912,1.0,0.858184
1,1.0,0.844235,1.0,0.883176
2,1.0,0.844793,1.0,0.883595
3,1.0,0.763211,1.0,0.822408
4,1.0,0.814701,1.0,0.861026
5,1.0,0.772418,1.0,0.829313
6,1.0,0.815525,1.0,0.861644
7,1.0,0.797256,1.0,0.847942
8,1.0,0.843864,1.0,0.882898
9,1.0,0.852969,1.0,0.889727


### SVC

In [9]:
# Pair an SVC (probability estimates enabled) with the parameters from
# get_params('svc'), then execute it n_runs times on the Tadpole data.
svc_estimator = SVC(probability=True)
svc = Classifier(svc_estimator, params_svc)

# Kept as the cell's last expression so the per-run BCA / mAUC table is displayed.
run(svc, tp, n_runs=n_runs, output=svc_out)

100%|██████████████████████████████████████████| 10/10 [00:04<00:00,  2.17it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.89712,0.916402,0.966224,0.972994
1,0.902532,0.881761,0.971524,0.954123
2,0.892908,0.906579,0.968978,0.960283
3,0.90129,0.880793,0.970455,0.966464
4,0.902346,0.879083,0.967706,0.967304
5,0.896379,0.880153,0.967637,0.970254
6,0.895714,0.913932,0.963426,0.967246
7,0.899834,0.883634,0.966508,0.960295
8,0.900447,0.89626,0.970685,0.967474
9,0.893691,0.909256,0.965578,0.974704


### LRA

In [10]:
# Pair a default-constructed logistic regression with the parameters from
# get_params('lra'), then execute it n_runs times on the Tadpole data.
lra_estimator = LogisticRegression()
lra = Classifier(lra_estimator, params_lra)

# Kept as the cell's last expression so the per-run BCA / mAUC table is displayed.
run(lra, tp, n_runs=n_runs, output=lra_out)

100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 49.99it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.890812,0.918112,0.966716,0.972457
1,0.897712,0.880875,0.969845,0.956953
2,0.88779,0.926351,0.967181,0.971432
3,0.899215,0.882358,0.967938,0.966696
4,0.895333,0.896919,0.967278,0.970838
5,0.896775,0.894796,0.967312,0.97132
6,0.887234,0.923673,0.965724,0.976834
7,0.895776,0.906598,0.96784,0.967061
8,0.89403,0.893664,0.967799,0.968511
9,0.890725,0.910513,0.964941,0.976227


### ANN

In [11]:
# Pair a default-constructed MLP with the parameters from get_params('ann'),
# then execute it n_runs times on the Tadpole data.
ann_estimator = MLPClassifier()
ann = Classifier(ann_estimator, params_ann)

# Kept as the cell's last expression so the per-run BCA / mAUC table is displayed.
run(ann, tp, n_runs=n_runs, output=ann_out)

100%|██████████████████████████████████████████| 10/10 [00:16<00:00,  1.63s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.881348,0.908225,0.978928,0.97514
1,0.895065,0.834782,0.983343,0.9588
2,0.892233,0.90213,0.976882,0.966923
3,0.89716,0.872038,0.980321,0.96518
4,0.884072,0.839664,0.980533,0.967615
5,0.891513,0.879782,0.976196,0.972597
6,0.893917,0.896013,0.976658,0.978458
7,0.898283,0.873748,0.981886,0.967765
8,0.895868,0.883489,0.978209,0.97266
9,0.883789,0.882357,0.98069,0.976398
