# Base Learner Runs (with Refit)

This notebook runs each base learner $n$ times (`n_runs`, set below) and saves the output to a separate CSV file for each base learner. It uses the `sfm` model to refit the data from the Features Importance experiment.

## Imports

In [1]:
# sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run, get_params

## Data Loading

In [2]:
%%time
# Build the Tadpole object that is handed to every run() call below.
# %%time must stay on the first line of the cell; it reports how long construction takes.
# NOTE(review): presumably the constructor loads the dataset (section is "Data Loading") — confirm in tadpole.py.
tp = Tadpole()

CPU times: user 10.8 s, sys: 684 ms, total: 11.5 s
Wall time: 11.6 s


In [3]:
# Number of repetitions; passed as `n_runs` to every run() call below.
n_runs = 30

In [4]:
# Fetch the parameter set for each base learner in one pass.
# Order matters: each key lines up with the name at the same position on the left.
params_dtc, params_svc, params_lra, params_ann = map(
    get_params,
    ('refit_dtc', 'refit_svc', 'refit_lra', 'refit_ann'),
)

In [5]:
# Destination CSV paths, one per base learner; each is passed to run() as `output`.
dtc_out, svc_out, lra_out, ann_out = (
    './results/refit_dtc.csv',
    './results/refit_svc.csv',
    './results/refit_lra.csv',
    './results/refit_ann.csv',
)

## Base Learners: Parameters from Grid Search (with Refit) Experiment
### DTC

In [6]:
# Pair a default-constructed decision tree with the parameters loaded for 'refit_dtc'.
dtc = Classifier(
    DecisionTreeClassifier(),
    params_dtc,
)

# Keep the call as the cell's last expression so its result is displayed.
run(
    dtc,
    tp,
    n_runs=n_runs,
    output=dtc_out,
    refit=True,
)

100%|██████████| 30/30 [00:27<00:00,  1.09it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.770184,0.781214,0.903366,0.897843
1,0.75629,0.702932,0.880984,0.835268
2,0.780038,0.789929,0.919506,0.914871
3,0.868548,0.854121,0.948348,0.937404
4,0.821759,0.836124,0.898156,0.899625
5,0.839349,0.820261,0.933687,0.933336
6,0.765043,0.746326,0.882224,0.872584
7,0.837753,0.826647,0.941427,0.930423
8,0.823965,0.799522,0.92012,0.902128
9,0.653356,0.657582,0.812052,0.803972


### SVC

In [7]:
# Pair an SVC (constructed with probability=True) with the parameters loaded for 'refit_svc'.
svc = Classifier(
    SVC(probability=True),
    params_svc,
)

# Keep the call as the cell's last expression so its result is displayed.
run(
    svc,
    tp,
    n_runs=n_runs,
    output=svc_out,
    refit=True,
)

100%|██████████| 30/30 [00:29<00:00,  1.01it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.894129,0.914467,0.965114,0.975795
1,0.901193,0.878116,0.965839,0.945041
2,0.893593,0.906353,0.969517,0.964537
3,0.897815,0.890144,0.971304,0.967459
4,0.899355,0.90211,0.961355,0.96076
5,0.89712,0.907855,0.963393,0.970623
6,0.889361,0.915415,0.960355,0.964843
7,0.896101,0.894035,0.964035,0.963639
8,0.895545,0.901162,0.960721,0.965742
9,0.893948,0.923736,0.957993,0.974002


### LRA

In [8]:
# Pair a default-constructed logistic regression with the parameters loaded for 'refit_lra'.
lra = Classifier(
    LogisticRegression(),
    params_lra,
)

# Keep the call as the cell's last expression so its result is displayed.
run(
    lra,
    tp,
    n_runs=n_runs,
    output=lra_out,
    refit=True,
)

100%|██████████| 30/30 [00:25<00:00,  1.16it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.890082,0.911274,0.971318,0.97551
1,0.902892,0.881535,0.973871,0.960823
2,0.897939,0.901225,0.971825,0.971041
3,0.8954,0.898773,0.972934,0.969762
4,0.897166,0.906126,0.97174,0.971757
5,0.895194,0.899906,0.970409,0.976761
6,0.890735,0.918834,0.970678,0.977504
7,0.89436,0.884602,0.971346,0.971045
8,0.892981,0.904354,0.972163,0.972193
9,0.887595,0.918607,0.96952,0.981639


### ANN

In [9]:
# Pair a default-constructed MLP classifier with the parameters loaded for 'refit_ann'.
ann = Classifier(
    MLPClassifier(),
    params_ann,
)

# Keep the call as the cell's last expression so its result is displayed.
run(
    ann,
    tp,
    n_runs=n_runs,
    output=ann_out,
    refit=True,
)

100%|██████████| 30/30 [01:29<00:00,  2.98s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.874103,0.918564,0.970819,0.974253
1,0.888088,0.859475,0.974319,0.961093
2,0.874917,0.901533,0.971661,0.971159
3,0.896465,0.868022,0.97311,0.969395
4,0.864501,0.862277,0.972147,0.971373
5,0.892022,0.891892,0.970865,0.975895
6,0.877728,0.909482,0.970785,0.977638
7,0.886044,0.898133,0.972044,0.970831
8,0.890652,0.878361,0.971641,0.971179
9,0.88625,0.914384,0.969514,0.982466
