# Base Learner Runs (with Refit)

This notebook runs each base learner $n$ times and saves the results to a separate CSV file for each base learner. The `sfm` model is used to refit the data from the Feature Importance experiment.

## Imports

In [1]:
# sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run, get_params

## Data Loading

In [2]:
%%time
# Build the Tadpole data object once; every run(...) call below receives it as `tp`.
# NOTE(review): presumably this loads/preprocesses the TADPOLE dataset, which would
# explain the ~12 s wall time recorded for this cell — confirm in tadpole.py.
tp = Tadpole()

CPU times: user 10.9 s, sys: 731 ms, total: 11.6 s
Wall time: 11.8 s


In [3]:
# Number of repetitions per base learner; forwarded as `n_runs=` to every run(...) call below.
n_runs = 30

In [4]:
# Parameter sets looked up via get_params under the 'refit_*' keys,
# one per base learner (decision tree, SVC, logistic regression, MLP).
# NOTE(review): presumably these are the best grid-search parameters — confirm in main.get_params.
params_dtc, params_svc, params_lra, params_ann = (
    get_params(key)
    for key in ('refit_dtc', 'refit_svc', 'refit_lra', 'refit_ann')
)

# Empty parameter sets for the "Default Parameters" section.
# Four separate dict objects on purpose, so none of them is aliased to another.
params_dtc_def, params_svc_def, params_lra_def, params_ann_def = {}, {}, {}, {}

In [5]:
# CSV destinations for the runs that use the looked-up ("best") parameters,
# in the order: decision tree, SVC, logistic regression, MLP.
dtc_out, svc_out, lra_out, ann_out = (
    './results/refit_dtc.csv',
    './results/refit_svc.csv',
    './results/refit_lra.csv',
    './results/refit_ann.csv',
)

# CSV destinations for the runs that use the empty (default) parameter sets,
# same learner order as above.
dtc_out_def, svc_out_def, lra_out_def, ann_out_def = (
    './results/def_refit_dtc.csv',
    './results/def_refit_svc.csv',
    './results/def_refit_lra.csv',
    './results/def_refit_ann.csv',
)

## Base Learners: Parameters from Grid Search (with Refit) Experiment
### DTC

In [6]:
# Decision tree wrapped together with the 'refit_dtc' parameter set.
dtc = Classifier(
    DecisionTreeClassifier(),
    params_dtc,
)
# Kept as the trailing expression so the notebook displays whatever run(...) returns.
# NOTE(review): `output=` is presumably the CSV path the results are saved to — confirm in main.run.
run(
    dtc,
    tp,
    n_runs=n_runs,
    output=dtc_out,
    refit=True,
)

100%|██████████| 30/30 [00:26<00:00,  1.13it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.860698,0.840674,0.943128,0.941342
1,0.888831,0.84749,0.963232,0.947798
2,0.800366,0.842383,0.903277,0.915746
3,0.838218,0.799585,0.939654,0.9133
4,0.726149,0.747583,0.871082,0.88645
5,0.839218,0.864793,0.936957,0.947302
6,0.799084,0.814907,0.899903,0.916875
7,0.865989,0.875457,0.951907,0.956527
8,0.842395,0.852085,0.949573,0.954655
9,0.83385,0.842257,0.933484,0.934984


### SVC

In [7]:
# Support vector classifier wrapped together with the 'refit_svc' parameter set.
# probability=True turns on scikit-learn's probability estimates for SVC;
# NOTE(review): presumably required for the mAUC columns in the output — confirm in main.run.
svc = Classifier(
    SVC(probability=True),
    params_svc,
)
# Kept as the trailing expression so the notebook displays whatever run(...) returns.
run(
    svc,
    tp,
    n_runs=n_runs,
    output=svc_out,
    refit=True,
)

100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.894129,0.914467,0.96538,0.975854
1,0.901193,0.878116,0.965822,0.945078
2,0.893593,0.906353,0.969343,0.964463
3,0.897815,0.890144,0.971224,0.967542
4,0.899355,0.90211,0.961489,0.960935
5,0.89712,0.907855,0.963624,0.971107
6,0.889361,0.915415,0.960175,0.964738
7,0.896101,0.894035,0.964012,0.963832
8,0.895545,0.901162,0.960895,0.965959
9,0.893948,0.923736,0.957641,0.973423


### LRA

In [8]:
# Logistic regression wrapped together with the 'refit_lra' parameter set.
lra = Classifier(
    LogisticRegression(),
    params_lra,
)
# Kept as the trailing expression so the notebook displays whatever run(...) returns.
run(
    lra,
    tp,
    n_runs=n_runs,
    output=lra_out,
    refit=True,
)

100%|██████████| 30/30 [00:26<00:00,  1.13it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.875069,0.876345,0.970965,0.974977
1,0.901451,0.874697,0.973264,0.960196
2,0.895545,0.904644,0.971959,0.970414
3,0.895158,0.899144,0.972686,0.969626
4,0.899247,0.902707,0.97177,0.972268
5,0.896899,0.907855,0.97063,0.977362
6,0.891642,0.913706,0.970061,0.97862
7,0.879041,0.879411,0.971575,0.969894
8,0.893202,0.900709,0.972126,0.972307
9,0.889191,0.916898,0.969605,0.982112


### ANN

In [9]:
# Multi-layer perceptron wrapped together with the 'refit_ann' parameter set.
ann = Classifier(
    MLPClassifier(),
    params_ann,
)
# Kept as the trailing expression so the notebook displays whatever run(...) returns.
run(
    ann,
    tp,
    n_runs=n_runs,
    output=ann_out,
    refit=True,
)

100%|██████████| 30/30 [01:32<00:00,  3.07s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.887821,0.926659,0.971349,0.974923
1,0.895518,0.880504,0.97441,0.961
2,0.886023,0.910451,0.971538,0.971436
3,0.895997,0.8776,0.972999,0.968609
4,0.884844,0.878712,0.972199,0.972218
5,0.893057,0.891069,0.970532,0.97652
6,0.873794,0.90458,0.970298,0.976846
7,0.882126,0.884664,0.97162,0.971076
8,0.886765,0.88178,0.972228,0.97227
9,0.883222,0.909482,0.969725,0.982054


## Base Learners: Default Parameters
### DTC

In [10]:
# Decision tree with the empty parameter set (default-parameters baseline).
# Rebinds `dtc`: the wrapper built in the grid-search section is replaced under this name.
dtc = Classifier(
    DecisionTreeClassifier(),
    params_dtc_def,
)
# Kept as the trailing expression so the notebook displays whatever run(...) returns.
run(
    dtc,
    tp,
    n_runs=n_runs,
    output=dtc_out_def,
    refit=True,
)

100%|██████████| 30/30 [00:26<00:00,  1.15it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.999145,0.824978,0.999997,0.868733
1,1.0,0.826832,1.0,0.870124
2,0.999145,0.8257,0.999998,0.871402
3,1.0,0.803848,1.0,0.852886
4,0.999573,0.825329,0.999999,0.868997
5,0.999145,0.80875,0.999997,0.856562
6,0.999145,0.813815,0.999997,0.860362
7,0.999145,0.812415,0.999998,0.859311
8,0.999145,0.837541,0.999998,0.878156
9,0.999573,0.818862,0.999999,0.86521


### SVC

In [11]:
# Support vector classifier with the empty parameter set (default-parameters baseline).
# probability=True is kept, matching the grid-search section's SVC construction.
# Rebinds `svc`: the wrapper built in the grid-search section is replaced under this name.
svc = Classifier(
    SVC(probability=True),
    params_svc_def,
)
# Kept as the trailing expression so the notebook displays whatever run(...) returns.
run(
    svc,
    tp,
    n_runs=n_runs,
    output=svc_out_def,
    refit=True,
)

100%|██████████| 30/30 [00:33<00:00,  1.12s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.873856,0.906454,0.972041,0.97518
1,0.883773,0.865263,0.975233,0.956716
2,0.875251,0.880566,0.972966,0.968583
3,0.882553,0.871152,0.973131,0.970512
4,0.880215,0.865118,0.972734,0.972255
5,0.87692,0.887217,0.972043,0.972823
6,0.875962,0.891646,0.97096,0.979466
7,0.874505,0.846931,0.97151,0.969853
8,0.876441,0.882006,0.97213,0.973772
9,0.874268,0.88868,0.970407,0.982543


### LRA

In [12]:
# Logistic regression with the empty parameter set (default-parameters baseline).
# Rebinds `lra`: the wrapper built in the grid-search section is replaced under this name.
lra = Classifier(
    LogisticRegression(),
    params_lra_def,
)
# Kept as the trailing expression so the notebook displays whatever run(...) returns.
run(
    lra,
    tp,
    n_runs=n_runs,
    output=lra_out_def,
    refit=True,
)

100%|██████████| 30/30 [00:27<00:00,  1.09it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.807601,0.857581,0.962474,0.970056
1,0.821534,0.764303,0.965784,0.951392
2,0.809217,0.801459,0.9634,0.962506
3,0.820159,0.808831,0.964791,0.961141
4,0.811169,0.807328,0.963042,0.968624
5,0.806854,0.84786,0.962832,0.969709
6,0.814314,0.851278,0.961497,0.97401
7,0.811699,0.764467,0.964093,0.957511
8,0.818079,0.811055,0.963566,0.965254
9,0.804023,0.839519,0.960705,0.978779


### ANN

In [13]:
# Multi-layer perceptron with the empty parameter set (default-parameters baseline).
# Rebinds `ann`: the wrapper built in the grid-search section is replaced under this name.
ann = Classifier(
    MLPClassifier(),
    params_ann_def,
)
# Kept as the trailing expression so the notebook displays whatever run(...) returns.
run(
    ann,
    tp,
    n_runs=n_runs,
    output=ann_out_def,
    refit=True,
)

100%|██████████| 30/30 [01:17<00:00,  2.59s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.868059,0.901903,0.971212,0.975151
1,0.886095,0.860361,0.974082,0.959287
2,0.871312,0.895807,0.971007,0.970921
3,0.885699,0.866313,0.971777,0.968691
4,0.873517,0.867776,0.971097,0.972492
5,0.87641,0.873233,0.969964,0.974481
6,0.868542,0.90145,0.970046,0.977758
7,0.872029,0.881183,0.972347,0.969255
8,0.876266,0.879473,0.971423,0.971382
9,0.867832,0.903983,0.968638,0.983655
