# Base Learner Runs (with Manual Feature Selection)

This notebook runs each base learner $n$ times and saves the results to a separate CSV file for each base learner. Use the `sfm` model to refit data from the Feature Importance experiment.

## Imports

In [1]:
# sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run, get_params

## Data Loading

In [2]:
%%time
# Build the Tadpole data object once; this same `tp` instance is passed to every run(...) call below.
# NOTE(review): isCorr=True presumably selects the correlation-based (manually selected) feature set
# that the 'corr_*' parameter keys and output filenames refer to — confirm in tadpole.py.
tp = Tadpole(isCorr=True)

CPU times: user 4.19 s, sys: 316 ms, total: 4.51 s
Wall time: 4.52 s


In [3]:
# Number of repetitions per base learner; forwarded to every run(...) call as n_runs.
n_runs = 30

In [4]:
# Look up the parameter set for each base learner via get_params, keyed by its
# 'corr_*' name. Order: DTC, SVC, LRA, ANN (same order as the sections below).
params_dtc, params_svc, params_lra, params_ann = (
    get_params(key)
    for key in ('corr_dtc', 'corr_svc', 'corr_lra', 'corr_ann')
)

# Empty parameter dicts used by the "Default Parameters" section.
# Four separate dict objects, so no two learners ever share the same dict.
params_dtc_def, params_svc_def, params_lra_def, params_ann_def = {}, {}, {}, {}

In [5]:
# CSV destinations for the runs that use the grid-searched (best) parameters
dtc_out, svc_out, lra_out, ann_out = (
    './results/corr_dtc.csv',
    './results/corr_svc.csv',
    './results/corr_lra.csv',
    './results/corr_ann.csv',
)

# CSV destinations for the runs that use default parameters
dtc_out_def, svc_out_def, lra_out_def, ann_out_def = (
    './results/def_corr_dtc.csv',
    './results/def_corr_svc.csv',
    './results/def_corr_lra.csv',
    './results/def_corr_ann.csv',
)

## Base Learners: Parameters from Grid Search (with Manual Selection) Experiment
### DTC

In [6]:
# Decision tree wrapped with the looked-up parameters (params_dtc).
# run(...) receives the shared data object, the repetition count and the CSV path;
# its return value is the cell's displayed output.
dtc = Classifier(
    DecisionTreeClassifier(),
    params_dtc,
)
run(
    dtc,
    tp,
    n_runs=n_runs,
    output=dtc_out,
)

100%|██████████| 30/30 [00:00<00:00, 80.37it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.620954,0.622962,0.762238,0.745801
1,0.583923,0.508629,0.741294,0.685528
2,0.580455,0.565352,0.739006,0.711058
3,0.856356,0.805679,0.91791,0.891607
4,0.781054,0.77207,0.894036,0.888465
5,0.865346,0.86693,0.930689,0.924799
6,0.815973,0.802365,0.907665,0.895673
7,0.744334,0.752011,0.875111,0.889203
8,0.673313,0.694343,0.84223,0.848857
9,0.813267,0.795631,0.906297,0.909495


### SVC

In [7]:
# Support vector classifier wrapped with the looked-up parameters (params_svc).
# probability=True makes scikit-learn's SVC expose predict_proba;
# NOTE(review): presumably required for the mAUC metric — confirm in main.run.
svc = Classifier(
    SVC(probability=True),
    params_svc,
)
run(
    svc,
    tp,
    n_runs=n_runs,
    output=svc_out,
)

100%|██████████| 30/30 [00:10<00:00,  2.97it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.867771,0.894633,0.952215,0.961017
1,0.880649,0.848704,0.960604,0.938331
2,0.869486,0.889485,0.954871,0.955482
3,0.87898,0.863925,0.956233,0.950342
4,0.88069,0.851733,0.957382,0.949851
5,0.87498,0.88141,0.951088,0.950518
6,0.86722,0.897064,0.952113,0.961443
7,0.869913,0.897228,0.954736,0.962655
8,0.873888,0.870248,0.95788,0.95808
9,0.873831,0.880875,0.953667,0.94953


### LRA

In [8]:
# Logistic regression wrapped with the looked-up parameters (params_lra).
# run(...) receives the shared data object, the repetition count and the CSV path;
# its return value is the cell's displayed output.
lra = Classifier(
    LogisticRegression(),
    params_lra,
)
run(
    lra,
    tp,
    n_runs=n_runs,
    output=lra_out,
)

100%|██████████| 30/30 [00:01<00:00, 29.37it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.871309,0.884376,0.95753,0.964537
1,0.882549,0.843576,0.962836,0.940901
2,0.868466,0.891566,0.959545,0.96063
3,0.877441,0.856572,0.96109,0.95144
4,0.881565,0.857232,0.961359,0.953954
5,0.870598,0.887506,0.960393,0.955469
6,0.8636,0.90487,0.957222,0.96807
7,0.866983,0.895292,0.958817,0.962255
8,0.8717,0.864831,0.960033,0.958576
9,0.871159,0.878053,0.959771,0.954099


### ANN

In [9]:
# Multi-layer perceptron wrapped with the looked-up parameters (params_ann).
# run(...) receives the shared data object, the repetition count and the CSV path;
# its return value is the cell's displayed output.
ann = Classifier(
    MLPClassifier(),
    params_ann,
)
run(
    ann,
    tp,
    n_runs=n_runs,
    output=ann_out,
)

100%|██████████| 30/30 [00:19<00:00,  1.53it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.858126,0.901923,0.959208,0.965049
1,0.86917,0.831258,0.962869,0.942385
2,0.852642,0.894406,0.956834,0.957635
3,0.864119,0.807263,0.962985,0.951959
4,0.870113,0.861844,0.962711,0.955207
5,0.8526,0.854983,0.960075,0.953903
6,0.854758,0.874942,0.956526,0.96867
7,0.856277,0.847178,0.960739,0.962814
8,0.864753,0.835563,0.960852,0.957696
9,0.859995,0.866251,0.960443,0.955573


## Base Learners: Default Parameters
### DTC

In [10]:
# Decision tree with the empty parameter dict (params_dtc_def), i.e. the default-parameter run.
# Note: this rebinds `dtc`, replacing the classifier created in the tuned-parameter section.
dtc = Classifier(
    DecisionTreeClassifier(),
    params_dtc_def,
)
run(
    dtc,
    tp,
    n_runs=n_runs,
    output=dtc_out_def,
)

100%|██████████| 30/30 [00:00<00:00, 69.72it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,1.0,0.738971,1.0,0.804228
1,1.0,0.801519,1.0,0.851139
2,1.0,0.835668,1.0,0.876751
3,1.0,0.71893,1.0,0.789197
4,1.0,0.752604,1.0,0.814453
5,1.0,0.754087,1.0,0.815565
6,1.0,0.822816,1.0,0.867112
7,1.0,0.76733,1.0,0.825497
8,1.0,0.775713,1.0,0.831785
9,1.0,0.802013,1.0,0.85151


### SVC

In [11]:
# Support vector classifier with the empty parameter dict (params_svc_def), i.e. the default-parameter run.
# probability=True makes scikit-learn's SVC expose predict_proba.
# Note: this rebinds `svc`, replacing the classifier created in the tuned-parameter section.
svc = Classifier(
    SVC(probability=True),
    params_svc_def,
)
run(
    svc,
    tp,
    n_runs=n_runs,
    output=svc_out_def,
)

100%|██████████| 30/30 [00:04<00:00,  6.09it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.831541,0.838611,0.958769,0.961516
1,0.838992,0.769843,0.964044,0.930053
2,0.837473,0.85204,0.959407,0.958971
3,0.846067,0.776864,0.961046,0.947045
4,0.841536,0.80537,0.962608,0.947905
5,0.834311,0.816758,0.961376,0.94694
6,0.8348,0.836078,0.956506,0.966572
7,0.841294,0.759605,0.959959,0.946747
8,0.840295,0.818117,0.960645,0.954853
9,0.835028,0.813917,0.960905,0.949013


### LRA

In [12]:
# Logistic regression with the empty parameter dict (params_lra_def), i.e. the default-parameter run.
# Note: this rebinds `lra`, replacing the classifier created in the tuned-parameter section.
lra = Classifier(
    LogisticRegression(),
    params_lra_def,
)
run(
    lra,
    tp,
    n_runs=n_runs,
    output=lra_out_def,
)

100%|██████████| 30/30 [00:00<00:00, 30.36it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.74588,0.775817,0.94314,0.953228
1,0.765471,0.706038,0.947894,0.92355
2,0.759787,0.788896,0.944465,0.948857
3,0.770538,0.724573,0.947662,0.931338
4,0.752718,0.774642,0.945546,0.943047
5,0.763788,0.759936,0.945008,0.945701
6,0.759957,0.738786,0.942009,0.952742
7,0.768288,0.715346,0.944562,0.947176
8,0.75675,0.766485,0.945436,0.948623
9,0.75795,0.759339,0.94555,0.939556


### ANN

In [13]:
# Multi-layer perceptron with the empty parameter dict (params_ann_def), i.e. the default-parameter run.
# Note: this rebinds `ann`, replacing the classifier created in the tuned-parameter section.
ann = Classifier(
    MLPClassifier(),
    params_ann_def,
)
run(
    ann,
    tp,
    n_runs=n_runs,
    output=ann_out_def,
)

100%|██████████| 30/30 [00:21<00:00,  1.38it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.842787,0.876878,0.958951,0.96379
1,0.857575,0.81921,0.963061,0.938981
2,0.851406,0.90042,0.960099,0.963405
3,0.864412,0.814534,0.961964,0.948831
4,0.858481,0.845284,0.962727,0.954372
5,0.854166,0.86106,0.960731,0.954276
6,0.850181,0.872038,0.957261,0.970293
7,0.860227,0.854449,0.959364,0.961988
8,0.856663,0.842998,0.95952,0.955463
9,0.856123,0.860751,0.96028,0.953424
