# Base Learner Runs

This notebook runs each base learner $n$ times (`n_runs`) and, when an output path is set, saves the results of each base learner to its own CSV file.

## Imports

In [1]:
# sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# custom modules
from tadpole import Tadpole
from classifier import Classifier
from main import run

## Data Loading

In [2]:
%%time
# Construct the Tadpole object once; this same `tp` instance is passed to every
# run(...) call in the base-learner cells.
# NOTE(review): presumably this is where the dataset is actually loaded (the
# section is titled "Data Loading") — confirm in tadpole.py.
tp = Tadpole()

Wall time: 4.47 s


In [3]:
# Number of repetitions passed to run(...) for every base learner.
n_runs = 10  # change to 30 for final evaluation

# Single switch for persisting results. This replaces the former pattern of
# commenting the real paths in and out by hand.
#   False -> every *_out is None (same as the previous "Temporary" setup)
#   True  -> every base learner gets its own CSV path under ./results/
# NOTE(review): None is forwarded as run(..., output=None); presumably that
# makes run() skip writing a file — confirm in main.run.
save_results = False

dtc_out = './results/dtc.csv' if save_results else None
svc_out = './results/svc.csv' if save_results else None
lra_out = './results/lra.csv' if save_results else None
ann_out = './results/ann.csv' if save_results else None

**TODO:** Decide whether the classifiers should also be given a fixed random state (seed).

## Base Learners

### DTC

**NOTE:** With the best parameters from the grid search, the DTC is inconsistent across runs and at times performs much worse than with the default parameters.

In [4]:
# Decision-tree hyperparameters (the best parameters found by the grid search).
# NOTE(review): no random_state is set while max_features='sqrt' is used; this
# may contribute to the run-to-run variance — confirm before the final
# evaluation.
params_dtc = {
    'class_weight': 'balanced',
    'criterion': 'gini',
    'max_depth': 8,
    'max_features': 'sqrt',
    # Floats here: per the scikit-learn docs these are read as fractions of
    # the number of samples rather than absolute counts.
    'min_samples_leaf': 0.2,
    'min_samples_split': 0.5,
    'splitter': 'best',
}

# Pair a fresh, default-constructed estimator with the parameters above.
dtc = Classifier(DecisionTreeClassifier(), params_dtc)

# Final expression of the cell, so whatever run() returns is rendered as the
# cell output.
run(
    dtc,
    tp,
    n_runs=n_runs,
    output=dtc_out,
)

100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 86.33it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.622127,0.649014,0.774706,0.786886
1,0.635546,0.625825,0.763156,0.757699
2,0.531036,0.523447,0.704137,0.663967
3,0.662253,0.630602,0.774853,0.738808
4,0.653679,0.676963,0.778519,0.788884
5,0.865346,0.86693,0.9291,0.932903
6,0.733535,0.729266,0.8665,0.860563
7,0.513436,0.507855,0.681546,0.684274
8,0.631663,0.641354,0.759945,0.770472
9,0.469953,0.422809,0.643756,0.631547


### SVC

In [6]:
# Support-vector classifier hyperparameters.
# NOTE(review): scikit-learn documents gamma as the coefficient for the
# rbf/poly/sigmoid kernels, so it likely has no effect with kernel='linear'
# — confirm.
params_svc = {
    'C': 1000,
    'class_weight': 'balanced',
    'gamma': 0.001,
    'kernel': 'linear',
    'tol': 1,
}

# probability=True is set on the estimator itself rather than in params_svc.
# NOTE(review): presumably needed so class probabilities are available for
# the mAUC metric — confirm in main.run / classifier.py.
svc = Classifier(SVC(probability=True), params_svc)

# Final expression of the cell, so whatever run() returns is rendered as the
# cell output.
run(
    svc,
    tp,
    n_runs=n_runs,
    output=svc_out,
)

100%|██████████████████████████████████████████| 10/10 [00:04<00:00,  2.13it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.89712,0.916402,0.966393,0.973219
1,0.902532,0.881761,0.971719,0.954374
2,0.892908,0.906579,0.968982,0.960183
3,0.90129,0.880793,0.970566,0.966689
4,0.902346,0.879083,0.967923,0.967499
5,0.896379,0.880153,0.967624,0.970442
6,0.895714,0.913932,0.963513,0.967499
7,0.899834,0.883634,0.96645,0.960338
8,0.900447,0.89626,0.970683,0.967337
9,0.893691,0.909256,0.965296,0.974814


### LRA

In [7]:
# Logistic-regression hyperparameters: L1 penalty with the liblinear solver.
# NOTE(review): the un-rounded C value looks like the output of a parameter
# search — confirm its provenance.
params_lra = {
    'C': 0.08858667904100823,
    'class_weight': 'balanced',
    'dual': False,
    'penalty': 'l1',
    'solver': 'liblinear',
    'tol': 0.0001,
}

# Pair a fresh, default-constructed estimator with the parameters above.
lra = Classifier(LogisticRegression(), params_lra)

# Final expression of the cell, so whatever run() returns is rendered as the
# cell output.
run(
    lra,
    tp,
    n_runs=n_runs,
    output=lra_out,
)

100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 53.52it/s]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.890812,0.918112,0.966704,0.972424
1,0.897712,0.880875,0.96982,0.956863
2,0.88779,0.926351,0.967177,0.97144
3,0.899215,0.882358,0.967935,0.966686
4,0.895333,0.896919,0.967273,0.970815
5,0.896775,0.894796,0.967313,0.97132
6,0.887661,0.923673,0.96571,0.976824
7,0.895776,0.906598,0.967829,0.967061
8,0.894458,0.893664,0.967815,0.968559
9,0.889721,0.910513,0.964972,0.976258


### ANN

In [8]:
# Multi-layer perceptron hyperparameters: three hidden layers of 50 units.
# NOTE(review): scikit-learn documents learning_rate as only used when
# solver='sgd', so 'adaptive' likely has no effect with solver='adam'
# — confirm.
params_ann = {
    'activation': 'relu',
    'alpha': 0.05,
    'hidden_layer_sizes': (50, 50, 50),
    'learning_rate': 'adaptive',
    'solver': 'adam',
}

# Pair a fresh, default-constructed estimator with the parameters above.
ann = Classifier(MLPClassifier(), params_ann)

# Final expression of the cell, so whatever run() returns is rendered as the
# cell output.
run(
    ann,
    tp,
    n_runs=n_runs,
    output=ann_out,
)

100%|██████████████████████████████████████████| 10/10 [00:16<00:00,  1.66s/it]


Unnamed: 0,BCA_train,BCA_test,mAUC_train,mAUC_test
0,0.889335,0.905095,0.981798,0.976381
1,0.897557,0.861021,0.979624,0.960483
2,0.896506,0.892903,0.980901,0.967508
3,0.892758,0.864955,0.978286,0.96822
4,0.896254,0.853133,0.977289,0.969962
5,0.888449,0.888329,0.979373,0.973213
6,0.88572,0.892595,0.975934,0.976968
7,0.909024,0.866251,0.981221,0.966397
8,0.899282,0.875457,0.980589,0.97229
9,0.886589,0.90184,0.97754,0.977981
