In [1]:
from itertools import product
from datetime import datetime

import pandas as pd

from model import choose_dataset, train_model, add_new_row

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Experiment grid: each list is one switch passed to train_model, and every
# combination of the four switches is trained exactly once.
tune_list = [True, False]
scaling_list = [True, False]
pca_list = [True, False]
sampling_list = ['rus', 'nm-3', 'cnn', 'ros', 'smote', 'adasyn']

# One row is appended per trained configuration. The last four columns record
# which grid point produced the row; 'Algorithm' is filled in inside the loop.
abc_results = pd.DataFrame(columns=[
    'Algorithm',
    'Non-pulsar precision', 'Non-pulsar recall', 'Non-pulsar F1',
    'Pulsar precision', 'Pulsar recall', 'Pulsar F1',
    'Weighted precision', 'Weighted recall', 'Weighted F1',
    'Matthews corrcoefficient', "Cohen's Kappa",
    'Parameters', 'PCA', 'Tuning', 'Scaling', 'Sampling',
])

# Build the grid once so the total shown in the progress message is computed
# a single time rather than re-enumerating the whole product on every pass.
combinations = list(product(tune_list, scaling_list, pca_list, sampling_list))
total_runs = len(combinations)

# Progress counter; initialised so the name exists even if the grid is empty.
n = 0

print('Start time at', datetime.now().strftime("%H:%M:%S"))

for n, (tuning, scaling, pca, sampling) in enumerate(combinations, start=1):
    print('Starting Algorithm - AdaBoost, Autotuning - {}, Scaling - {}, Pca - {}, Oversampling - {} ({} out of {})'.format(
        tuning, scaling, pca, sampling, n, total_runs))

    # train_model and add_new_row come from the project's `model` module.
    # NOTE(review): add_new_row presumably returns the metric columns plus
    # 'Parameters' as a dict/Series keyed by column name -- confirm in model.py.
    algorithm, preds, y_test = train_model(algo='abc', pca=pca, tuning=tuning, scaling=scaling, n_splits=5, sampling=sampling, n_jobs=7, scoring='cohen', verbose=0)
    row = add_new_row(algorithm, preds, y_test)

    # Tag the row with the grid point that produced it.
    row['Algorithm'] = 'AdaBoost'
    row['PCA'] = pca
    row['Tuning'] = tuning
    row['Scaling'] = scaling
    row['Sampling'] = sampling

    # Append at the next integer label (the frame starts empty, so labels run 0..k-1).
    abc_results.loc[len(abc_results)] = row
    print('Done at', datetime.now().strftime("%H:%M:%S"), '\n')

    # The CSV is rewritten after every run, so the rows gathered so far are on
    # disk even if a later configuration fails. to_csv does not create missing
    # directories, so ./results must already exist.
    abc_results.to_csv('./results/abc_results.csv', index=False)

Start time at 11:45:21
Starting Algorithm - AdaBoost, Autotuning - True, Scaling - True, Pca - True, Oversampling - rus (1 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_rus.csv
Loaded ./data/y_train_scaled_pca_rus.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 11:47:13 

Starting Algorithm - AdaBoost, Autotuning - True, Scaling - True, Pca - True, Oversampling - nm-3 (2 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_nm-3.csv
Loaded ./data/y_train_scaled_pca_nm-3.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 11:48:55 

Starting Algorithm - AdaBoost, Autotuning - True, Scaling - True, Pca - True, Oversampling - cnn (3 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_cnn.csv
Loaded ./data/y_train_scaled_pca_cnn.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning begins...
Tunin

Done at 13:39:42 

Starting Algorithm - AdaBoost, Autotuning - False, Scaling - True, Pca - True, Oversampling - nm-3 (26 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_nm-3.csv
Loaded ./data/y_train_scaled_pca_nm-3.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 13:39:42 

Starting Algorithm - AdaBoost, Autotuning - False, Scaling - True, Pca - True, Oversampling - cnn (27 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_cnn.csv
Loaded ./data/y_train_scaled_pca_cnn.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 13:39:42 

Starting Algorithm - AdaBoost, Autotuning - False, Scaling - True, Pca - True, Oversampling - ros (28 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_ros.csv
Loaded ./data/y_train_scaled_pca_ros.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 13:39:43 

Starting Algorithm - AdaBoost, Autotuning - False, 

In [4]:
# Rank every trained configuration from highest to lowest Cohen's Kappa and
# renumber the rows 0..k-1 so the displayed index doubles as the rank.
(
    abc_results
    .sort_values(by="Cohen's Kappa", ascending=False)
    .reset_index(drop=True)
)

Unnamed: 0,Algorithm,Non-pulsar precision,Non-pulsar recall,Non-pulsar F1,Pulsar precision,Pulsar recall,Pulsar F1,Weighted precision,Weighted recall,Weighted F1,Matthews corrcoefficient,Cohen's Kappa,Parameters,PCA,Tuning,Scaling,Sampling
0,AdaBoost,0.985492,0.992591,0.989029,0.924211,0.860784,0.891371,0.979671,0.980071,0.979752,0.881,0.88,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",False,True,True,smote
1,AdaBoost,0.985282,0.991974,0.988617,0.918239,0.858824,0.887538,0.978914,0.979326,0.979015,0.877,0.876,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",False,True,False,ros
2,AdaBoost,0.985282,0.991974,0.988617,0.918239,0.858824,0.887538,0.978914,0.979326,0.979015,0.877,0.876,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",False,True,True,ros
3,AdaBoost,0.985872,0.990945,0.988402,0.909278,0.864706,0.886432,0.978597,0.978953,0.978716,0.875,0.875,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",False,True,False,smote
4,AdaBoost,0.985069,0.99115,0.9881,0.910417,0.856863,0.882828,0.977977,0.978394,0.9781,0.871,0.871,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",False,False,True,smote
5,AdaBoost,0.987443,0.98724,0.987342,0.878669,0.880392,0.87953,0.977111,0.977091,0.977101,0.867,0.867,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",False,True,True,adasyn
6,AdaBoost,0.98426,0.990945,0.987591,0.907757,0.84902,0.877406,0.976993,0.977463,0.977125,0.866,0.865,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",False,False,False,smote
7,AdaBoost,0.985445,0.989298,0.987368,0.894094,0.860784,0.877123,0.976767,0.977091,0.976896,0.865,0.864,"{'algorithm': 'SAMME', 'base_estimator': None,...",False,True,True,cnn
8,AdaBoost,0.986434,0.987652,0.987042,0.880952,0.870588,0.87574,0.976414,0.976532,0.97647,0.863,0.863,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",False,False,True,adasyn
9,AdaBoost,0.984449,0.990121,0.987277,0.900415,0.85098,0.875,0.976466,0.976904,0.976612,0.863,0.862,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",False,False,True,ros
