In [1]:
import os
import warnings
from datetime import datetime
from itertools import product

import pandas as pd

from model import choose_dataset, train_model, add_new_row, plot_conf_matrix

warnings.filterwarnings('ignore')

In [2]:
# Experiment grid: one model is trained for every combination of the four
# option lists below (tuning x scaling x PCA x sampling strategy).
tune_list = [True, False]
scaling_list = [True]
pca_list = [False]
sampling_list = ['rus', 'nm-3', 'cnn', 'ros', 'smote', 'adasyn']

# The results table is checkpointed here after every configuration. Create the
# target directory up front so a missing folder cannot fail the first save
# after a long training run.
results_path = './results/xgbc_results.csv'
os.makedirs(os.path.dirname(results_path), exist_ok=True)

xgbc_results = pd.DataFrame(columns=['Algorithm', 'Non-pulsar precision', 'Non-pulsar recall', 'Non-pulsar F1', 'Pulsar precision', 'Pulsar recall', 'Pulsar F1', 'Weighted precision', 'Weighted recall', 'Weighted F1', 'Matthews corrcoefficient', "Cohen's Kappa", 'Parameters', 'PCA', 'Tuning', 'Scaling', 'Sampling'])

# Materialise the grid once: the total is needed for the progress message and
# must not be recomputed on every iteration.
configurations = list(product(tune_list, scaling_list, pca_list, sampling_list))
total_runs = len(configurations)

n = 0  # 1-based index of the current run; stays 0 if the grid is empty

print('Start time at', datetime.now().strftime("%H:%M:%S"))

for n, (tuning, scaling, pca, sampling) in enumerate(configurations, start=1):
    print('Starting Algorithm - Gradient Boosting, Autotuning - {}, Scaling - {}, Pca - {}, Oversampling - {} ({} out of {})'.format(
        tuning, scaling, pca, sampling, n, total_runs))

    # train_model / add_new_row come from the project-local `model` module;
    # add_new_row presumably returns the metric columns for one run -- TODO confirm.
    algorithm, preds, y_test = train_model(algo='xgbc', pca=pca, tuning=tuning, scaling=scaling, sampling=sampling, n_splits=3, n_jobs=7, scoring='cohen', verbose=0)
    row = add_new_row(algorithm, preds, y_test)

    # Tag the row with the configuration that produced it.
    row['Algorithm'] = 'Gradient Boosting'
    row['PCA'] = pca
    row['Tuning'] = tuning
    row['Scaling'] = scaling
    row['Sampling'] = sampling
    xgbc_results.loc[len(xgbc_results)] = row
    print('Done at', datetime.now().strftime("%H:%M:%S"), '\n')

    # Save after every run so an interrupted sweep keeps its finished rows.
    xgbc_results.to_csv(results_path, index=False)

Start time at 14:43:32
Starting Algorithm - Gradient Boosting, Autotuning - True, Scaling - True, Pca - False, Oversampling - rus (1 out of 12)
Training begins...
Loaded ./data/X_train_scaled_rus.csv
Loaded ./data/y_train_scaled_rus.csv
Loaded ./data/X_test_scaled.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 14:49:28 

Starting Algorithm - Gradient Boosting, Autotuning - True, Scaling - True, Pca - False, Oversampling - nm-3 (2 out of 12)
Training begins...
Loaded ./data/X_train_scaled_nm-3.csv
Loaded ./data/y_train_scaled_nm-3.csv
Loaded ./data/X_test_scaled.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 14:53:58 

Starting Algorithm - Gradient Boosting, Autotuning - True, Scaling - True, Pca - False, Oversampling - cnn (3 out of 12)
Training begins...
Loaded ./data/X_train_scaled_cnn.csv
Loaded ./data/y_train_scaled_cnn.csv
Loaded ./data/X_test_scaled.csv
Algorithm selected.
Tuning begins...
Tuning comp

In [5]:
# Rank the evaluated configurations from highest to lowest Cohen's Kappa.
xgbc_results.sort_values(by=["Cohen's Kappa"], ascending=[False])

Unnamed: 0,Algorithm,Non-pulsar precision,Non-pulsar recall,Non-pulsar F1,Pulsar precision,Pulsar recall,Pulsar F1,Weighted precision,Weighted recall,Weighted F1,Matthews corrcoefficient,Cohen's Kappa,Parameters,PCA,Tuning,Scaling,Sampling
4,Gradient Boosting,0.987692,0.990945,0.989316,0.910931,0.882353,0.896414,0.980401,0.98063,0.980491,0.886,0.886,"{'objective': 'binary:logistic', 'base_score':...",False,True,True,smote
3,Gradient Boosting,0.985665,0.990533,0.988093,0.90535,0.862745,0.883534,0.978035,0.978394,0.978161,0.872,0.872,"{'objective': 'binary:logistic', 'base_score':...",False,True,True,ros
5,Gradient Boosting,0.989866,0.984976,0.987415,0.863296,0.903922,0.883142,0.977843,0.977277,0.97751,0.871,0.871,"{'objective': 'binary:logistic', 'base_score':...",False,True,True,adasyn
2,Gradient Boosting,0.987649,0.987446,0.987548,0.880626,0.882353,0.881489,0.977483,0.977463,0.977473,0.869,0.869,"{'objective': 'binary:logistic', 'base_score':...",False,True,True,cnn
10,Gradient Boosting,0.985653,0.98971,0.987677,0.897959,0.862745,0.88,0.977323,0.977649,0.977449,0.868,0.868,"{'objective': 'binary:logistic', 'base_score':...",False,False,True,smote
9,Gradient Boosting,0.984659,0.990739,0.98769,0.90625,0.852941,0.878788,0.977211,0.977649,0.977345,0.867,0.866,"{'objective': 'binary:logistic', 'base_score':...",False,False,True,ros
1,Gradient Boosting,0.985835,0.988269,0.98705,0.885542,0.864706,0.875,0.976308,0.976532,0.976407,0.862,0.862,"{'objective': 'binary:logistic', 'base_score':...",False,True,True,nm-3
11,Gradient Boosting,0.988831,0.983947,0.986383,0.853933,0.894118,0.873563,0.976017,0.975414,0.975667,0.86,0.86,"{'objective': 'binary:logistic', 'base_score':...",False,False,True,adasyn
8,Gradient Boosting,0.986186,0.984359,0.985271,0.853565,0.868627,0.86103,0.973588,0.973366,0.97347,0.846,0.846,"{'objective': 'binary:logistic', 'base_score':...",False,False,True,cnn
0,Gradient Boosting,0.992772,0.961103,0.976681,0.715789,0.933333,0.810213,0.966462,0.958465,0.960868,0.796,0.787,"{'objective': 'binary:logistic', 'base_score':...",False,True,True,rus
