In [1]:
from itertools import product
from datetime import datetime

import pandas as pd

from model import choose_dataset, train_model, add_new_row

import warnings
# NOTE(review): with no category/module filter this silences *every* warning
# for the rest of the kernel session, including deprecation and convergence
# warnings raised during training. Consider narrowing the filter (category=...)
# or scoping it with `warnings.catch_warnings()` around the noisy call.
warnings.filterwarnings('ignore')

In [2]:
# --- Experiment grid -------------------------------------------------------
# One Decision Tree run is performed for every combination of the four
# switches below (2 * 2 * 2 * 6 = 48 runs).
tune_list = [True, False]
scaling_list = [True, False]
pca_list = [True, False]
# Re-sampling strategy keys understood by `train_model`.
# NOTE(review): the progress message labels these "Oversampling", but the keys
# look like a mix of under- and over-sampling methods -- confirm against model.py.
sampling_list = ['rus', 'nm-3', 'cnn', 'ros', 'smote', 'adasyn']

# Result table: one row per run. The metric columns are expected to be filled
# by `add_new_row`; the last four columns record the configuration of the run.
dt_results = pd.DataFrame(columns=['Algorithm', 'Non-pulsar precision', 'Non-pulsar recall', 'Non-pulsar F1', 'Pulsar precision', 'Pulsar recall', 'Pulsar F1', 'Weighted precision', 'Weighted recall', 'Weighted F1', 'Matthews corrcoefficient', "Cohen's Kappa", 'Parameters', 'PCA', 'Tuning', 'Scaling', 'Sampling'])

# Materialise the grid once so the total number of runs is computed a single
# time instead of rebuilding the full Cartesian product on every iteration.
grid = list(product(tune_list, scaling_list, pca_list, sampling_list))
n_runs = len(grid)

print('Start time at', datetime.now().strftime("%H:%M:%S"))

# `n` is the 1-based run number shown in the progress message.
for n, (tuning, scaling, pca, sampling) in enumerate(grid, start=1):
    print('Starting Algorithm - Decision Tree, Autotuning - {}, Scaling - {}, Pca - {}, Oversampling - {} ({} out of {})'.format(
        tuning, scaling, pca, sampling, n, n_runs))

    # Train and evaluate one configuration.
    # NOTE(review): the meaning of n_splits / n_jobs / scoring is defined in
    # model.train_model, which is not visible here -- presumably CV folds,
    # parallel workers and the tuning metric; confirm before changing them.
    algorithm, preds, y_test = train_model(algo='dt', pca=pca, tuning=tuning, scaling=scaling, n_splits=5, sampling=sampling, n_jobs=7, scoring='cohen', verbose=0)

    # Build the metrics row, then tag it with the configuration that produced it.
    row = add_new_row(algorithm, preds, y_test)
    row['Algorithm'] = 'Decision Tree'
    row['PCA'] = pca
    row['Tuning'] = tuning
    row['Scaling'] = scaling
    row['Sampling'] = sampling
    dt_results.loc[len(dt_results)] = row
    print('Done at', datetime.now().strftime("%H:%M:%S"), '\n')

    # Rewrite the CSV after every run so completed results are already on
    # disk if a later configuration fails.
    dt_results.to_csv('./results/dt_results.csv', index=False)

Start time at 14:51:58
Starting Algorithm - Decision Tree, Autotuning - True, Scaling - True, Pca - True, Oversampling - rus (1 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_rus.csv
Loaded ./data/y_train_scaled_pca_rus.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 14:52:00 

Starting Algorithm - Decision Tree, Autotuning - True, Scaling - True, Pca - True, Oversampling - nm-3 (2 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_nm-3.csv
Loaded ./data/y_train_scaled_pca_nm-3.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 14:52:00 

Starting Algorithm - Decision Tree, Autotuning - True, Scaling - True, Pca - True, Oversampling - cnn (3 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_cnn.csv
Loaded ./data/y_train_scaled_pca_cnn.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning 

Starting Algorithm - Decision Tree, Autotuning - False, Scaling - True, Pca - True, Oversampling - cnn (27 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_cnn.csv
Loaded ./data/y_train_scaled_pca_cnn.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 14:52:17 

Starting Algorithm - Decision Tree, Autotuning - False, Scaling - True, Pca - True, Oversampling - ros (28 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_ros.csv
Loaded ./data/y_train_scaled_pca_ros.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 14:52:17 

Starting Algorithm - Decision Tree, Autotuning - False, Scaling - True, Pca - True, Oversampling - smote (29 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_smote.csv
Loaded ./data/y_train_scaled_pca_smote.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 14:52:18 

Starting Algorithm - Decision Tree, Autotuning - Fal

In [3]:
# Rank all runs from highest to lowest Cohen's Kappa and renumber the rows
# 0..N-1 so the displayed index doubles as the rank.
dt_results.sort_values(by="Cohen's Kappa", ascending=False, ignore_index=True)

Unnamed: 0,Algorithm,Non-pulsar precision,Non-pulsar recall,Non-pulsar F1,Pulsar precision,Pulsar recall,Pulsar F1,Weighted precision,Weighted recall,Weighted F1,Matthews corrcoefficient,Cohen's Kappa,Parameters,PCA,Tuning,Scaling,Sampling
0,Decision Tree,0.987064,0.989298,0.98818,0.895792,0.876471,0.886026,0.978394,0.978581,0.978476,0.874,0.874,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",False,True,False,smote
1,Decision Tree,0.986462,0.98971,0.988083,0.898785,0.870588,0.884462,0.978133,0.978394,0.97824,0.873,0.873,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",False,True,True,smote
2,Decision Tree,0.983068,0.991768,0.987399,0.914347,0.837255,0.874104,0.97654,0.977091,0.976637,0.863,0.862,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",False,True,True,nm-3
3,Decision Tree,0.985225,0.988063,0.986642,0.883065,0.858824,0.870775,0.975521,0.975787,0.975636,0.858,0.857,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",False,True,True,cnn
4,Decision Tree,0.989798,0.978391,0.984061,0.814488,0.903922,0.856877,0.973145,0.971317,0.97198,0.842,0.841,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",False,True,False,adasyn
5,Decision Tree,0.988367,0.979214,0.983769,0.818018,0.890196,0.852582,0.972186,0.970758,0.971308,0.837,0.836,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",False,True,False,cnn
6,Decision Tree,0.984336,0.982918,0.983627,0.839458,0.85098,0.84518,0.970574,0.970386,0.970476,0.829,0.829,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",False,True,True,ros
7,Decision Tree,0.984336,0.982918,0.983627,0.839458,0.85098,0.84518,0.970574,0.970386,0.970476,0.829,0.829,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",False,True,False,ros
8,Decision Tree,0.974002,0.994649,0.984217,0.936118,0.747059,0.830971,0.970404,0.971131,0.969661,0.822,0.815,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",True,True,True,nm-3
9,Decision Tree,0.976466,0.990533,0.983449,0.895455,0.772549,0.829474,0.968771,0.969827,0.968823,0.816,0.813,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",True,True,True,smote
