In [1]:
from itertools import product
from datetime import datetime

import pandas as pd

from model import choose_dataset, train_model, add_new_row, plot_conf_matrix

import warnings
# Install a blanket 'ignore' filter: no category or module is given, so every
# warning raised after this point in the kernel session is suppressed.
# NOTE(review): presumably done to keep the long training log readable, but it
# also hides any warnings emitted during model fitting — confirm that is intended.
warnings.filterwarnings('ignore')

In [2]:
# Experiment grid: every combination of the options below is trained and
# scored exactly once by the loop further down.
tune_list = [True, False]
scaling_list = [True, False]
pca_list = [True, False]
# Sampling tags are passed straight to train_model.
# NOTE(review): presumably these name the resampling strategy applied to the
# training set (under-/over-sampling variants) — confirm against model.py.
sampling_list = ['rus', 'nm-3', 'cnn', 'ros', 'smote', 'adasyn']

# One row is appended per trained configuration; the metric columns are filled
# by add_new_row and the configuration columns are filled in the loop.
lr_results = pd.DataFrame(columns=[
    'Algorithm',
    'Non-pulsar precision', 'Non-pulsar recall', 'Non-pulsar F1',
    'Pulsar precision', 'Pulsar recall', 'Pulsar F1',
    'Weighted precision', 'Weighted recall', 'Weighted F1',
    'Matthews corrcoefficient', "Cohen's Kappa",
    'Parameters', 'PCA', 'Tuning', 'Scaling', 'Sampling',
])

# Materialise the grid once so its size is computed a single time, instead of
# rebuilding the whole Cartesian product on every iteration just to print it.
experiment_grid = list(product(tune_list, scaling_list, pca_list, sampling_list))
total_runs = len(experiment_grid)

# Keep `n` defined (as 0) even if the grid were empty, as in the original cell.
n = 0

print('Start time at', datetime.now().strftime("%H:%M:%S"))

for n, (tuning, scaling, pca, sampling) in enumerate(experiment_grid, start=1):
    print('Starting Algorithm - Logistic Regression, Autotuning - {}, Scaling - {}, Pca - {}, Sampling - {} ({} out of {})'.format(
        tuning, scaling, pca, sampling, n, total_runs))

    # Train one logistic-regression configuration and collect its metrics row.
    algorithm, preds, y_test = train_model(algo='lr', pca=pca, tuning=tuning, scaling=scaling, sampling=sampling, n_jobs=7, scoring='cohen', verbose=0)
    row = add_new_row(algorithm, preds, y_test)

    # Tag the row with the configuration that produced it.
    row['Algorithm'] = 'Logistic Regression'
    row['PCA'] = pca
    row['Tuning'] = tuning
    row['Scaling'] = scaling
    row['Sampling'] = sampling

    lr_results.loc[len(lr_results)] = row
    print('Done at', datetime.now().strftime("%H:%M:%S"), '\n')

    # Rewrite the CSV after every run so the results gathered so far are
    # already on disk if a later configuration fails or the kernel is stopped.
    lr_results.to_csv('./results/lr_results.csv', index=False)

Start time at 12:00:55
Starting Algorithm - Logistic Regression, Autotuning - True, Scaling - True, Pca - True, Sampling - rus (1 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_rus.csv
Loaded ./data/y_train_scaled_pca_rus.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 12:01:03 

Starting Algorithm - Logistic Regression, Autotuning - True, Scaling - True, Pca - True, Sampling - nm-3 (2 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_nm-3.csv
Loaded ./data/y_train_scaled_pca_nm-3.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 12:01:09 

Starting Algorithm - Logistic Regression, Autotuning - True, Scaling - True, Pca - True, Sampling - cnn (3 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_cnn.csv
Loaded ./data/y_train_scaled_pca_cnn.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
T

Done at 12:14:32 

Starting Algorithm - Logistic Regression, Autotuning - False, Scaling - True, Pca - True, Sampling - cnn (27 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_cnn.csv
Loaded ./data/y_train_scaled_pca_cnn.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 12:14:32 

Starting Algorithm - Logistic Regression, Autotuning - False, Scaling - True, Pca - True, Sampling - ros (28 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_ros.csv
Loaded ./data/y_train_scaled_pca_ros.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 12:14:32 

Starting Algorithm - Logistic Regression, Autotuning - False, Scaling - True, Pca - True, Sampling - smote (29 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_smote.csv
Loaded ./data/y_train_scaled_pca_smote.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 12:14:33 

Starting Algorithm - Logist

In [3]:
# Rank the configurations from best to worst Cohen's Kappa and renumber the
# rows so the displayed index doubles as the rank position.
(
    lr_results
    .sort_values("Cohen's Kappa", ascending=False)
    .reset_index(drop=True)
)

Unnamed: 0,Algorithm,Non-pulsar precision,Non-pulsar recall,Non-pulsar F1,Pulsar precision,Pulsar recall,Pulsar F1,Weighted precision,Weighted recall,Weighted F1,Matthews corrcoefficient,Cohen's Kappa,Parameters,PCA,Tuning,Scaling,Sampling
0,Logistic Regression,0.986888,0.991356,0.989117,0.913934,0.87451,0.893788,0.979958,0.980257,0.980062,0.883,0.883,"{'C': 9.79795918367347, 'class_weight': None, ...",False,True,False,ros
1,Logistic Regression,0.986686,0.991356,0.989016,0.913758,0.872549,0.892678,0.979758,0.980071,0.979864,0.882,0.882,"{'C': 1.0, 'class_weight': None, 'dual': False...",False,False,True,ros
2,Logistic Regression,0.986686,0.991356,0.989016,0.913758,0.872549,0.892678,0.979758,0.980071,0.979864,0.882,0.882,"{'C': 1.0, 'class_weight': None, 'dual': False...",False,False,True,smote
3,Logistic Regression,0.986885,0.99115,0.989013,0.912065,0.87451,0.892893,0.979778,0.980071,0.979883,0.882,0.882,"{'C': 3.736734693877551, 'class_weight': None,...",False,True,True,ros
4,Logistic Regression,0.986484,0.991356,0.988914,0.91358,0.870588,0.891566,0.979559,0.979885,0.979667,0.881,0.88,"{'C': 0.7061224489795919, 'class_weight': None...",False,True,True,smote
5,Logistic Regression,0.986279,0.99115,0.988709,0.911523,0.868627,0.889558,0.979178,0.979512,0.97929,0.879,0.878,"{'C': 1.0, 'class_weight': None, 'dual': False...",False,False,False,smote
6,Logistic Regression,0.987672,0.989298,0.988484,0.896414,0.882353,0.889328,0.979004,0.97914,0.979066,0.878,0.878,"{'C': 1.0, 'class_weight': None, 'dual': False...",False,False,True,cnn
7,Logistic Regression,0.986279,0.99115,0.988709,0.911523,0.868627,0.889558,0.979178,0.979512,0.97929,0.879,0.878,"{'C': 5.353061224489796, 'class_weight': None,...",False,True,False,smote
8,Logistic Regression,0.987472,0.989504,0.988487,0.898,0.880392,0.889109,0.978973,0.97914,0.979047,0.878,0.878,"{'C': 5.555102040816326, 'class_weight': None,...",False,True,True,cnn
9,Logistic Regression,0.985466,0.990739,0.988095,0.907025,0.860784,0.8833,0.978015,0.978394,0.978141,0.872,0.871,"{'C': 1.0, 'class_weight': None, 'dual': False...",False,False,False,ros
