In [1]:
from itertools import product
from datetime import datetime

import pandas as pd

from model import choose_dataset, train_model, add_new_row

import warnings
# Suppress every warning for the rest of the kernel session (no category or
# module filter is given, so this is a blanket 'ignore').
# NOTE(review): this also hides deprecation/convergence warnings raised while
# training — consider narrowing the filter once the noisy source is known.
warnings.filterwarnings('ignore')

In [2]:
# Experiment grid: one KNN run is performed for every combination of the
# options below (Cartesian product of the four lists).
tune_list = [True, False]
scaling_list = [True, False]
pca_list = [True, False]
sampling_list = ['rus', 'nm-3', 'cnn', 'ros', 'smote', 'adasyn']

# Accumulator for the results, one row per run. The 'Algorithm', 'PCA',
# 'Tuning', 'Scaling' and 'Sampling' columns are filled in by the loop below;
# the remaining columns are presumably populated by `add_new_row` — verify
# against model.py.
knn_results = pd.DataFrame(columns=['Algorithm', 'Non-pulsar precision', 'Non-pulsar recall', 
                                    'Non-pulsar F1', 'Pulsar precision', 'Pulsar recall', 'Pulsar F1', 
                                    'Weighted precision', 'Weighted recall', 'Weighted F1', 
                                    'Matthews corrcoefficient', "Cohen's Kappa", 'Parameters', 
                                    'PCA', 'Tuning', 'Scaling', 'Sampling'])

# Materialise the grid once: the total is needed for the progress message, and
# rebuilding the whole product on every iteration just to count it is
# loop-invariant work.
combinations = list(product(tune_list, scaling_list, pca_list, sampling_list))
total_runs = len(combinations)

print('Start time at', datetime.now().strftime("%H:%M:%S"))

# `n` is the 1-based position of the current run within the grid.
for n, (tuning, scaling, pca, sampling) in enumerate(combinations, start=1):
    print('Starting Algorithm - KNN, Autotuning - {}, Scaling - {}, Pca - {}, Sampling strategy - {} ({} out of {})'.format(
        tuning, scaling, pca, sampling, n, total_runs))

    # Train one configuration; the call returns the fitted algorithm together
    # with its predictions and the matching test labels.
    algorithm, preds, y_test = train_model(algo='knn', pca=pca, tuning=tuning, scaling=scaling, n_splits=5, sampling=sampling, n_jobs=7, scoring='cohen', verbose=0)

    # Build the result row, then tag it with the configuration that produced it.
    row = add_new_row(algorithm, preds, y_test)
    row['Algorithm'] = 'KNN'
    row['PCA'] = pca
    row['Tuning'] = tuning
    row['Scaling'] = scaling
    row['Sampling'] = sampling
    knn_results.loc[len(knn_results)] = row
    print('Done at', datetime.now().strftime("%H:%M:%S"), '\n')

    # The CSV is rewritten after every run, so the file on disk always holds
    # the runs completed so far.
    knn_results.to_csv('./results/knn_results.csv', index=False)

Start time at 13:05:34
Starting Algorithm - KNN, Autotuning - True, Scaling - True, Pca - True, Sampling strategy - rus (1 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_rus.csv
Loaded ./data/y_train_scaled_pca_rus.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 13:05:38 

Starting Algorithm - KNN, Autotuning - True, Scaling - True, Pca - True, Sampling strategy - nm-3 (2 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_nm-3.csv
Loaded ./data/y_train_scaled_pca_nm-3.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 13:05:39 

Starting Algorithm - KNN, Autotuning - True, Scaling - True, Pca - True, Sampling strategy - cnn (3 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_cnn.csv
Loaded ./data/y_train_scaled_pca_cnn.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Tuning begins...
Tunin

Done at 13:12:05 

Starting Algorithm - KNN, Autotuning - False, Scaling - True, Pca - True, Sampling strategy - nm-3 (26 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_nm-3.csv
Loaded ./data/y_train_scaled_pca_nm-3.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 13:12:06 

Starting Algorithm - KNN, Autotuning - False, Scaling - True, Pca - True, Sampling strategy - cnn (27 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_cnn.csv
Loaded ./data/y_train_scaled_pca_cnn.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 13:12:06 

Starting Algorithm - KNN, Autotuning - False, Scaling - True, Pca - True, Sampling strategy - ros (28 out of 48)
Training begins...
Loaded ./data/X_train_scaled_pca_ros.csv
Loaded ./data/y_train_scaled_pca_ros.csv
Loaded ./data/X_test_scaled_pca.csv
Algorithm selected.
Training completed.
Done at 13:12:06 

Starting Algorithm - KNN, Autotuning - False, Scali

In [3]:
# Show the collected runs ranked from highest to lowest Cohen's Kappa.
# Left as a bare expression so the notebook renders the sorted frame.
knn_results.sort_values(
    "Cohen's Kappa",
    ascending=False,
)

Unnamed: 0,Algorithm,Non-pulsar precision,Non-pulsar recall,Non-pulsar F1,Pulsar precision,Pulsar recall,Pulsar F1,Weighted precision,Weighted recall,Weighted F1,Matthews corrcoefficient,Cohen's Kappa,Parameters,PCA,Tuning,Scaling,Sampling
7,KNN,0.984283,0.992385,0.988317,0.921277,0.84902,0.883673,0.978298,0.978767,0.978377,0.873,0.872,"{'algorithm': 'ball_tree', 'leaf_size': 30, 'm...",False,True,True,nm-3
9,KNN,0.986453,0.989092,0.987771,0.89336,0.870588,0.881827,0.97761,0.977836,0.977707,0.87,0.87,"{'algorithm': 'ball_tree', 'leaf_size': 30, 'm...",False,True,True,ros
10,KNN,0.987637,0.986417,0.987026,0.872093,0.882353,0.877193,0.976661,0.976532,0.976593,0.864,0.864,"{'algorithm': 'ball_tree', 'leaf_size': 30, 'm...",False,True,True,smote
34,KNN,0.987018,0.9858,0.986409,0.866279,0.876471,0.871345,0.975549,0.975414,0.975479,0.858,0.858,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",False,False,True,smote
32,KNN,0.98701,0.985182,0.986095,0.861272,0.876471,0.868805,0.975066,0.974856,0.974954,0.855,0.855,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",False,False,True,cnn
8,KNN,0.984407,0.987446,0.985924,0.876768,0.85098,0.863682,0.974182,0.974483,0.974312,0.85,0.85,"{'algorithm': 'ball_tree', 'leaf_size': 30, 'm...",False,True,True,cnn
2,KNN,0.980472,0.991974,0.986189,0.913907,0.811765,0.859813,0.974149,0.974856,0.974185,0.848,0.846,"{'algorithm': 'ball_tree', 'leaf_size': 30, 'm...",True,True,True,cnn
1,KNN,0.978534,0.994443,0.986424,0.937355,0.792157,0.858661,0.974622,0.975228,0.974288,0.849,0.845,"{'algorithm': 'ball_tree', 'leaf_size': 30, 'm...",True,True,True,nm-3
31,KNN,0.985173,0.984565,0.984869,0.853801,0.858824,0.856305,0.972694,0.972621,0.972657,0.841,0.841,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",False,False,True,nm-3
26,KNN,0.980624,0.989504,0.985044,0.890558,0.813725,0.85041,0.972069,0.972807,0.972255,0.837,0.835,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",True,False,True,cnn
