In [1]:
import os
import warnings
from datetime import datetime
from itertools import product

import pandas as pd

from model import choose_dataset, train_model, add_new_row

warnings.filterwarnings('ignore')

In [2]:
# Experiment grid: one Random Forest run is trained and scored for every
# combination of the option lists below.
tune_list = [True, False]
scaling_list = [True, False]
# NOTE(review): both entries are False, so PCA is never enabled and every
# remaining configuration is trained twice (2 * 2 * 2 * 6 = 48 runs instead of
# 24). Confirm whether [True, False] or [False] was intended.
pca_list = [False, False]
sampling_list = ['rus', 'nm-3', 'cnn', 'ros', 'smote', 'adasyn']

# One row per run. 'Algorithm', 'PCA', 'Tuning', 'Scaling' and 'Sampling' are
# filled in below; the remaining columns presumably come from add_new_row().
rf_results = pd.DataFrame(columns=[
    'Algorithm',
    'Non-pulsar precision', 'Non-pulsar recall', 'Non-pulsar F1',
    'Pulsar precision', 'Pulsar recall', 'Pulsar F1',
    'Weighted precision', 'Weighted recall', 'Weighted F1',
    'Matthews corrcoefficient', "Cohen's Kappa",
    'Parameters', 'PCA', 'Tuning', 'Scaling', 'Sampling',
])

# Materialise the grid once: the total is loop-invariant, so there is no need
# to rebuild the whole product on every iteration just to print it.
combinations = list(product(tune_list, scaling_list, pca_list, sampling_list))
total_runs = len(combinations)

# Create the output directory up front so a missing folder fails (or is fixed)
# before any training time is spent, not after the first run.
results_path = './results/rf_results.csv'
os.makedirs(os.path.dirname(results_path), exist_ok=True)

# Keeps `n` defined even if the grid is empty; the loop rebinds it otherwise.
n = 0

print('Start time at', datetime.now().strftime("%H:%M:%S"))

for n, (tuning, scaling, pca, sampling) in enumerate(combinations, start=1):
    print('Starting Algorithm - Random Forest, Autotuning - {}, Scaling - {}, Pca - {}, Oversampling - {} ({} out of {})'.format(
        tuning, scaling, pca, sampling, n, total_runs))

    algorithm, preds, y_test = train_model(algo='rf', pca=pca, tuning=tuning, scaling=scaling, n_splits=5, sampling=sampling, n_jobs=7, scoring='cohen', verbose=0)
    row = add_new_row(algorithm, preds, y_test)
    # Tag the row with the configuration that produced it.
    row['Algorithm'] = 'Random Forest'
    row['PCA'] = pca
    row['Tuning'] = tuning
    row['Scaling'] = scaling
    row['Sampling'] = sampling
    rf_results.loc[len(rf_results)] = row
    print('Done at', datetime.now().strftime("%H:%M:%S"), '\n')
    # Rewrite the CSV after every run, so the results gathered so far are on
    # disk if a later run is interrupted.
    rf_results.to_csv(results_path, index=False)

Start time at 11:32:26
Starting Algorithm - Random Forest, Autotuning - True, Scaling - True, Pca - False, Oversampling - rus (1 out of 48)
Training begins...
Loaded ./data/X_train_scaled_rus.csv
Loaded ./data/y_train_scaled_rus.csv
Loaded ./data/X_test_scaled.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 11:32:56 

Starting Algorithm - Random Forest, Autotuning - True, Scaling - True, Pca - False, Oversampling - nm-3 (2 out of 48)
Training begins...
Loaded ./data/X_train_scaled_nm-3.csv
Loaded ./data/y_train_scaled_nm-3.csv
Loaded ./data/X_test_scaled.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Training completed.
Done at 11:33:19 

Starting Algorithm - Random Forest, Autotuning - True, Scaling - True, Pca - False, Oversampling - cnn (3 out of 48)
Training begins...
Loaded ./data/X_train_scaled_cnn.csv
Loaded ./data/y_train_scaled_cnn.csv
Loaded ./data/X_test_scaled.csv
Algorithm selected.
Tuning begins...
Tuning completed.
Train

Training completed.
Done at 12:07:50 

Starting Algorithm - Random Forest, Autotuning - False, Scaling - True, Pca - False, Oversampling - nm-3 (26 out of 48)
Training begins...
Loaded ./data/X_train_scaled_nm-3.csv
Loaded ./data/y_train_scaled_nm-3.csv
Loaded ./data/X_test_scaled.csv
Algorithm selected.
Training completed.
Done at 12:07:51 

Starting Algorithm - Random Forest, Autotuning - False, Scaling - True, Pca - False, Oversampling - cnn (27 out of 48)
Training begins...
Loaded ./data/X_train_scaled_cnn.csv
Loaded ./data/y_train_scaled_cnn.csv
Loaded ./data/X_test_scaled.csv
Algorithm selected.
Training completed.
Done at 12:07:51 

Starting Algorithm - Random Forest, Autotuning - False, Scaling - True, Pca - False, Oversampling - ros (28 out of 48)
Training begins...
Loaded ./data/X_train_scaled_ros.csv
Loaded ./data/y_train_scaled_ros.csv
Loaded ./data/X_test_scaled.csv
Algorithm selected.
Training completed.
Done at 12:07:53 

Starting Algorithm - Random Forest, Autotuning - 

In [4]:
# Bare last expression: the notebook renders `rf_results` as this cell's output.
# NOTE(review): the saved output below has a non-sequential index and an
# 'Unnamed: 0' column, so it looks like it came from a sorted / re-loaded frame
# rather than from the in-memory `rf_results` built above — re-run to confirm.
rf_results

Unnamed: 0,Algorithm,Non-pulsar precision,Non-pulsar recall,Non-pulsar F1,Pulsar precision,Pulsar recall,Pulsar F1,Weighted precision,Weighted recall,Weighted F1,Matthews corrcoefficient,Cohen's Kappa,Parameters,PCA,Tuning,Scaling,Sampling
4,Random Forest,0.987887,0.990327,0.989106,0.905622,0.884314,0.894841,0.980073,0.980257,0.980152,0.884,0.884,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",False,True,True,smote
1,Random Forest,0.985096,0.993003,0.989034,0.927813,0.856863,0.890928,0.979655,0.980071,0.979714,0.881,0.88,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",False,True,True,nm-3
40,Random Forest,0.986675,0.990533,0.9886,0.906314,0.872549,0.889111,0.979041,0.979326,0.97915,0.878,0.878,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",False,False,False,smote
7,Random Forest,0.98509,0.992591,0.988826,0.92389,0.856863,0.889115,0.979277,0.979698,0.979355,0.879,0.878,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",False,True,True,nm-3
27,Random Forest,0.985487,0.992179,0.988822,0.920335,0.860784,0.889564,0.979298,0.979698,0.979393,0.879,0.878,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",False,False,True,ros
45,Random Forest,0.985487,0.992179,0.988822,0.920335,0.860784,0.889564,0.979298,0.979698,0.979393,0.879,0.878,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",False,False,False,ros
10,Random Forest,0.987074,0.990121,0.988595,0.90303,0.876471,0.889552,0.979091,0.979326,0.979187,0.878,0.878,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",False,True,True,smote
33,Random Forest,0.985487,0.992179,0.988822,0.920335,0.860784,0.889564,0.979298,0.979698,0.979393,0.879,0.878,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",False,False,True,ros
46,Random Forest,0.986675,0.990533,0.9886,0.906314,0.872549,0.889111,0.979041,0.979326,0.97915,0.878,0.878,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",False,False,False,smote
39,Random Forest,0.985487,0.992179,0.988822,0.920335,0.860784,0.889564,0.979298,0.979698,0.979393,0.879,0.878,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",False,False,False,ros
