In [1]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [2]:
import os
import dill
import warnings

import pandas as pd
import numpy as np

from glob import glob
from sklearn import metrics

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold

In [3]:
# To Read out 100 rows rather than the default value. 
pd.set_option('display.max_rows', 1000)

# Not prinitng the warnings
warnings.filterwarnings('ignore')

# Setting the working path for data input and result outputs
os.chdir('D:\\Spring 2019\\DS 440\\Data')

# Setting a random seed for reproducability
np.random.seed(7)

In [4]:
# Problem 1
df = pd.read_csv('kplr_dr25_inj1_plti.csv', header = 0)

#filenames = glob('D:\Spring 2019\DS 440\Data\kplr_dr25_inj*.csv')
#df = pd.concat([pd.read_csv(f) for f in filenames], ignore_index = True)

print('Dataset Size:')
print(df.shape)
print()

temp_df = df.iloc[:, 0:15]
df_drop = temp_df[temp_df.isnull().any(axis=1)]
temp_df = temp_df.drop(df_drop.index.values)
temp_df = temp_df[temp_df.Recovered != 2]

print('Cleaned Dataset Size:')
print(temp_df.shape)
print()

X = temp_df.iloc[:, 1:14]
Y = temp_df.iloc[:, 14]

print('Input Size:', X.shape)
print('Output Size:', Y.shape)

Dataset Size:
(146294, 25)

Cleaned Dataset Size:
(145671, 15)

Input Size: (145671, 13)
Output Size: (145671,)


In [5]:
'''
# Problem 2
df = pd.read_csv('kplr_dr25_inj1_tces.csv', header = 0)

print('Dataset Size: ')
print(df.shape)
print()

cols = ['TCE_ID', 'KIC', 'Disp', 'Score', 'period', 'epoch', 'NTL', 'SS', 
        'CO', 'EM', 'Expected_MES', 'MES', 'NTran', 'depth', 'duration', 'Rp',
        'Rs', 'Ts', 'logg', 'a', 'Rp/Rs', 'a/Rs', 'impact', 'SNR_DV', 'Sp',
        'Fit_Prov']
df = df[cols]
df.columns

df['Disp'] = df['Disp'].replace('PC', 1)
df['Disp'] = df['Disp'].replace('FP', 0)

X = df.iloc[:, 6:25]
Y = df.iloc[:, 2]

print('Input Size:', X.shape)
print('Output Size:', Y.shape)
'''

"\n# Problem 2\ndf = pd.read_csv('kplr_dr25_inj1_tces.csv', header = 0)\n\nprint('Dataset Size: ')\nprint(df.shape)\nprint()\n\ncols = ['TCE_ID', 'KIC', 'Disp', 'Score', 'period', 'epoch', 'NTL', 'SS', \n        'CO', 'EM', 'Expected_MES', 'MES', 'NTran', 'depth', 'duration', 'Rp',\n        'Rs', 'Ts', 'logg', 'a', 'Rp/Rs', 'a/Rs', 'impact', 'SNR_DV', 'Sp',\n        'Fit_Prov']\ndf = df[cols]\ndf.columns\n\ndf['Disp'] = df['Disp'].replace('PC', 1)\ndf['Disp'] = df['Disp'].replace('FP', 0)\n\nX = df.iloc[:, 6:25]\nY = df.iloc[:, 2]\n\nprint('Input Size:', X.shape)\nprint('Output Size:', Y.shape)\n"

In [6]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 10, stop = 350, num = 5)]

# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(start = 2, stop = 30, num = 10)]

# Minimum number of samples required to split a node
min_samples_split = [2, 4, 6, 8, 10]

# Minimum number of samples required at each leaf node
min_samples_leaf = [0.10, 0.25, 0.50, 1, 2, 4]

# Minimum number of neighbors required for each node
n_neighbors = [int(x) for x in np.linspace(start = 3, stop = 30, num = 10)]

# Minimum Power parameter required for the Minkowski metric
p = [float(x) for x in np.linspace(start = 1, stop = 5, num = 10)]



# Maximizing the number of neurons in the hidden layers
hidden_layer_sizes = [(50,), (100,), (50,50,50), (50,100,50)]

# Activation functiosn for the hidden layers
activation = ['identity', 'logistic', 'tanh', 'relu']

# Solver for weight optimizations
solver = ['sgd', 'adam']

# Minimizing the regularization parameter
alpha = [0.0001, 0.001, 0.01]

# Maximizing the mini batch size
batch_size = [32, 64, 96, 128]

# Learning rate schedule for weight updates
learning_rate = ['constant', 'invscaling', 'adaptive']

# Setting up the k-fold
kfold = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 7)

In [7]:
# Instantiate the model
abc = AdaBoostClassifier()

# Updating the param grid
param_grid = dict(n_estimators = n_estimators,
                  learning_rate = [0.0001, 0.001, 0.01, 0.1, 1.0],
                  algorithm = ['SAMME', 'SAMME.R'])

print('AdaBoost Classifier')
abc_grid_search = GridSearchCV(abc, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
abc_grid_result = abc_grid_search.fit(X, Y)
abc_predict = abc_grid_search.predict(X)
abc_predict_proba = pd.DataFrame(abc_grid_search.predict_proba(X))

# Store metrics
abc_accuracy = metrics.accuracy_score(Y, abc_predict)  
abc_precision = metrics.precision_score(Y, abc_predict, pos_label=1)
abc_recall = metrics.recall_score(Y, abc_predict, pos_label=1)  
abc_f1 = metrics.f1_score(Y, abc_predict, pos_label=1)
abc_auroc = metrics.roc_auc_score(Y, abc_predict)
abc_aurpc = metrics.average_precision_score(Y, abc_predict, pos_label=1)

dill.dump_session('AdaBoostClassifier_Parameter_Tuning.db')

AdaBoost Classifier
Fitting 10 folds for each of 50 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed: 18.6min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed: 48.0min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 57.5min finished


In [8]:
# Instantiate the model
dtc = DecisionTreeClassifier()
    
# Updating the param grid
param_grid = dict(criterion = ['gini', 'entropy'],
                  max_depth = max_depth,
                  max_features = ['auto', 'sqrt', 'log2', None],
                  min_impurity_decrease = [0.00001, 0.0001, 0.001, 0.01, 0.1],
                  min_samples_split = min_samples_split,
                  min_samples_leaf = min_samples_leaf)

print('DecisionTree Classifier')
dtc_grid_search = GridSearchCV(dtc, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
dtc_grid_result = dtc_grid_search.fit(X, Y)
dtc_predict = dtc_grid_search.predict(X)
dtc_predict_proba = pd.DataFrame(dtc_grid_search.predict_proba(X))

# Store metrics
dtc_accuracy = metrics.accuracy_score(Y, dtc_predict)  
dtc_precision = metrics.precision_score(Y, dtc_predict, pos_label=1)
dtc_recall = metrics.recall_score(Y, dtc_predict, pos_label=1)  
dtc_f1 = metrics.f1_score(Y, dtc_predict, pos_label=1)
dtc_auroc = metrics.roc_auc_score(Y, dtc_predict_proba[1])
dtc_aurpc = metrics.average_precision_score(Y, dtc_predict, pos_label=1)

dill.dump_session('DecisionTreeClassifier_Parameter_Tuning.db')

DecisionTree Classifier
Fitting 10 folds for each of 12000 candidates, totalling 120000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:   18.8s
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:   32.5s
[Parallel(n_jobs=-1)]: Done 1234 tasks      | elapsed:   49.4s
[Parallel(n_jobs=-1)]: Done 1784 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 2434 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 3184 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 4984 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 6034 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 7184 tasks      | elapsed:  5.4min
[Parallel(n_jobs=-1)]: Done 8434 tasks      | elapsed:  6.3min
[Parallel(n_jobs=-1)]: Done 9784 tasks      | elapsed:  7.3min
[Parallel(n_jobs=-1)]: Done 11234 tasks      |

PicklingError: Could not pickle the task to send it to the workers.

In [None]:
# Instantiate the model
etc = ExtraTreesClassifier()

# Updating the param grid
param_grid = dict(bootstrap = ['True', 'False'],
                  criterion = ['gini', 'entropy'],
                  max_depth = max_depth,
                  max_features = ['auto', 'sqrt', 'log2', None],
                  min_impurity_decrease = [0.00001, 0.0001, 0.001, 0.01, 0.1],
                  min_samples_split = min_samples_split,
                  min_samples_leaf = min_samples_leaf,
                  n_estimators = n_estimators)

print('ExtraTrees Classifier')
etc_grid_search = GridSearchCV(etc, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
etc_grid_result = etc_grid_search.fit(X, Y)
etc_predict = etc_grid_search.predict(X)
etc_predict_proba = pd.DataFrame(etc_grid_search.predict_proba(X))

# Store metrics
etc_accuracy = metrics.accuracy_score(Y, etc_predict)  
etc_precision = metrics.precision_score(Y, etc_predict, pos_label=1)
etc_recall = metrics.recall_score(Y, etc_predict, pos_label=1)  
etc_f1 = metrics.f1_score(Y, etc_predict, pos_label=1)
etc_auroc = metrics.roc_auc_score(Y, etc_predict_proba[1])
etc_aurpc = metrics.average_precision_score(Y, etc_predict, pos_label=1)

dill.dump_session('ExtraTreesClassifier_Parameter_Tuning.db')

In [None]:
# Instantiate the model
gbc = GradientBoostingClassifier()
        
# Updating the param grid
param_grid = dict(criterion = ['friedman_mse', 'mse', 'mae'],
                  learning_rate = [0.0001, 0.001, 0.01, 0.1, 1.0],
                  loss = ['deviance', 'exponential'],
                  max_depth = max_depth,
                  max_features = ['auto', 'sqrt', 'log2', None],
                  min_impurity_decrease = [0.00001, 0.0001, 0.001, 0.01, 0.1],
                  min_samples_split = min_samples_split,
                  min_samples_leaf = min_samples_leaf,
                  n_estimators = n_estimators,
                  subsample = [0.5, 1.0, 1.5])
        
print('GradientBoosting Classifier')
gbc_grid_search = GridSearchCV(gbc, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
gbc_grid_result = gbc_grid_search.fit(X, Y)
gbc_predict = gbc_grid_search.predict(X)
gbc_predict_proba = pd.DataFrame(gbc_grid_search.predict_proba(X))

# Store metrics
gbc_accuracy = metrics.accuracy_score(Y, gbc_predict)  
gbc_precision = metrics.precision_score(Y, gbc_predict, pos_label=1)
gbc_recall = metrics.recall_score(Y, gbc_predict, pos_label=1)  
gbc_f1 = metrics.f1_score(Y, gbc_predict, pos_label=1)
gbc_auroc = metrics.roc_auc_score(Y, gbc_predict_proba[1])
gbc_aurpc = metrics.average_precision_score(Y, gbc_predict, pos_label=1)

dill.dump_session('GradientBoostingClassifier_Parameter_Tuning.db')

In [None]:
# Instantiate the model
gnb = GaussianNB()

# Updating the param grid
param_grid = dict()
        
print('Naive Bayes')
gnb_grid_search = GridSearchCV(gnb, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
gnb_grid_result = gnb_grid_search.fit(X, Y)
gnb_predict = gnb_grid_search.predict(X)
gnb_predict_proba = pd.DataFrame(gnb_grid_search.predict_proba(X))

# Store metrics
gnb_accuracy = metrics.accuracy_score(Y, gnb_predict)  
gnb_precision = metrics.precision_score(Y, gnb_predict, pos_label=1)
gnb_recall = metrics.recall_score(Y, gnb_predict, pos_label=1)  
gnb_f1 = metrics.f1_score(Y, gnb_predict, pos_label=1)
gnb_auroc = metrics.roc_auc_score(Y, gnb_predict_proba[1])
gnb_aurpc = metrics.average_precision_score(Y, gnb_predict, pos_label=1)

dill.dump_session('GaussianNB_Parameter_Tuning.db')

In [None]:
# Instantiate the model
gpc = GaussianProcessClassifier()

# Updating the param grid
param_grid = dict(optimizer = ['fmin', 'fmin_powell', 'fmin_cg', 'fmin_bfgs', 'fmin_ncg', 'fmin_l_bfgs_b', 'fmin_tnc',
                               'fmin_cobyla', 'fmin_slsqp'])
        
print('GaussianProcess Classifier')
gpc_grid_search = GridSearchCV(gpc, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
gpc_grid_result = gpc_grid_search.fit(X, Y)
gpc_predict = gpc_grid_search.predict(X)
gpc_predict_proba = pd.DataFrame(gpc_grid_search.predict_proba(X))

# Store metrics
gpc_accuracy = metrics.accuracy_score(Y, gpc_predict)  
gpc_precision = metrics.precision_score(Y, gpc_predict, pos_label=1)
gpc_recall = metrics.recall_score(Y, gpc_predict, pos_label=1)  
gpc_f1 = metrics.f1_score(Y, gpc_predict, pos_label=1)
gpc_auroc = metrics.roc_auc_score(Y, gpc_predict_proba[1])
gpc_aurpc = metrics.average_precision_score(Y, gpc_predict, pos_label=1)

dill.dump_session('GaussianProcessClassifier_Parameter_Tuning.db')

In [None]:
# Instantiate the model
knc = KNeighborsClassifier()

# Updating the param grid
param_grid = dict(algorithm = ['auto', 'ball_tree', 'kd_tree', 'brute'],
                  leaf_size = [10, 20, 30, 40, 50],
                  metric = ['euclidean', 'manhattan', 'chebyshev', 'minkowski', 'wminkowski', 'seuclidean', 'mahalanobis'],
                  n_neighbors = n_neighbors,
                  p = p,
                  weights = ['uniform', 'distance'])

print('KNeighbors Classifier')
knc_grid_search = GridSearchCV(knc, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
knc_grid_result = knc_grid_search.fit(X, Y)
knc_predict = knc_grid_search.predict(X)
knc_predict_proba = pd.DataFrame(knc_grid_search.predict_proba(X))

# Store metrics
knc_accuracy = metrics.accuracy_score(Y, knc_predict)  
knc_precision = metrics.precision_score(Y, knc_predict, pos_label=1)
knc_recall = metrics.recall_score(Y, knc_predict, pos_label=1)  
knc_f1 = metrics.f1_score(Y, knc_predict, pos_label=1)
knc_auroc = metrics.roc_auc_score(Y, knc_predict_proba[1])
knc_aurpc = metrics.average_precision_score(Y, knc_predict, pos_label=1)

dill.dump_session('KNeighborsClassifier_Parameter_Tuning.db')

In [None]:
# Instantiate the model
mlp = MLPClassifier()

# Updating the param grid
param_grid = dict(activation = ['identity', 'logistic', 'tanh', 'relu'],
                  alpha = [0.0005, 0.0001, 0.005, 0.001, 0.01, 0.1],
                  batch_size = [32, 64, 96, 128, 256],
                  hidden_layer_sizes = [(50, 100, 50), (100, 50, 100), (50, 100), (100, 50), (100, )],
                  learning_rate = ['constant', 'invscaling', 'adaptive']
                  learning_rate_init = [0.0001, 0.001, 0.01, 0.1],
                  shuffle = ['True', 'False'],
                  solver = ['lbfgs', 'sgd', 'adam'])

print('Multi-layer Perceptron Classifier')
mlp_grid_search = GridSearchCV(mlp, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
mlp_grid_result = mlp_grid_search.fit(X, Y)
mlp_predict = mlp_grid_search.predict(X)
mlp_predict_proba = pd.DataFrame(mlp_grid_search.predict_proba(X))

# Store metrics
mlp_accuracy = metrics.accuracy_score(Y, mlp_predict)  
mlp_precision = metrics.precision_score(Y, mlp_predict, pos_label=1)
mlp_recall = metrics.recall_score(Y, mlp_predict, pos_label=1)  
mlp_f1 = metrics.f1_score(Y, mlp_predict, pos_label=1)
mlp_auroc = metrics.roc_auc_score(Y, mlp_predict_proba[1])
mlp_aurpc = metrics.average_precision_score(Y, mlp_predict, pos_label=1)

dill.dump_session('MLPClassifier_Parameter_Tuning.db')

In [None]:
# Instantiate the model
rfc = RandomForestClassifier()

# Updating the param grid
param_grid = dict(bootstrap = ['True', 'False'],
                  criterion = ['gini', 'entropy'],
                  max_depth = max_depth,
                  max_features = ['auto', 'sqrt', 'log2', None],
                  min_impurity_decrease = [0.00001, 0.0001, 0.001, 0.01, 0.1],
                  min_samples_split = min_samples_split,
                  min_samples_leaf = min_samples_leaf,
                  n_estimators = n_estimators)

print('RandomForest Classifier')
rfc_grid_search = GridSearchCV(rfc, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
rfc_grid_result = rfc_grid_search.fit(X, Y)
rfc_predict = rfc_grid_search.predict(X)
rfc_predict_proba = pd.DataFrame(rfc_grid_search.predict_proba(X))

# Store metrics
rfc_accuracy = metrics.accuracy_score(Y, rfc_predict)  
rfc_precision = metrics.precision_score(Y, rfc_predict, pos_label=1)
rfc_recall = metrics.recall_score(Y, rfc_predict, pos_label=1)  
rfc_f1 = metrics.f1_score(Y, rfc_predict, pos_label=1)
rfc_auroc = metrics.roc_auc_score(Y, rfc_predict_proba[1])
rfc_aurpc = metrics.average_precision_score(Y, rfc_predict, pos_label=1)

dill.dump_session('RandomForestClassifier_Parameter_Tuning.db')

In [None]:
'''
# Instantiate the model
svc = SVC()

# Updating the param grid
param_grid = dict(C = [0.001, 0.01, 0.1, 1, 10],
                  decision_function_shape = ['ovo', 'ovr'],
                  degree = [0, 1, 2, 3, 4, 5, 6],
                  gamma = [0.001, 0.01, 0.1, 1, 10],
                  kernel = ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
                  shrinking = ['True', 'False'])
        
print('ExtraTrees Classifier')
svc_grid_search = GridSearchCV(svc, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
svc_grid_result = svc_grid_search.fit(X, Y)
svc_predict = svc_grid_search.predict(X)
svc_predict_proba = pd.DataFrame(svc_grid_search.predict_proba(X))

# Store metrics
svc_accuracy = metrics.accuracy_score(Y, svc_predict)  
svc_precision = metrics.precision_score(Y, svc_predict, pos_label=1)
svc_recall = metrics.recall_score(Y, svc_predict, pos_label=1)  
svc_f1 = metrics.f1_score(Y, svc_predict, pos_label=1)
svc_auroc = metrics.roc_auc_score(Y, svc_predict_proba[1])
svc_aurpc = metrics.average_precision_score(Y, svc_predict, pos_label=1)

dill.dump_session('SVC_Parameter_Tuning.db')
'''

In [None]:
# Instantiate the model
xgb = XGBClassifier()

# Updating the param grid
param_grid = dict(booster = ['gbtree', 'gblinear', 'dart'],
                  learning_rate = [0.0001, 0.001, 0.01, 0.1, 1.0],
                  min_split_loss = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
                  max_depth = max_depth,
                  min_child_weight = [1, 3, 5],
                  reg_lambda = [0.0001, 0.001, 0.01, 0.1, 0.0, 1.0],
                  reg_alpha = [0.0001, 0.001, 0.01, 0.1, 0.0, 1.0],
                  tree_method = ['exact', 'approx', 'hist'])

print('XGBoost Classifier')
xgb_grid_search = GridSearchCV(xgb, param_grid, scoring = 'accuracy', n_jobs = -1, cv = kfold, verbose = 1)
xgb_grid_result = xgb_grid_search.fit(X, Y)
xgb_predict = xgb_grid_search.predict(X)
xgb_predict_proba = pd.DataFrame(xgb_grid_search.predict_proba(X))

# Store metrics
xgb_accuracy = metrics.accuracy_score(Y, xgb_predict)  
xgb_precision = metrics.precision_score(Y, xgb_predict, pos_label=1)
xgb_recall = metrics.recall_score(Y, xgb_predict, pos_label=1)  
xgb_f1 = metrics.f1_score(Y, xgb_predict, pos_label=1)
xgb_auroc = metrics.roc_auc_score(Y, xgb_predict_proba[1])
xgb_aurpc = metrics.average_precision_score(Y, xgb_predict, pos_label=1)

dill.dump_session('XGBClassifier_Parameter_Tuning.db')

In [None]:
# Model comparison
models = pd.DataFrame({
    'Model': ['ADA Boost', 'Extra Trees', 'Random Forest', 'Gradient Boosting', 'XG Boost', 'Decision Tree',
              'Multi Layer Perceptron', 'K Neighbors', 'Naive Bayes', 'Aggregate'],
    'Accuracy' : [abc_accuracy, etc_accuracy, rfc_accuracy, gbc_accuracy, xgb_accuracy, dtc_accuracy,
                  mlp_accuracy, knc_accuracy, gnb_accuracy, aggregate_accuracy],
    'F1' : [abc_f1, etc_f1, rfc_f1, gbc_f1, xgb_f1, dtc_f1, mlp_f1, knc_f1, gnb_f1, aggregate_f1],
    'AUROC' : [abc_auroc, etc_auroc, rfc_auroc, gbc_auroc, xgb_auroc, dtc_auroc, mlp_auroc, knc_auroc, 
               gnb_auroc, aggregate_auroc],
    'AURPC' : [abc_aurpc, etc_aurpc, rfc_aurpc, gbc_aurpc, xgb_aurpc, dtc_aurpc, mlp_aurpc, knc_aurpc,
               gnb_aurpc, aggregate_aurpc],
    'Precision': [abc_precision, etc_precision, rfc_precision, gbc_precision, xgb_precision, dtc_precision, mlp_precision, 
                  knc_precision, gnb_precision, aggregate_precision],
    'Recall' : [abc_recall, etc_recall, rfc_recall, gbc_recall, xgb_recall, dtc_recall, mlp_recall, knc_recall,
                gnb_recall, aggregate_recall]
})
# Print table and sort by test precision
models = models.sort_values(by='Accuracy', ascending=False)

blankIndex=[''] * len(models)
models.index=blankIndex
models

In [None]:
'''
means = _grid_result.cv_results_['mean_test_score']
stds = _grid_result.cv_results_['std_test_score']
params = _grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
'''