# Compare model results and final model selection

1. Evaluate all of our saved models on the validation set
2. Select the best model based on performance on the validation set
3. Evaluate that model on the holdout test set

# Read in data

In [8]:
from time import perf_counter, time

import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Validation split: used to compare the saved candidate models.
val_features, val_labels = [
    pd.read_csv(csv_path)
    for csv_path in ('./dataset/val_features.csv', './dataset/val_labels.csv')
]

# Test split: reserved for the final evaluation of the selected model.
te_features, te_labels = [
    pd.read_csv(csv_path)
    for csv_path in ('./dataset/test_features.csv', './dataset/test_labels.csv')
]

# Read in models

In [9]:
# Map each model name to the estimator stored at ./models/<name>_model.pkl.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
# A comprehension keeps the loop variable out of the notebook namespace, so
# it cannot be confused with the estimator objects iterated over later.
models = {
    model_name: joblib.load('./models/{}_model.pkl'.format(model_name))
    for model_name in ['LR', 'SVM', 'MLP', 'Random_forest', 'Boosting']
}

In [10]:
# Display the loaded estimators and their hyperparameters as a sanity check.
models

{'LR': LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
                    intercept_scaling=1, l1_ratio=None, max_iter=100,
                    multi_class='auto', n_jobs=None, penalty='l2',
                    random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                    warm_start=False),
 'SVM': SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
     decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
     max_iter=-1, probability=False, random_state=None, shrinking=True,
     tol=0.001, verbose=False),
 'MLP': MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
               beta_2=0.999, early_stopping=False, epsilon=1e-08,
               hidden_layer_sizes=(50,), learning_rate='invscaling',
               learning_rate_init=0.001, max_fun=15000, max_iter=200,
               momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
               power_t=0.5, rando

# Evaluating models on validation set

In [11]:
def evaluate_model(name, model, features, labels):
    """Print accuracy, precision, recall and prediction latency for one model.

    Parameters
    ----------
    name : str
        Label printed at the start of the report line.
    model : object
        Fitted estimator; only its ``predict(features)`` method is called.
    features : array-like
        Inputs passed unchanged to ``model.predict``.
    labels : array-like
        Ground-truth targets the predictions are scored against.

    Returns
    -------
    None
        The one-line report is written to stdout.

    Notes
    -----
    The three metric functions are called with their default arguments, and
    every metric is rounded to 3 decimals. Latency covers only the
    ``predict`` call and is reported in milliseconds, rounded to 1 decimal.
    """
    # perf_counter is a monotonic, high-resolution clock, so millisecond-scale
    # timings are not skewed by system clock adjustments or by the coarser
    # resolution time() can have on some platforms.
    start = perf_counter()
    pred = model.predict(features)
    end = perf_counter()

    accuracy = round(accuracy_score(labels, pred), 3)
    precision = round(precision_score(labels, pred), 3)
    recall = round(recall_score(labels, pred), 3)
    latency_ms = round((end - start) * 1000, 1)

    print('{} -- Accuracy: {} / Precision: {} / Recall: {} / Latency: {}ms'.format(
        name, accuracy, precision, recall, latency_ms))

In [12]:
# Score every loaded candidate on the validation split; one report line each.
for model_name in models:
    evaluate_model(model_name, models[model_name], val_features, val_labels)

LR -- Accuracy: 0.775 / Precision: 0.712 / Recall: 0.646 / Latency: 2.1ms
SVM -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 2.4ms
MLP -- Accuracy: 0.781 / Precision: 0.71 / Recall: 0.677 / Latency: 98.5ms
Random_forest -- Accuracy: 0.803 / Precision: 0.812 / Recall: 0.6 / Latency: 84.7ms
Boosting -- Accuracy: 0.815 / Precision: 0.808 / Recall: 0.646 / Latency: 8.2ms


$\text{Accuracy} = \frac{\text{number predicted correctly}}{\text{total number of examples}}$

$\text{Precision} = \frac{\text{number predicted as surviving that actually survived}}{\text{total number of survival predictions}}$

$\text{Recall} = \frac{\text{number predicted as surviving that actually survived}}{\text{total number that actually survived}}$

# Evaluate best model on test set

In [13]:
# Final check of the selected model on the held-out test split.
# NOTE(review): in the recorded validation output, Boosting scored higher on
# accuracy (0.815 vs 0.803) and recall (0.646 vs 0.6), while Random_forest led
# only on precision (0.812 vs 0.808) — confirm which criterion drove this choice.
evaluate_model(
    name='Random Forest',
    model=models['Random_forest'],
    features=te_features,
    labels=te_labels,
)

Random Forest -- Accuracy: 0.788 / Precision: 0.828 / Recall: 0.632 / Latency: 49.0ms
