In [31]:
from time import perf_counter
from time import time

import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Validation features and labels, read from the CSVs in the working directory.
val_features, val_labels = pd.read_csv('val_features.csv'), pd.read_csv('val_labels.csv')

# Test features and labels, read the same way.
ts_features, ts_labels = pd.read_csv('test_features.csv'), pd.read_csv('test_labels.csv')

In [32]:
# Map each short model name to the estimator unpickled from '<name>_model.pkl'.
model = dict()
for m in ('LR', 'SVM', 'GB', 'RF', 'MLP'):
    model.update({m: joblib.load('{}_model.pkl'.format(m))})

In [33]:
# Display the loaded estimators so their hyperparameters can be inspected.
model

{'LR': LogisticRegression(C=1, max_iter=10000),
 'SVM': SVC(C=0.1, gamma=0.1, kernel='linear'),
 'GB': GradientBoostingClassifier(learning_rate=0.01, n_estimators=500),
 'RF': RandomForestClassifier(max_depth=4, n_estimators=25),
 'MLP': MLPClassifier(activation='tanh', hidden_layer_sizes=(50,), max_iter=1000)}

# Evaluation
Accuracy = (# predicted correctly, whether survived or not)/(total # of people) ==> how often the model's prediction is correct overall

precision = (#predicted as survived that actually survived)/(# total predicted as survived) ==> when the model predicted that someone survived, how often they actually survived

Recall = (# predicted as survived that actually survived)/(# total actually survived) ==> of the people who actually survived, what fraction the model correctly predicted as survived

In [42]:
def evaluate_model(name, model, features, labels):
    """Print accuracy, precision, recall, F1 and prediction latency for one model.

    Parameters
    ----------
    name : str
        Label printed at the start of the report line.
    model : object
        Fitted estimator exposing ``predict(features)``.
    features : array-like
        Inputs handed to ``model.predict``.
    labels : array-like
        Ground-truth values the predictions are scored against.

    Returns
    -------
    None
        The report is written to stdout only.

    Notes
    -----
    Only the ``predict`` call is timed; computing the metrics is excluded.
    Each metric is rounded to 3 decimals and the latency to 0.1 ms.
    """
    # perf_counter is a monotonic, high-resolution clock; time() can be too
    # coarse to resolve a prediction that takes only a few milliseconds.
    start = perf_counter()
    pred = model.predict(features)
    end = perf_counter()

    acc = round(accuracy_score(labels, pred), 3)
    prec = round(precision_score(labels, pred), 3)
    rec = round(recall_score(labels, pred), 3)
    f1 = round(f1_score(labels, pred), 3)

    # The digits argument belongs inside round(); previously it was passed to
    # format() as an unused extra argument and the latency lost its decimals.
    latency_ms = round((end - start) * 1000, 1)

    print('{} Accuracy: {} / Precision: {} / Recall: {} / F1_score: {}/ Latency: {}ms'.format(name,
                                                                                              acc,
                                                                                              prec,
                                                                                              rec,
                                                                                              f1,
                                                                                              latency_ms))
    
    
    

In [43]:
# Report every loaded model's metrics on the validation data.
for name in model:
    mdl = model[name]
    evaluate_model(name, mdl, features=val_features, labels=val_labels)

LR Accuracy: 0.775 / Precision: 0.712 / Recall: 0.646 / F1_score: 0.677/ Latency: 0ms
SVM Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / F1_score: 0.634/ Latency: 16ms
GB Accuracy: 0.815 / Precision: 0.808 / Recall: 0.646 / F1_score: 0.718/ Latency: 0ms
RF Accuracy: 0.809 / Precision: 0.83 / Recall: 0.6 / F1_score: 0.696/ Latency: 20ms
MLP Accuracy: 0.781 / Precision: 0.724 / Recall: 0.646 / F1_score: 0.683/ Latency: 2ms


In [44]:
# Report every loaded model's metrics on the test data.
for name in model:
    mdl = model[name]
    evaluate_model(name, mdl, features=ts_features, labels=ts_labels)

LR Accuracy: 0.827 / Precision: 0.846 / Recall: 0.724 / F1_score: 0.78/ Latency: 2ms
SVM Accuracy: 0.799 / Precision: 0.794 / Recall: 0.711 / F1_score: 0.75/ Latency: 4ms
GB Accuracy: 0.816 / Precision: 0.852 / Recall: 0.684 / F1_score: 0.759/ Latency: 4ms
RF Accuracy: 0.799 / Precision: 0.845 / Recall: 0.645 / F1_score: 0.731/ Latency: 5ms
MLP Accuracy: 0.821 / Precision: 0.844 / Recall: 0.711 / F1_score: 0.771/ Latency: 2ms


In [45]:
# Report the gradient-boosting model on the test data. The estimators live in
# the `model` dict; the name `models` is never defined in this notebook.
evaluate_model('Gradient Boosting', model['GB'], ts_features, ts_labels)

Gradient Boosting Accuracy: 0.816 / Precision: 0.852 / Recall: 0.684 / F1_score: 0.759/ Latency: 5ms
