### Read in Data

In [1]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score
from time import time

te_features = pd.read_csv('test_features.csv')
te_labels = pd.read_csv('test_labels.csv')

### Evaluate models on the test set

In [2]:
gb_model = joblib.load('GB_model.pkl')

In [3]:
def evaluate_model(model, features, labels):
    start = time()
    pred = model.predict(features)
    end = time()
    accuracy = round(accuracy_score(labels, pred), 3)
    precision = round(precision_score(labels, pred), 3)
    recall = round(recall_score(labels, pred), 3)
    print('{} -- Accuracy: {} / Precision: {} / Recall: {} / Latency: {}ms'.format(str(model).split('(')[0],
                                                                                   accuracy,
                                                                                   precision,
                                                                                   recall,
                                                                                   round((end - start)*1000, 1)))

In [4]:
evaluate_model(gb_model, te_features, te_labels)

GradientBoostingClassifier -- Accuracy: 0.809 / Precision: 0.804 / Recall: 0.631 / Latency: 8.0ms


### Making prediction on a random sample

In [5]:
prediction = gb_model.predict_proba(pd.DataFrame({'Unnamed: 0': 260,
                              'Pclass': 3,
                              'Sex': 0,
                              'Age': 40,
                              'Fare': 26,
                              'Family_cnt': 4,
                              'Cabin_ind': 0}, index=[0]))
print(prediction)

[[0.9268099 0.0731901]]


In [6]:
survival_rate = round(prediction[0][0] * 100, 1)
death_rate = round(prediction[0][1] * 100, 1)

print(f'The passenger has a {survival_rate}% rate of survival and a {death_rate}% rate of death.')

The passenger has a 92.7% rate of survival and a 7.3% rate of death.
