# 6. Save and load a trained model

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# load the heart-disease dataset
heart_df = pd.read_csv('./data/heart-disease.csv')

# split into the feature matrix X and the label column y
X = heart_df.drop('target', axis=1)
y = heart_df['target']

# seed NumPy's global RNG; no random_state is passed below, so the split and
# the forest both draw from this global state
np.random.seed(42)

# hold out 20% of the rows as the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.20)

# max_features='sqrt' is what 'auto' meant for classifiers; 'auto' was
# deprecated in scikit-learn 1.1 and removed in 1.3, so spell it explicitly
clf = RandomForestClassifier(max_depth=10,
                             max_features='sqrt',
                             min_samples_leaf=4,
                             min_samples_split=4,
                             n_estimators=200)

# fit the model to the training data (trailing ';' hides the estimator repr)
clf.fit(X_train, y_train);

# mean accuracy on the held-out test set
clf.score(X_test, y_test)

0.8524590163934426

## 6.1 Using pickle

In [16]:
import pickle

# save the fitted model to file; the context manager guarantees the handle is
# flushed and closed even if pickling fails
with open("random_forest_classifier_model.pkl", "wb") as model_file:
    pickle.dump(clf, model_file)

In [17]:
# load the saved model; the context manager closes the file handle afterwards.
# NOTE: only unpickle files you trust - pickle.load can execute arbitrary code.
with open('random_forest_classifier_model.pkl', "rb") as model_file:
    loaded_model = pickle.load(model_file)

In [18]:
# predict on the test features with the model restored from the pickle file
preds = loaded_model.predict(X_test)

In [21]:
# NOTE: evaluate_preds is defined in the NEXT cell (In [19]); run that cell
# first, otherwise this call raises NameError on a fresh kernel.
# The trailing ';' suppresses the returned metrics dict so only the prints show.
evaluate_preds(y_test, preds);

Accuracy: 85.25
Precision: 0.85
Recall: 0.88
F1-score: 0.86


In [19]:
# helper that computes and reports the usual classification metrics in one call
def evaluate_preds(y_true, y_preds):
    """Compare true labels with predicted labels from a classification model.

    Computes accuracy, precision, recall and F1 with scikit-learn's metric
    functions (called with their default settings), prints a short report,
    and returns the rounded values.

    Parameters
    ----------
    y_true : the ground-truth labels.
    y_preds : the labels predicted by the model.

    Returns
    -------
    dict
        Keys "accuracy", "precision", "recall" and "f1", each rounded to
        two decimal places.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    # raw (unrounded) scores, computed once and reused for printing and returning
    scores = {
        "accuracy": accuracy_score(y_true, y_preds),
        "precision": precision_score(y_true, y_preds),
        "recall": recall_score(y_true, y_preds),
        "f1": f1_score(y_true, y_preds),
    }

    # accuracy is reported as a percentage, the other metrics as fractions
    print(f"Accuracy: {scores['accuracy'] * 100:.2f}")
    print(f"Precision: {scores['precision']:.2f}")
    print(f"Recall: {scores['recall']:.2f}")
    print(f"F1-score: {scores['f1']:.2f}")

    return {metric: round(value, 2) for metric, value in scores.items()}

## 6.2 Using joblib

In [22]:
from joblib import dump, load

# persist the fitted classifier with joblib (the cell output is dump's return value)
dump(clf, 'random_forest_classifier_model.joblib')

['random_forest_classifier_model.joblib']

In [23]:
# load the model back from the .joblib file written by dump() above
# (previously this pointed at the .pkl file from the pickle section, so the
# joblib artefact was never actually read back)
loaded_joblib_model = load(filename='random_forest_classifier_model.joblib')

In [25]:
# predict on the test features with the model restored via joblib's load()
joblib_y_preds = loaded_joblib_model.predict(X_test)

In [26]:
# score the joblib-restored model's predictions; the returned dict is the cell output
evaluate_preds(y_test, joblib_y_preds)

Accuracy: 85.25
Precision: 0.85
Recall: 0.88
F1-score: 0.86


{'accuracy': 0.85, 'precision': 0.85, 'recall': 0.88, 'f1': 0.86}