## Saving and Loading ML Models

There are two ways to save and load machine learning models:
1. With Python's built-in `pickle` module
2. With the `joblib` module

#### Pickle

In [7]:
import pandas as pd
import numpy as np

In [9]:
# Imports for this step, grouped ahead of the code that uses them
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load the heart-disease dataset (path is relative to the notebook's folder)
heart_disease = pd.read_csv("../data/heart-disease.csv")

# Split the data: every column except "target" is a feature, "target" is the label
X = heart_disease.drop(columns="target")
y = heart_disease["target"]

# Seed NumPy's global RNG. Neither call below is given its own random_state,
# so this seed is what makes the split and the forest repeatable.
np.random.seed(42)

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit a random forest with default hyperparameters on the training split
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)

# Evaluate on the held-out split; as the cell's last expression the score is displayed
rfc.score(X_test, y_test)

0.8524590163934426

In [11]:
import pickle

# Save an existing model to a file.
# The `with` block closes the file handle (flushing the bytes to disk) as soon
# as the dump finishes, instead of leaving an open handle behind.
with open("random_forest_classifier_model_1.pkl", "wb") as model_file:
    pickle.dump(rfc, model_file)

In [12]:
# Load a saved model.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# you created yourself or otherwise trust.
# The `with` block guarantees the file handle is closed once loading is done.
with open("random_forest_classifier_model_1.pkl", "rb") as model_file:
    loaded_pickle_model = pickle.load(model_file)

In [15]:
# Predict on the held-out test features with the model restored from the pickle file
pickle_preds = loaded_pickle_model.predict(X_test)

In [16]:
from sklearn.metrics import accuracy_score

# Accuracy of the unpickled model's predictions against the true test labels
accuracy_score(y_true=y_test, y_pred=pickle_preds)

0.8524590163934426

#### Joblib

In [18]:
from joblib import dump, load

# Persist the fitted classifier with joblib. dump() returns the list of file
# names it wrote, which the notebook shows as this cell's output.
dump(rfc, "random_forest_classifier_model_1.joblib")

['random_forest_classifier_model_1.joblib']

In [19]:
# Restore the classifier from the joblib file on disk.
# joblib builds on pickle, so only load files from a source you trust.
loaded_joblib_model = load("random_forest_classifier_model_1.joblib")

In [20]:
# Predict on the held-out test features with the model restored from the joblib file
joblib_preds = loaded_joblib_model.predict(X_test)

In [21]:
# accuracy_score was already imported in the pickle section above, so it is not re-imported here.
# Accuracy of the joblib-restored model's predictions against the true test labels
accuracy_score(y_test, joblib_preds)

0.8524590163934426