# Saving and Loading Model
* 1. Use pickle 
* 2. Use joblib 

In [1]:
import pandas as pd
import numpy as np
dataset = pd.read_csv("./../heart.csv")

In [2]:
from sklearn.metrics import accuracy_score, precision_score,recall_score ,f1_score
def evaluate_metrics(Y_actual,Y_pred):
    accuracy = accuracy_score(Y_actual,Y_pred)
    precision = precision_score(Y_actual,Y_pred,zero_division=0)
    recall = recall_score(Y_actual,Y_pred, zero_division=0)
    f1 = f1_score(Y_actual,Y_pred)
    scores = {"accuracy":accuracy,
              "precision":precision,
              "recall":recall,
              "f1":f1}
    # we can also print these scores
    print(f"Accuracy:{round(accuracy,2)}")
    print(f"precision:{round(precision,2)}")
    print(f"recall:{round(recall,2)}")
    print(f"f1_score:{round(f1,2)}")
    
    return scores

In [10]:
from sklearn.model_selection import train_test_split
# lets shuffle the data
mixed_dataset = dataset.sample(frac=1)
# X and Y dataset samples
X = dataset.drop("target",axis = 1)
Y = dataset["target"]

# Now let's split
X_train,X_test,Y_train, Y_test = train_test_split(X_temp,Y_temp,test_size = 0.1765, random_state=42, stratify=Y_temp)

In [11]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(n_jobs=1)

In [12]:
from sklearn.model_selection import GridSearchCV
h_param_for_grid = {
        "max_depth":[5,10],
    "n_estimators":[10,100],
    "min_samples_split":[4,6]
}
gscv = GridSearchCV(
estimator= clf,
    param_grid=h_param_for_grid,
    cv = 3,
    verbose = 2
)
gscv.fit(X_train,Y_train)

Fitting 3 folds for each of 8 candidates, totalling 24 fits
[CV] END ..max_depth=5, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END ..max_depth=5, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END ..max_depth=5, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END .max_depth=5, min_samples_split=4, n_estimators=100; total time=   0.3s
[CV] END .max_depth=5, min_samples_split=4, n_estimators=100; total time=   0.3s
[CV] END .max_depth=5, min_samples_split=4, n_estimators=100; total time=   0.3s
[CV] END ..max_depth=5, min_samples_split=6, n_estimators=10; total time=   0.0s
[CV] END ..max_depth=5, min_samples_split=6, n_estimators=10; total time=   0.0s
[CV] END ..max_depth=5, min_samples_split=6, n_estimators=10; total time=   0.0s
[CV] END .max_depth=5, min_samples_split=6, n_estimators=100; total time=   0.3s
[CV] END .max_depth=5, min_samples_split=6, n_estimators=100; total time=   0.3s
[CV] END .max_depth=5, min_samples_split=6, n_est

0,1,2
,estimator,RandomForestC...fier(n_jobs=1)
,param_grid,"{'max_depth': [5, 10], 'min_samples_split': [4, 6], 'n_estimators': [10, 100]}"
,scoring,
,n_jobs,
,refit,True
,cv,3
,verbose,2
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,10
,min_samples_split,6
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [13]:
# run predictions and get the results
gscv_pred_y_labels = gscv.predict(X_test)
# get results
gscv_result =  evaluate_metrics(Y_test,gscv_pred_y_labels)

Accuracy:0.8
precision:0.79
recall:0.88
f1_score:0.83


## Saving Model using Pickle

In [16]:
import pickle
pickle.dump(gscv,open("gridSearchCV_model.pkl","wb"))

## Loading Model

In [17]:
saved_model = pickle.load(open("gridSearchCV_model.pkl","rb"))

## Predicting using saved model

In [18]:
pred_Y_labels = saved_model.predict(X_test)
# evaluate the predicted labels with the evaluate_metrics_functions
evaluate_metrics(Y_test,pred_Y_labels)

Accuracy:0.8
precision:0.79
recall:0.88
f1_score:0.83


{'accuracy': 0.8043478260869565,
 'precision': 0.7857142857142857,
 'recall': 0.88,
 'f1': 0.8301886792452831}

# 2. saving and loading model using joblib

In [19]:
from joblib import dump, load

# save the model
dump(gscv,filename="joblib_saved_model.joblib")

['joblib_saved_model.joblib']

In [20]:
model = load(filename="joblib_saved_model.joblib")

In [22]:
Y_labels_pred = model.predict(X_test)

In [23]:
evaluate_metrics(Y_test,Y_labels_pred)

Accuracy:0.8
precision:0.79
recall:0.88
f1_score:0.83


{'accuracy': 0.8043478260869565,
 'precision': 0.7857142857142857,
 'recall': 0.88,
 'f1': 0.8301886792452831}