In [1]:
"""
What? Save and load ML models

Two methods are shown:
[1] pickle file
[2] joblib library

Pickle is the standard way of serializing objects in Python. You can use
the pickle operation to serialize your machine learning algorithms and
save the serialized format to a file.

The Joblib library is part of the SciPy ecosystem and provides utilities for 
pipelining Python jobs. It provides utilities for saving and loading Python 
objects that make use of NumPy data structures, efficiently.

https://machinelearningmastery.com/save-load-machine-learning-models-python-scikit-learn/
"""

'\nWhat? Save and load ML models\n\nTwo methods are shown:\n[1] pickle file\n[2] joblib library\n\nPickle is the standard way of serializing objects in Python. You can use \nthe pickle1 operation to serialize your machine learning algorithms and \nsave the serialized format to a file.\n\nThe Joblib library is part of the SciPy ecosystem and provides utilities for \npipelining Python jobs. It provides utilities for saving and loading Python \nobjects that make use of NumPy data structures, efficiently\n\nReference: Machine learning mastery with python, Jason Brownlee\n'

In [2]:
# Import python modules
import pickle
from pickle import dump
from pickle import load

import joblib
from IPython.display import Markdown, display
from pandas import read_csv
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [3]:
# Additional functions
def myPrint(string, c = "blue"):
    """Display a message as bold, colored Markdown in the notebook output.

    Parameters
    ----------
    string : object
        Message to show. Values that are not already strings are rendered
        through ``str.format``, so numbers and other objects are accepted
        the same way the built-in ``print`` accepts them.
    c : str, optional
        CSS color name or value placed in the ``style`` attribute of the
        wrapping ``<span>``. Defaults to ``"blue"``.

    Returns
    -------
    None
        The rendered Markdown is sent to the notebook via ``display``.
    """
    # The surrounding ** ... ** is Markdown bold; the span carries the color.
    colorstr = "<span style='color:{}'>**{}**</span>".format(c, string)
    display(Markdown(colorstr))

def printPythonModuleVersion():
    """Print the version of each scientific Python package used here.

    One ``name: version`` line is written to stdout per package, in a fixed
    order. Each package is imported immediately before its version is
    printed, so an import failure surfaces after the lines for the packages
    that precede it have already been printed.
    """
    module_names = (
        'scipy',
        'numpy',
        'matplotlib',
        'pandas',
        'statsmodels',
        'sklearn',
    )
    for module_name in module_names:
        # For a top-level package name, __import__ returns that package.
        module = __import__(module_name)
        print('%s: %s' % (module_name, module.__version__))

# Record the library versions this notebook was executed with, so the
# results below can be tied to a specific environment.
printPythonModuleVersion()

scipy: 1.5.4
numpy: 1.19.4
matplotlib: 3.3.2
pandas: 1.1.4
statsmodels: 0.12.1
sklearn: 0.23.2


In [4]:
myPrint("Importing dataset")

# Column names are supplied explicitly through `names` when reading the CSV.
filename = './datasetCollections/pima-indians-diabetes.csv'
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
dataframe = read_csv(filename, names=names)

# Work on the underlying array: the first eight columns are the inputs and
# the ninth column ('class') holds the labels.
array = dataframe.values
X, Y = array[:, :8], array[:, 8]

# NOTE(review): num_folds is not read by any cell visible in this notebook;
# presumably left over from a cross-validation example - confirm before removing.
num_folds = 10

print("Input size: ", X.shape)
print("Labels size: ", Y.shape)

<span style='color:blue'>**Importing dataset**</span>

Input size:  (768, 8)
Labels size:  (768,)


In [5]:
myPrint("Create the ML model")

# Hold out 33% of the rows for evaluation; the model is fitted on the
# remaining 67%. A fixed random_state makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=7,
)

# max_iter is set to 250 - presumably so the solver converges on this data;
# fit() returns the fitted estimator, so it can be chained onto the constructor.
model = LogisticRegression(max_iter=250).fit(X_train, Y_train)

# Score the fitted model on the held-out split.
print("Model score: ", model.score(X_test, Y_test))

<span style='color:blue'>**Create the ML model**</span>

Model score:  0.7874015748031497


In [16]:
myPrint("USING pickle")

print("Save the model to disk")
outputPath = './output/finalized_model.pkl'
# The qualified pickle.dump / pickle.load calls keep this cell on the pickle
# implementation even if other `dump` / `load` names are imported elsewhere
# in the notebook. The `with` blocks close the file handles deterministically,
# so the bytes are flushed to disk before the file is read back.
with open(outputPath, 'wb') as model_file:
    pickle.dump(model, model_file)
print("File save at: " + outputPath)

print("load the model from disk")
# SECURITY: unpickling can execute arbitrary code - only load files you trust.
with open(outputPath, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# The restored model should reproduce the score of the in-memory model.
result = loaded_model.score(X_test, Y_test)
print("Model score: ", result)

<span style='color:blue'>**USING pickle**</span>

Save the model to disk
File save at: ./output/finalized_model.pkl
load the model from disk
Model score:  0.7874015748031497


In [19]:
myPrint("USING joblib")

# joblib is called through its module namespace rather than via
# `from joblib import dump, load`: importing those bare names here would
# rebind the `dump` / `load` names that the top import cell takes from pickle,
# making the notebook's behavior depend on cell execution order.

# save the model to disk
outputPathJoblib = './output/finalized_model.joblib'
print("Save the model to disk")
joblib.dump(model, outputPathJoblib)
print("File save at: " + outputPathJoblib)

# load the model from disk
# SECURITY: joblib files are pickle-based - only load files you trust.
print("load the model from disk")
loaded_model = joblib.load(outputPathJoblib)

# The restored model should reproduce the score of the in-memory model.
result = loaded_model.score(X_test, Y_test)
print("Model score: ", result)

<span style='color:blue'>**USING joblib**</span>

Save the model to disk
File save at: ./output/finalized_model.joblib
load the model from disk
Model score:  0.7874015748031497
