## Save and Load Machine Learning Models in Python with scikit-learn
https://machinelearningmastery.com/save-load-machine-learning-models-python-scikit-learn/


### Finalize your model with pickle

In [10]:
import pandas as pd
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle

In [11]:
# Show the pandas version this notebook was executed with.
pd.__version__

'0.25.1'

In [33]:
# Report the scikit-learn version available in this kernel.
import sklearn
print('sklearn: %s' % (sklearn.__version__,))

sklearn: 0.21.3


In [12]:
# Pima Indians diabetes dataset. Column labels are supplied via `names`,
# so the first line of the file is read as data, not as a header.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = [
    'preg', 'plas', 'pres', 'skin',
    'test', 'mass', 'pedi', 'age',
    'class',
]
# header=None is what pandas already assumes when `names` is given; spelled out for clarity.
dataframe = pd.read_csv(url, names=names, header=None)
dataframe

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [5]:
# Separate the first eight columns (features) from the ninth (label),
# then hold out 33% of the rows for evaluation with a fixed seed.
array = dataframe.values
X, Y = array[:, :8], array[:, 8]
test_size = 0.33
seed = 7
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=seed,
)

In [36]:
# Train a logistic-regression classifier, with library-default settings, on the training split.
# The trailing fit() expression is left unassigned so the cell displays the fitted estimator.
model = LogisticRegression()
model.fit(X=X_train, y=Y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [37]:
# Save the model to disk.
# The file is opened in a `with` block so the handle is flushed and closed
# as soon as pickling finishes, instead of being left open until garbage
# collection (which can leave a truncated or locked file behind).
filename = 'finalized_model_pickle.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [38]:
# some time later...

# Load the model from disk and evaluate it on the held-out split.
# NOTE: pickle.load can execute arbitrary code while deserialising; only
# load files that you created yourself or otherwise trust.
filename_l = 'finalized_model_pickle.sav'
# `with` guarantees the file handle is closed once the model has been read.
with open(filename_l, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
result = loaded_model.score(X_test, Y_test)
print(result)

0.7559055118110236


### Finalize your model with joblib

Joblib is part of the SciPy ecosystem and provides utilities for pipelining Python jobs.
It also provides utilities for efficiently saving and loading Python objects that make use of NumPy data structures.
This can be useful for some machine learning algorithms that require a lot of parameters or store the entire dataset (like K-Nearest Neighbors).

In [19]:
# Save Model Using joblib
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import joblib

In [22]:
# Download the Pima Indians diabetes data, labelling the columns explicitly,
# and write a CSV copy into the working directory.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = [
    'preg', 'plas', 'pres', 'skin',
    'test', 'mass', 'pedi', 'age',
    'class',
]
# header=None is what pandas already assumes when `names` is given; spelled out for clarity.
dataframe = pandas.read_csv(url, names=names, header=None)
# NOTE(review): with default arguments to_csv also writes the row index and a
# header line, so this local copy differs in layout from the downloaded file —
# confirm that is intended before reading it back with `names=names`.
dataframe.to_csv(path_or_buf='pima-indians-diabetes.data.csv')

In [25]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
dataframe.describe()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [26]:
# Features are the first eight columns, the label is the ninth.
# 33% of the rows are held out for testing; the seed keeps the split repeatable.
array = dataframe.values
X = array[:, :8]
Y = array[:, 8]
test_size, seed = 0.33, 7
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y,
    test_size=test_size,
    random_state=seed,
)

In [28]:
# Inspect the training feature matrix produced by the split.
X_train

array([[  3.   , 102.   ,  44.   , ...,  30.8  ,   0.4  ,  26.   ],
       [  1.   ,  77.   ,  56.   , ...,  33.3  ,   1.251,  24.   ],
       [  9.   , 124.   ,  70.   , ...,  35.4  ,   0.282,  34.   ],
       ...,
       [  0.   ,  57.   ,  60.   , ...,  21.7  ,   0.735,  67.   ],
       [  1.   , 105.   ,  58.   , ...,  24.3  ,   0.187,  21.   ],
       [  8.   , 179.   ,  72.   , ...,  32.7  ,   0.719,  36.   ]])

In [39]:
# Train a logistic-regression classifier, with library-default settings, on the training split.
# The trailing fit() expression is left unassigned so the cell displays the fitted estimator.
model = LogisticRegression()
model.fit(X=X_train, y=Y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [40]:
# Persist the fitted model with joblib; the call's return value (the list of
# files written) is displayed as the cell output.
filename = 'finalized_model_joblib.sav'
joblib.dump(value=model, filename=filename)

['finalized_model_joblib.sav']

In [41]:
# some time later...

# Restore the joblib-serialised model and report its accuracy on the held-out split.
filename_l2 = 'finalized_model_joblib.sav'
loaded_model = joblib.load(filename=filename_l2)
result = loaded_model.score(X=X_test, y=Y_test)
print(result)

0.7559055118110236
