# Model Persistence with Pickle & JobLib


In [2]:
from sklearn import datasets

In [3]:
digits = datasets.load_digits()  # Bundled handwritten-digits dataset (features, 8x8 images and labels)

In [4]:
type(digits)  # Container type returned by load_digits()

sklearn.datasets.base.Bunch

In [5]:
type(digits.data)  # The feature matrix is a plain NumPy array

numpy.ndarray

## Features

In [8]:
digits.data  # One row per sample; the 64 columns are the 8x8 image flattened

array([[  0.,   0.,   5., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,  10.,   0.,   0.],
       [  0.,   0.,   0., ...,  16.,   9.,   0.],
       ..., 
       [  0.,   0.,   1., ...,   6.,   0.,   0.],
       [  0.,   0.,   2., ...,  12.,   0.,   0.],
       [  0.,   0.,  10., ...,  12.,   1.,   0.]])

In [6]:
digits.data.shape  # (n_samples, n_features)

(1797, 64)

In [22]:
digits.images[0]  # First sample as a grid of pixel intensities; each image has shape (8, 8)

array([[  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.],
       [  0.,   0.,  13.,  15.,  10.,  15.,   5.,   0.],
       [  0.,   3.,  15.,   2.,   0.,  11.,   8.,   0.],
       [  0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.],
       [  0.,   5.,   8.,   0.,   0.,   9.,   8.,   0.],
       [  0.,   4.,  11.,   0.,   1.,  12.,   7.,   0.],
       [  0.,   2.,  14.,   5.,  10.,  12.,   0.,   0.],
       [  0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]])

## Labels

In [23]:
digits.target  # Ground-truth label of each sample: one of 10 classes (digits 0 through 9)

array([0, 1, 2, ..., 8, 9, 8])

In [24]:
digits.target.shape  # One label per sample, so the length matches digits.data.shape[0]

(1797,)

## Build the Model

In [26]:
# Support Vector Classifier; C and gamma are fixed by hand here rather than tuned
from sklearn import svm
clf = svm.SVC(C=100.0, gamma=0.001)

In [31]:
# Train on every sample except the last one, which is held out to check a prediction
clf.fit(digits.data[:-1], digits.target[:-1])

SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [32]:
clf.predict(digits.data[-1:])  # Predict the digit for the last sample; the [-1:] slice keeps the input 2-D (a single row)

array([8])

In [33]:
digits.target[-1]  # Ground-truth label of the held-out last sample, to compare with the prediction

8

In [34]:
digits.images[-1]  # 8x8 pixel grid of the held-out last sample

array([[  0.,   0.,  10.,  14.,   8.,   1.,   0.,   0.],
       [  0.,   2.,  16.,  14.,   6.,   1.,   0.,   0.],
       [  0.,   0.,  15.,  15.,   8.,  15.,   0.,   0.],
       [  0.,   0.,   5.,  16.,  16.,  10.,   0.,   0.],
       [  0.,   0.,  12.,  15.,  15.,  12.,   0.,   0.],
       [  0.,   4.,  16.,   6.,   4.,  16.,   6.,   0.],
       [  0.,   8.,  16.,  10.,   8.,  16.,   8.,   0.],
       [  0.,   1.,   8.,  12.,  14.,  12.,   1.,   0.]])

## Save Model using Pickle

In [36]:
import pickle

# Serialize the fitted classifier to an in-memory byte string (nothing is written to disk)
s = pickle.dumps(clf)

## Load Saved Model and Predict

In [40]:
# Rebuild the classifier from the pickled bytes.
# NOTE: unpickling can execute arbitrary code, so only load data you trust.
clf2 = pickle.loads(s)
# The restored model should predict the same digit as the original clf
clf2.predict(digits.data[-1:])

array([8])

## Save Model using joblib (more efficient for models that hold large NumPy arrays)

In [41]:
# joblib is a standalone package. The sklearn.externals.joblib alias was
# deprecated in scikit-learn 0.21 and removed in 0.23, so import joblib
# directly and fall back to the bundled copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Persist the fitted classifier to a file on disk (unlike pickle.dumps above,
# which returns an in-memory byte string); returns the list of filenames written.
joblib.dump(clf, 'clf_model.pkl')

['clf_model.pkl']

In [42]:
# Restore the classifier from disk. joblib relies on pickle, so only load files you trust.
clf3 = joblib.load('clf_model.pkl')

In [43]:
clf3.predict(digits.data[-1:])  # The model reloaded from disk should predict the same digit as the original clf

array([8])