### Exercise 3.15: Saving and Loading Models

In this exercise, we will create a TF-IDF representation of a small set of sentences. We will then save the fitted model to disk and later load it back from disk.

In [1]:
import pickle

from joblib import dump, load

from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# Four example sentences used as the documents to vectorize.
# Adjacent string literals are joined by the parser, so the long sentences
# are split across lines without needing backslash continuations.
corpus = [
    'Data Science is an overlap between Arts and Science',
    ('Generally, Arts graduates are right-brained and '
     'Science graduates are left-brained'),
    ('Excelling in both Arts and Science at a time '
     'becomes difficult'),
    'Natural Language Processing is a part of Data Science',
]

In [3]:
# Fit a TF-IDF vectorizer on the corpus and transform the same sentences in
# one call, then convert the result to a dense matrix before printing it.
tfidf_model = TfidfVectorizer()
tfidf_vectors = (
    tfidf_model
    .fit_transform(corpus)
    .todense()
)
print(tfidf_vectors)

[[0.40332811 0.25743911 0.         0.25743911 0.         0.
  0.40332811 0.         0.         0.31798852 0.         0.
  0.         0.         0.         0.31798852 0.         0.
  0.         0.         0.40332811 0.         0.         0.
  0.42094668 0.        ]
 [0.         0.159139   0.49864399 0.159139   0.         0.
  0.         0.         0.49864399 0.         0.         0.
  0.24932199 0.49864399 0.         0.         0.         0.24932199
  0.         0.         0.         0.         0.         0.24932199
  0.13010656 0.        ]
 [0.         0.22444946 0.         0.22444946 0.35164346 0.35164346
  0.         0.35164346 0.         0.         0.35164346 0.35164346
  0.         0.         0.35164346 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.18350214 0.35164346]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.30887228 0.         0.
  0.         0.         0.         0.30887228 0.39176

In [4]:
# Persist the fitted TFIDF vectorizer to disk with joblib.
# The call is left as the cell's last expression so its return value is echoed.
dump(value=tfidf_model, filename='tfidf_model.joblib')

['tfidf_model.joblib']

In [5]:
# Read the joblib file back into memory as a vectorizer object.
tfidf_model_loaded = load('tfidf_model.joblib')

# Vectorize the same corpus with the reloaded model (transform only, no
# refitting) and print the dense result for comparison with the earlier output.
loaded_tfidf_vectors = (
    tfidf_model_loaded
    .transform(corpus)
    .todense()
)
print(loaded_tfidf_vectors)

[[0.40332811 0.25743911 0.         0.25743911 0.         0.
  0.40332811 0.         0.         0.31798852 0.         0.
  0.         0.         0.         0.31798852 0.         0.
  0.         0.         0.40332811 0.         0.         0.
  0.42094668 0.        ]
 [0.         0.159139   0.49864399 0.159139   0.         0.
  0.         0.         0.49864399 0.         0.         0.
  0.24932199 0.49864399 0.         0.         0.         0.24932199
  0.         0.         0.         0.         0.         0.24932199
  0.13010656 0.        ]
 [0.         0.22444946 0.         0.22444946 0.35164346 0.35164346
  0.         0.35164346 0.         0.         0.35164346 0.35164346
  0.         0.         0.35164346 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.18350214 0.35164346]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.30887228 0.         0.
  0.         0.         0.         0.30887228 0.39176

In [6]:
# Save TFIDF model using pickle.
# The file is opened in a `with` block so the handle is closed (and the
# bytes flushed to disk) as soon as pickling finishes, even if it raises.
with open("tfidf_model.pickle.dat", "wb") as model_file:
    pickle.dump(tfidf_model, model_file)

In [7]:
# Load pickle model from disk into memory and use it.
# The `with` block closes the file handle once the object has been read.
# NOTE: pickle.load can execute arbitrary code while deserializing; only
# unpickle files that you created yourself or otherwise trust.
with open("tfidf_model.pickle.dat", "rb") as model_file:
    loaded_model = pickle.load(model_file)

# Vectorize the corpus with the unpickled model (transform only, no refitting).
loaded_tfidf_vectors = loaded_model.transform(corpus).todense()

print(loaded_tfidf_vectors)

[[0.40332811 0.25743911 0.         0.25743911 0.         0.
  0.40332811 0.         0.         0.31798852 0.         0.
  0.         0.         0.         0.31798852 0.         0.
  0.         0.         0.40332811 0.         0.         0.
  0.42094668 0.        ]
 [0.         0.159139   0.49864399 0.159139   0.         0.
  0.         0.         0.49864399 0.         0.         0.
  0.24932199 0.49864399 0.         0.         0.         0.24932199
  0.         0.         0.         0.         0.         0.24932199
  0.13010656 0.        ]
 [0.         0.22444946 0.         0.22444946 0.35164346 0.35164346
  0.         0.35164346 0.         0.         0.35164346 0.35164346
  0.         0.         0.35164346 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.18350214 0.35164346]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.30887228 0.         0.
  0.         0.         0.         0.30887228 0.39176