Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for saving and loading models #377

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
60 changes: 60 additions & 0 deletions lightfm/lightfm.py
Expand Up @@ -20,6 +20,21 @@

CYTHON_DTYPE = np.float32

# Names of the per-model numpy arrays (weights plus their optimizer state)
# that are persisted by LightFM.save() and restored by LightFM.load_uncached().
model_weights = {
    "{}_{}".format(entity, suffix)
    for entity in ("user", "item")
    for suffix in (
        "embeddings",
        "biases",
        "embedding_momentum",
        "embedding_gradients",
        "bias_momentum",
        "bias_gradients",
    )
}


class LightFM(object):
"""
Expand Down Expand Up @@ -474,6 +489,51 @@ def verbose_range():

return verbose_range()

def save(self, path):
    """
    Persist the model to disk as a compressed numpy archive, keeping all
    model weights and hyperparameters needed for re-initialization.

    Any item/user id mappings kept in external dataloaders are NOT
    included and must be stored separately for a full restoration of the
    model. The file is written with ``np.savez_compressed``, which
    appends a .npz extension to the path-parameter.

    Parameters
    ----------

    path: string
        string-path of location to save the model.
    """
    # Collect the weight arrays first, then overlay the hyperparameters,
    # and write everything as a single archive.
    state = {name: getattr(self, name) for name in model_weights}
    state.update(self.get_params())
    np.savez_compressed(path, **state)

@staticmethod
def load_uncached(path):
    """
    Load a model saved in the format output by LightFM.save().

    Example usage:
        model = LightFM.load_uncached(path_to_saved_model)

    Parameters
    ----------

    path: string
        string-path of location to load_uncached the model from.
    """
    new_model = LightFM()

    # allow_pickle=True is required on NumPy >= 1.16.3 to restore any
    # object-typed hyperparameters (e.g. a persisted random_state) --
    # np.load would otherwise raise ValueError on them.
    # NOTE(review): unpickling can execute arbitrary code; only load
    # model files from trusted sources.
    numpy_model = np.load(path, allow_pickle=True)

    # Weight arrays are set directly on the instance; everything else is
    # treated as a hyperparameter and routed through set_params().
    hyperparams = {}
    for name in numpy_model:
        if name in model_weights:
            setattr(new_model, name, numpy_model[name])
        else:
            hyperparams[name] = numpy_model[name]

    new_model.set_params(**hyperparams)

    return new_model

def fit(
self,
interactions,
Expand Down
81 changes: 81 additions & 0 deletions tests/test_persist.py
@@ -0,0 +1,81 @@
import pytest

import numpy as np
import os

from sklearn.metrics import roc_auc_score

from lightfm.lightfm import LightFM
from lightfm.datasets import fetch_movielens


def _binarize(dataset):

positives = dataset.data >= 4.0
dataset.data[positives] = 1.0
dataset.data[np.logical_not(positives)] = -1.0

return dataset


def _cleanup():
    # Remove the model file written by a test. Tolerate the file being
    # absent (e.g. when the test failed before the save step ran), so
    # calling this from a finally-block can never mask the real failure.
    try:
        os.remove(TEST_FILE_PATH)
    except OSError:
        pass


# Path the persistence tests write their temporary model archive to.
TEST_FILE_PATH = "./tests/test.npz"
# NOTE(review): fetch_movielens() runs at import time, so merely
# collecting these tests triggers the dataset download (network access
# and disk cache required) -- consider moving this into a pytest fixture.
movielens = fetch_movielens()
train, test = _binarize(movielens["train"]), _binarize(movielens["test"])


def test_all_params_persisted():
    # Persist a fitted model and check that every public, non-callable
    # attribute of the model made it into the saved archive.
    model = LightFM(loss="warp")
    model.fit(movielens["train"], epochs=1, num_threads=4)
    model.save(TEST_FILE_PATH)

    try:
        # Load and confirm all model params are present.
        saved_model_params = list(np.load(TEST_FILE_PATH).keys())
        for x in dir(model):
            ob = getattr(model, x)
            # We don't need to persist model functions, or magic variables of the model.
            if not callable(ob) and not x.startswith("__"):
                assert x in saved_model_params
    finally:
        # try/finally so the saved file is removed even when an
        # assertion above fails (previously it leaked on failure).
        _cleanup()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the test fails, this is never executed, and is never cleaned up. Could you use pytest fixtures for setup/teardown?

Copy link
Author

@NegatioN NegatioN Dec 15, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be done now. Needs another cache-clean. Also, if you do end up merging this, just squash everything. I don't think it makes sense to keep any of the history except the initial commit.



def test_model_populated():
    # Round-trip a fitted model through disk and check the restored
    # instance actually carries non-zero weights.
    model = LightFM(loss="warp")
    model.fit(movielens["train"], epochs=1, num_threads=4)
    model.save(TEST_FILE_PATH)

    try:
        # Load the saved weights onto a freshly constructed model.
        loaded_model = LightFM.load_uncached(TEST_FILE_PATH)

        assert loaded_model.item_embeddings.any()
        assert loaded_model.user_embeddings.any()
    finally:
        # try/finally so the saved file is removed even when an
        # assertion above fails (previously it leaked on failure).
        _cleanup()


def test_model_performance():
    # Train and persist a model.
    model = LightFM(random_state=10)
    model.fit_partial(train, epochs=10, num_threads=4)
    model.save(TEST_FILE_PATH)

    try:
        train_predictions = model.predict(train.row, train.col)
        test_predictions = model.predict(test.row, test.col)

        trn_pred = roc_auc_score(train.data, train_predictions)
        tst_pred = roc_auc_score(test.data, test_predictions)

        # Performance is same as before when loaded from disk.
        loaded_model = LightFM.load_uncached(TEST_FILE_PATH)

        train_predictions = loaded_model.predict(train.row, train.col)
        test_predictions = loaded_model.predict(test.row, test.col)

        assert roc_auc_score(train.data, train_predictions) == trn_pred
        assert roc_auc_score(test.data, test_predictions) == tst_pred
    finally:
        # try/finally so the saved file is removed even when an
        # assertion above fails (previously it leaked on failure).
        _cleanup()