From 1c640fab7791bada18071707391e95b742ce539e Mon Sep 17 00:00:00 2001 From: NegatioN Date: Tue, 25 Sep 2018 13:11:14 +0200 Subject: [PATCH 01/16] Add support for saving and loading models --- lightfm/lightfm.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/lightfm/lightfm.py b/lightfm/lightfm.py index a5897d13..02ce75da 100644 --- a/lightfm/lightfm.py +++ b/lightfm/lightfm.py @@ -20,6 +20,10 @@ CYTHON_DTYPE = np.float32 +model_weights = {'user_embeddings', 'user_biases', 'item_embeddings', 'item_biases', + 'item_bias_momentum', 'item_bias_gradients', 'item_embedding_momentum', + 'item_embedding_gradients', 'user_bias_momentum', 'user_bias_gradients', + 'user_embedding_momentum', 'user_embedding_gradients'} class LightFM(object): """ @@ -474,6 +478,41 @@ def verbose_range(): return verbose_range() + def save(self, path): + """ + Saves a model as a numpy-object, keeping all model weights and hyperparameters + for re-initialization. + This does not keep track of any mappings of items/users you may have in your dataloaders, + so also needs to be stored somewhere for full restoration of the model. + Model is persisted as a compressed numpy file, and has the .npz extension appended to the path-parameter. + + Parameters + ---------- + + path: string + string-path of location to save the model. + """ + model_params = {value: getattr(self, value) for value in model_weights} + hyperparams = self.get_params() + model_params.update(hyperparams) + np.savez_compressed(path, **model_params) + + def load(self, path): + """ + Loads a model saved in the format output by LightFM.save() + + Parameters + ---------- + + path: string + string-path of location to load the model from. + """ + numpy_model = np.load(path) + for value in [x for x in numpy_model if x in model_weights]: + setattr(self, value, numpy_model[value]) + + self.set_params(**{k: v for k, v in numpy_model.items() if k not in model_weights}) + def fit( self, interactions, From a27d15d2766825a7f286433590a0d55401f14593 Mon Sep 17 00:00:00 2001 From: NegatioN Date: Sun, 4 Nov 2018 12:47:03 +0100 Subject: [PATCH 02/16] Add a test to confirm we're saving all relevant parameters --- tests/test_persist.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tests/test_persist.py diff --git a/tests/test_persist.py b/tests/test_persist.py new file mode 100644 index 00000000..304c1178 --- /dev/null +++ b/tests/test_persist.py @@ -0,0 +1,19 @@ +import numpy as np +import pytest + +from lightfm import LightFM +from lightfm.datasets import fetch_movielens + +def test_all_params_persisted(): + # Train and persist a model + data = fetch_movielens(min_rating=5.0) + model = LightFM(loss='warp') + model.fit(data['train'], epochs=5, num_threads=4) + model.save('./test.npz') + + # Load and confirm all model params are present. + saved_model_params = list(np.load('./test.npz').keys()) + for x in dir(model): + ob = getattr(model, x) + if not callable(ob) and not x.startswith('__'): + assert x in saved_model_params \ No newline at end of file From 26e172f7a7e75e9a7f017325ce390307d7bd4e28 Mon Sep 17 00:00:00 2001 From: NegatioN Date: Sun, 4 Nov 2018 12:52:48 +0100 Subject: [PATCH 03/16] Add test to ensure model is instanciated on load() --- tests/test_persist.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/test_persist.py b/tests/test_persist.py index 304c1178..6e23df64 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -16,4 +16,22 @@ def test_all_params_persisted(): for x in dir(model): ob = getattr(model, x) if not callable(ob) and not x.startswith('__'): - assert x in saved_model_params \ No newline at end of file + assert x in saved_model_params + +def test_model_populated(): + # Train and persist a model + data = fetch_movielens(min_rating=5.0) + model = LightFM(loss='warp') + model.fit(data['train'], epochs=5, num_threads=4) + model.save('./test.npz') + + # Load a model onto an uninstanciated object + model = LightFM(loss='warp') + + assert model.item_embeddings == None + assert model.user_embeddings == None + + model.load('./test.npz') + + assert model.item_embeddings.any() + assert model.user_embeddings.any() From 7f63dcd43c8a34229e6399638bfce343540fc5b5 Mon Sep 17 00:00:00 2001 From: NegatioN Date: Sun, 4 Nov 2018 12:57:39 +0100 Subject: [PATCH 04/16] Properly clean up after tests --- tests/test_persist.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/test_persist.py b/tests/test_persist.py index 6e23df64..fb8b41ea 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -1,29 +1,36 @@ -import numpy as np import pytest +import numpy as np +import os + from lightfm import LightFM from lightfm.datasets import fetch_movielens +TEST_FILE_PATH = './tests/test.npz' + def test_all_params_persisted(): # Train and persist a model data = fetch_movielens(min_rating=5.0) model = LightFM(loss='warp') model.fit(data['train'], epochs=5, num_threads=4) - model.save('./test.npz') + model.save(TEST_FILE_PATH) # Load and confirm all model params are present. - saved_model_params = list(np.load('./test.npz').keys()) + saved_model_params = list(np.load(TEST_FILE_PATH).keys()) for x in dir(model): ob = getattr(model, x) if not callable(ob) and not x.startswith('__'): assert x in saved_model_params + # Clean up + os.remove(TEST_FILE_PATH) + def test_model_populated(): # Train and persist a model data = fetch_movielens(min_rating=5.0) model = LightFM(loss='warp') model.fit(data['train'], epochs=5, num_threads=4) - model.save('./test.npz') + model.save(TEST_FILE_PATH) # Load a model onto an uninstanciated object model = LightFM(loss='warp') @@ -31,7 +38,10 @@ def test_model_populated(): assert model.item_embeddings == None assert model.user_embeddings == None - model.load('./test.npz') + model.load(TEST_FILE_PATH) assert model.item_embeddings.any() assert model.user_embeddings.any() + + # Clean up + os.remove(TEST_FILE_PATH) From 4f98c9270ee2fb16cd28c74e87e1616fabe42537 Mon Sep 17 00:00:00 2001 From: NegatioN Date: Sun, 4 Nov 2018 12:59:57 +0100 Subject: [PATCH 05/16] Formatting --- lightfm/lightfm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lightfm/lightfm.py b/lightfm/lightfm.py index 02ce75da..813870e6 100644 --- a/lightfm/lightfm.py +++ b/lightfm/lightfm.py @@ -25,6 +25,7 @@ 'item_embedding_gradients', 'user_bias_momentum', 'user_bias_gradients', 'user_embedding_momentum', 'user_embedding_gradients'} + class LightFM(object): """ A hybrid latent representation recommender model. @@ -484,7 +485,8 @@ def save(self, path): for re-initialization. This does not keep track of any mappings of items/users you may have in your dataloaders, so also needs to be stored somewhere for full restoration of the model. - Model is persisted as a compressed numpy file, and has the .npz extension appended to the path-parameter. + Model is persisted as a compressed numpy file, and has the .npz extension + appended to the path-parameter. Parameters ---------- From a4a5856c722f4802075dd05a79f6b35b24396d34 Mon Sep 17 00:00:00 2001 From: NegatioN Date: Mon, 26 Nov 2018 13:22:15 +0100 Subject: [PATCH 06/16] Reformat to match black settings --- lightfm/lightfm.py | 22 +++++++++++++++++----- tests/test_persist.py | 16 +++++++++------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/lightfm/lightfm.py b/lightfm/lightfm.py index 813870e6..a3b1dee6 100644 --- a/lightfm/lightfm.py +++ b/lightfm/lightfm.py @@ -20,10 +20,20 @@ CYTHON_DTYPE = np.float32 -model_weights = {'user_embeddings', 'user_biases', 'item_embeddings', 'item_biases', - 'item_bias_momentum', 'item_bias_gradients', 'item_embedding_momentum', - 'item_embedding_gradients', 'user_bias_momentum', 'user_bias_gradients', - 'user_embedding_momentum', 'user_embedding_gradients'} +model_weights = { + "user_embeddings", + "user_biases", + "item_embeddings", + "item_biases", + "item_bias_momentum", + "item_bias_gradients", + "item_embedding_momentum", + "item_embedding_gradients", + "user_bias_momentum", + "user_bias_gradients", + "user_embedding_momentum", + "user_embedding_gradients", +} class LightFM(object): @@ -513,7 +523,9 @@ def load(self, path): for value in [x for x in numpy_model if x in model_weights]: setattr(self, value, numpy_model[value]) - self.set_params(**{k: v for k, v in numpy_model.items() if k not in model_weights}) + self.set_params( + **{k: v for k, v in numpy_model.items() if k not in model_weights} + ) def fit( self, diff --git a/tests/test_persist.py b/tests/test_persist.py index fb8b41ea..3dc897ef 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -6,34 +6,36 @@ from lightfm import LightFM from lightfm.datasets import fetch_movielens -TEST_FILE_PATH = './tests/test.npz' +TEST_FILE_PATH = "./tests/test.npz" + def test_all_params_persisted(): # Train and persist a model data = fetch_movielens(min_rating=5.0) - model = LightFM(loss='warp') - model.fit(data['train'], epochs=5, num_threads=4) + model = LightFM(loss="warp") + model.fit(data["train"], epochs=5, num_threads=4) model.save(TEST_FILE_PATH) # Load and confirm all model params are present. saved_model_params = list(np.load(TEST_FILE_PATH).keys()) for x in dir(model): ob = getattr(model, x) - if not callable(ob) and not x.startswith('__'): + if not callable(ob) and not x.startswith("__"): assert x in saved_model_params # Clean up os.remove(TEST_FILE_PATH) + def test_model_populated(): # Train and persist a model data = fetch_movielens(min_rating=5.0) - model = LightFM(loss='warp') - model.fit(data['train'], epochs=5, num_threads=4) + model = LightFM(loss="warp") + model.fit(data["train"], epochs=5, num_threads=4) model.save(TEST_FILE_PATH) # Load a model onto an uninstanciated object - model = LightFM(loss='warp') + model = LightFM(loss="warp") assert model.item_embeddings == None assert model.user_embeddings == None From 23efc0cf4da51d7042e5c59612df0725180fd7ff Mon Sep 17 00:00:00 2001 From: NegatioN Date: Tue, 4 Dec 2018 13:50:59 +0100 Subject: [PATCH 07/16] Add test to confirm performance of model is identical after model loaded from file --- tests/test_persist.py | 61 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/tests/test_persist.py b/tests/test_persist.py index 3dc897ef..1fbfcf07 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -3,12 +3,31 @@ import numpy as np import os +from sklearn.metrics import roc_auc_score + from lightfm import LightFM from lightfm.datasets import fetch_movielens TEST_FILE_PATH = "./tests/test.npz" +def _binarize(dataset): + + positives = dataset.data >= 4.0 + dataset.data[positives] = 1.0 + dataset.data[np.logical_not(positives)] = -1.0 + + return dataset + + +def cleanup(): + os.remove(TEST_FILE_PATH) + + +movielens = fetch_movielens() +train, test = _binarize(movielens["train"]), _binarize(movielens["test"]) + + def test_all_params_persisted(): # Train and persist a model data = fetch_movielens(min_rating=5.0) @@ -23,8 +42,7 @@ def test_all_params_persisted(): if not callable(ob) and not x.startswith("__"): assert x in saved_model_params - # Clean up - os.remove(TEST_FILE_PATH) + cleanup() def test_model_populated(): @@ -45,5 +63,40 @@ def test_model_populated(): assert model.item_embeddings.any() assert model.user_embeddings.any() - # Clean up - os.remove(TEST_FILE_PATH) + cleanup() + + +def test_model_performance(): + # Train and persist a model + model = LightFM(random_state=10) + model.fit_partial(train, epochs=10, num_threads=4) + model.save(TEST_FILE_PATH) + + train_predictions = model.predict(train.row, train.col) + test_predictions = model.predict(test.row, test.col) + + trn_pred = roc_auc_score(train.data, train_predictions) + tst_pred = roc_auc_score(test.data, test_predictions) + assert trn_pred > 0.84 + assert tst_pred > 0.76 + + # Performance is worse when trained for 1 epoch + model = LightFM() + model.fit_partial(train, epochs=1, num_threads=4) + + train_predictions = model.predict(train.row, train.col) + test_predictions = model.predict(test.row, test.col) + + assert roc_auc_score(train.data, train_predictions) < 0.84 + assert roc_auc_score(test.data, test_predictions) < 0.76 + + # Performance is same as previous when loaded from disk + model.load(TEST_FILE_PATH) + + train_predictions = model.predict(train.row, train.col) + test_predictions = model.predict(test.row, test.col) + + assert roc_auc_score(train.data, train_predictions) == trn_pred + assert roc_auc_score(test.data, test_predictions) == tst_pred + + cleanup() From 16bf53bde4ac1de28cdf8aaf297527d6ae9c2eb6 Mon Sep 17 00:00:00 2001 From: NegatioN Date: Tue, 4 Dec 2018 14:11:32 +0100 Subject: [PATCH 08/16] Change load method to classmethod --- lightfm/lightfm.py | 13 ++++++++++--- tests/test_persist.py | 27 ++++++--------------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/lightfm/lightfm.py b/lightfm/lightfm.py index a3b1dee6..f3735f86 100644 --- a/lightfm/lightfm.py +++ b/lightfm/lightfm.py @@ -509,9 +509,12 @@ def save(self, path): model_params.update(hyperparams) np.savez_compressed(path, **model_params) - def load(self, path): + @classmethod + def load(cls, path): """ Loads a model saved in the format output by LightFM.save() + Example usage: + model = LightFM.load(path_to_saved_model) Parameters ---------- @@ -519,14 +522,18 @@ def load(self, path): path: string string-path of location to load the model from. """ + new_model = cls() + numpy_model = np.load(path) for value in [x for x in numpy_model if x in model_weights]: - setattr(self, value, numpy_model[value]) + setattr(new_model, value, numpy_model[value]) - self.set_params( + new_model.set_params( **{k: v for k, v in numpy_model.items() if k not in model_weights} ) + return new_model + def fit( self, interactions, diff --git a/tests/test_persist.py b/tests/test_persist.py index 1fbfcf07..83b4dad6 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -53,15 +53,10 @@ def test_model_populated(): model.save(TEST_FILE_PATH) # Load a model onto an uninstanciated object - model = LightFM(loss="warp") - - assert model.item_embeddings == None - assert model.user_embeddings == None - - model.load(TEST_FILE_PATH) + loaded_model = LightFM.load(TEST_FILE_PATH) - assert model.item_embeddings.any() - assert model.user_embeddings.any() + assert loaded_model.item_embeddings.any() + assert loaded_model.user_embeddings.any() cleanup() @@ -80,21 +75,11 @@ def test_model_performance(): assert trn_pred > 0.84 assert tst_pred > 0.76 - # Performance is worse when trained for 1 epoch - model = LightFM() - model.fit_partial(train, epochs=1, num_threads=4) - - train_predictions = model.predict(train.row, train.col) - test_predictions = model.predict(test.row, test.col) - - assert roc_auc_score(train.data, train_predictions) < 0.84 - assert roc_auc_score(test.data, test_predictions) < 0.76 - # Performance is same as previous when loaded from disk - model.load(TEST_FILE_PATH) + loaded_model = LightFM.load(TEST_FILE_PATH) - train_predictions = model.predict(train.row, train.col) - test_predictions = model.predict(test.row, test.col) + train_predictions = loaded_model.predict(train.row, train.col) + test_predictions = loaded_model.predict(test.row, test.col) assert roc_auc_score(train.data, train_predictions) == trn_pred assert roc_auc_score(test.data, test_predictions) == tst_pred From 4beaebd4b687e101ecdca8ca5e0a58393dc6d26c Mon Sep 17 00:00:00 2001 From: NegatioN Date: Tue, 4 Dec 2018 14:14:06 +0100 Subject: [PATCH 09/16] Remove redundant check of actual model performance --- tests/test_persist.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_persist.py b/tests/test_persist.py index 83b4dad6..bcc1b3b2 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -72,10 +72,8 @@ def test_model_performance(): trn_pred = roc_auc_score(train.data, train_predictions) tst_pred = roc_auc_score(test.data, test_predictions) - assert trn_pred > 0.84 - assert tst_pred > 0.76 - # Performance is same as previous when loaded from disk + # Performance is same as before when loaded from disk loaded_model = LightFM.load(TEST_FILE_PATH) train_predictions = loaded_model.predict(train.row, train.col) From 29683581d745b1e0d3e43f74b30ad4dbf3d7058f Mon Sep 17 00:00:00 2001 From: NegatioN Date: Tue, 4 Dec 2018 14:15:27 +0100 Subject: [PATCH 10/16] Don't load dataset multiple times --- tests/test_persist.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/test_persist.py b/tests/test_persist.py index bcc1b3b2..5a1b0433 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -29,16 +29,15 @@ def cleanup(): def test_all_params_persisted(): - # Train and persist a model - data = fetch_movielens(min_rating=5.0) model = LightFM(loss="warp") - model.fit(data["train"], epochs=5, num_threads=4) + model.fit(movielens["train"], epochs=1, num_threads=4) model.save(TEST_FILE_PATH) # Load and confirm all model params are present. saved_model_params = list(np.load(TEST_FILE_PATH).keys()) for x in dir(model): ob = getattr(model, x) + # We don't need to persist model functions, or magic variables of the model. if not callable(ob) and not x.startswith("__"): assert x in saved_model_params @@ -46,10 +45,8 @@ def test_all_params_persisted(): def test_model_populated(): - # Train and persist a model - data = fetch_movielens(min_rating=5.0) model = LightFM(loss="warp") - model.fit(data["train"], epochs=5, num_threads=4) + model.fit(movielens["train"], epochs=1, num_threads=4) model.save(TEST_FILE_PATH) # Load a model onto an uninstanciated object From 56060a7b6a9c9a06337f0fd49a1149fdc466eb74 Mon Sep 17 00:00:00 2001 From: NegatioN Date: Tue, 4 Dec 2018 14:17:49 +0100 Subject: [PATCH 11/16] reformat --- tests/test_persist.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/test_persist.py b/tests/test_persist.py index 5a1b0433..02ca505a 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -8,8 +8,6 @@ from lightfm import LightFM from lightfm.datasets import fetch_movielens -TEST_FILE_PATH = "./tests/test.npz" - def _binarize(dataset): @@ -20,10 +18,11 @@ def _binarize(dataset): return dataset -def cleanup(): +def _cleanup(): os.remove(TEST_FILE_PATH) +TEST_FILE_PATH = "./tests/test.npz" movielens = fetch_movielens() train, test = _binarize(movielens["train"]), _binarize(movielens["test"]) @@ -41,7 +40,7 @@ def test_all_params_persisted(): if not callable(ob) and not x.startswith("__"): assert x in saved_model_params - cleanup() + _cleanup() def test_model_populated(): @@ -55,7 +54,7 @@ def test_model_populated(): assert loaded_model.item_embeddings.any() assert loaded_model.user_embeddings.any() - cleanup() + _cleanup() def test_model_performance(): @@ -79,4 +78,4 @@ def test_model_performance(): assert roc_auc_score(train.data, train_predictions) == trn_pred assert roc_auc_score(test.data, test_predictions) == tst_pred - cleanup() + _cleanup() From 12ab4366fb3d988d3593bdb856b26dbf3b6a16ae Mon Sep 17 00:00:00 2001 From: NegatioN Date: Tue, 4 Dec 2018 14:38:13 +0100 Subject: [PATCH 12/16] Trigger rebuild --- lightfm/lightfm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lightfm/lightfm.py b/lightfm/lightfm.py index f3735f86..34860504 100644 --- a/lightfm/lightfm.py +++ b/lightfm/lightfm.py @@ -515,6 +515,7 @@ def load(cls, path): Loads a model saved in the format output by LightFM.save() Example usage: model = LightFM.load(path_to_saved_model) + Trigger rebuild to test. Parameters ---------- From b98171035dd2ebdff02712ec034a93ee90cd359e Mon Sep 17 00:00:00 2001 From: NegatioN Date: Tue, 4 Dec 2018 14:46:09 +0100 Subject: [PATCH 13/16] Change from classmethod to staticmethod since circleCI is not having it --- lightfm/lightfm.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lightfm/lightfm.py b/lightfm/lightfm.py index 34860504..0874fb8e 100644 --- a/lightfm/lightfm.py +++ b/lightfm/lightfm.py @@ -509,13 +509,12 @@ def save(self, path): model_params.update(hyperparams) np.savez_compressed(path, **model_params) - @classmethod - def load(cls, path): + @staticmethod + def load(path): """ Loads a model saved in the format output by LightFM.save() Example usage: model = LightFM.load(path_to_saved_model) - Trigger rebuild to test. Parameters ---------- @@ -523,7 +522,7 @@ def load(cls, path): path: string string-path of location to load the model from. """ - new_model = cls() + new_model = LightFM() numpy_model = np.load(path) for value in [x for x in numpy_model if x in model_weights]: From 6e556ad54b9f629b8c241c58ce1eec9d3d3187a6 Mon Sep 17 00:00:00 2001 From: NegatioN Date: Tue, 4 Dec 2018 14:49:13 +0100 Subject: [PATCH 14/16] Trigger build --- lightfm/lightfm.py | 6 +++--- tests/test_persist.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lightfm/lightfm.py b/lightfm/lightfm.py index 0874fb8e..f5afb654 100644 --- a/lightfm/lightfm.py +++ b/lightfm/lightfm.py @@ -510,17 +510,17 @@ def save(self, path): np.savez_compressed(path, **model_params) @staticmethod - def load(path): + def load_uncached(path): """ Loads a model saved in the format output by LightFM.save() Example usage: - model = LightFM.load(path_to_saved_model) + model = LightFM.load_uncached(path_to_saved_model) Parameters ---------- path: string - string-path of location to load the model from. + string-path of location to load_uncached the model from. """ new_model = LightFM() diff --git a/tests/test_persist.py b/tests/test_persist.py index 02ca505a..de5025a9 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -5,7 +5,7 @@ from sklearn.metrics import roc_auc_score -from lightfm import LightFM +from lightfm.lightfm import LightFM from lightfm.datasets import fetch_movielens @@ -49,7 +49,7 @@ def test_model_populated(): model.save(TEST_FILE_PATH) # Load a model onto an uninstanciated object - loaded_model = LightFM.load(TEST_FILE_PATH) + loaded_model = LightFM.load_uncached(TEST_FILE_PATH) assert loaded_model.item_embeddings.any() assert loaded_model.user_embeddings.any() @@ -70,7 +70,7 @@ def test_model_performance(): tst_pred = roc_auc_score(test.data, test_predictions) # Performance is same as before when loaded from disk - loaded_model = LightFM.load(TEST_FILE_PATH) + loaded_model = LightFM.load_uncached(TEST_FILE_PATH) train_predictions = loaded_model.predict(train.row, train.col) test_predictions = loaded_model.predict(test.row, test.col) From 67af48b6915da881398a2ac4eb27e60165eb1c43 Mon Sep 17 00:00:00 2001 From: NegatioN Date: Sat, 15 Dec 2018 14:11:38 +0100 Subject: [PATCH 15/16] use pytest fixtures to instanziate and clean up --- lightfm/lightfm.py | 2 +- tests/test_persist.py | 108 +++++++++++++++++++----------------------- 2 files changed, 51 insertions(+), 59 deletions(-) diff --git a/lightfm/lightfm.py b/lightfm/lightfm.py index f5afb654..1cb0dc7c 100644 --- a/lightfm/lightfm.py +++ b/lightfm/lightfm.py @@ -510,7 +510,7 @@ def save(self, path): np.savez_compressed(path, **model_params) @staticmethod - def load_uncached(path): + def load(path): """ Loads a model saved in the format output by LightFM.save() Example usage: diff --git a/tests/test_persist.py b/tests/test_persist.py index de5025a9..119e8718 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -5,12 +5,11 @@ from sklearn.metrics import roc_auc_score -from lightfm.lightfm import LightFM +from lightfm.lightfm import LightFM, model_weights from lightfm.datasets import fetch_movielens def _binarize(dataset): - positives = dataset.data >= 4.0 dataset.data[positives] = 1.0 dataset.data[np.logical_not(positives)] = -1.0 @@ -18,64 +17,57 @@ def _binarize(dataset): return dataset -def _cleanup(): - os.remove(TEST_FILE_PATH) - - TEST_FILE_PATH = "./tests/test.npz" movielens = fetch_movielens() train, test = _binarize(movielens["train"]), _binarize(movielens["test"]) -def test_all_params_persisted(): - model = LightFM(loss="warp") - model.fit(movielens["train"], epochs=1, num_threads=4) - model.save(TEST_FILE_PATH) - - # Load and confirm all model params are present. - saved_model_params = list(np.load(TEST_FILE_PATH).keys()) - for x in dir(model): - ob = getattr(model, x) - # We don't need to persist model functions, or magic variables of the model. - if not callable(ob) and not x.startswith("__"): - assert x in saved_model_params - - _cleanup() - - -def test_model_populated(): - model = LightFM(loss="warp") - model.fit(movielens["train"], epochs=1, num_threads=4) - model.save(TEST_FILE_PATH) - - # Load a model onto an uninstanciated object - loaded_model = LightFM.load_uncached(TEST_FILE_PATH) - - assert loaded_model.item_embeddings.any() - assert loaded_model.user_embeddings.any() - - _cleanup() - - -def test_model_performance(): - # Train and persist a model - model = LightFM(random_state=10) - model.fit_partial(train, epochs=10, num_threads=4) - model.save(TEST_FILE_PATH) - - train_predictions = model.predict(train.row, train.col) - test_predictions = model.predict(test.row, test.col) - - trn_pred = roc_auc_score(train.data, train_predictions) - tst_pred = roc_auc_score(test.data, test_predictions) - - # Performance is same as before when loaded from disk - loaded_model = LightFM.load_uncached(TEST_FILE_PATH) - - train_predictions = loaded_model.predict(train.row, train.col) - test_predictions = loaded_model.predict(test.row, test.col) - - assert roc_auc_score(train.data, train_predictions) == trn_pred - assert roc_auc_score(test.data, test_predictions) == tst_pred - - _cleanup() +class TestPersist: + @pytest.fixture + def model(self): + # Train and persist a model + model = LightFM(random_state=10) + model.fit(movielens["train"], epochs=5, num_threads=4) + model.save(TEST_FILE_PATH) + return model + + @classmethod + def teardown_class(cls): + os.remove(TEST_FILE_PATH) + + def test_all_params_persisted(self, model): + # Load and confirm all model params are present. + saved_model_params = list(np.load(TEST_FILE_PATH).keys()) + for x in dir(model): + ob = getattr(model, x) + # We don't need to persist model functions, or magic variables of the model. + if not callable(ob) and not x.startswith("__"): + assert x in saved_model_params + + def test_all_loaded_weights_numpy_arrays(self, model): + # Load a model onto an uninstanciated object + loaded_model = LightFM.load(TEST_FILE_PATH) + + for weight_name in model_weights: + assert callable(getattr(loaded_model, weight_name).any) + + def test_model_performance(self, model): + train_predictions = model.predict(train.row, train.col) + test_predictions = model.predict(test.row, test.col) + + trn_pred = roc_auc_score(train.data, train_predictions) + tst_pred = roc_auc_score(test.data, test_predictions) + + # Performance is same as before when loaded from disk + loaded_model = LightFM.load(TEST_FILE_PATH) + + train_predictions = loaded_model.predict(train.row, train.col) + test_predictions = loaded_model.predict(test.row, test.col) + + # Use approximately equal because floating point math may make our summation slightly different. + assert roc_auc_score(train.data, train_predictions) == pytest.approx( + trn_pred, 0.0001 + ) + assert roc_auc_score(test.data, test_predictions) == pytest.approx( + tst_pred, 0.0001 + ) From b7fd48d13d3e9f1b3672320056773fc8395470f4 Mon Sep 17 00:00:00 2001 From: NegatioN Date: Sat, 15 Dec 2018 14:18:10 +0100 Subject: [PATCH 16/16] also update method tooltip --- lightfm/lightfm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightfm/lightfm.py b/lightfm/lightfm.py index 1cb0dc7c..0874fb8e 100644 --- a/lightfm/lightfm.py +++ b/lightfm/lightfm.py @@ -514,13 +514,13 @@ def load(path): """ Loads a model saved in the format output by LightFM.save() Example usage: - model = LightFM.load_uncached(path_to_saved_model) + model = LightFM.load(path_to_saved_model) Parameters ---------- path: string - string-path of location to load_uncached the model from. + string-path of location to load the model from. """ new_model = LightFM()