Merge pull request #296 from mdekstrand/feature/ii-ignore-ratings

Add use_ratings switch to item-item k-NN
lenskit · Feb 2, 2022 · 3a8b62f · 3a8b62f
2 parents 837f074 + dfa1b2d
commit 3a8b62f
Show file tree

Hide file tree

Showing 3 changed files with 63 additions and 7 deletions.
diff --git a/lenskit/__init__.py b/lenskit/__init__.py
@@ -13,3 +13,10 @@ class DataWarning(UserWarning):
     Warning raised for detectable problems with input data.
     """
     pass
+
+
+class ConfigWarning(UserWarning):
+    """
+    Warning raised for detectable algorithm configuration problems, such as conflicting options.
+    """
+    pass
diff --git a/lenskit/algorithms/item_knn.py b/lenskit/algorithms/item_knn.py
@@ -15,7 +15,7 @@
 from numba import njit, prange
 from numba.typed import List
 
-from lenskit import util, DataWarning
+from lenskit import util, DataWarning, ConfigWarning
 from lenskit.data import sparse_ratings
 from lenskit.sharing import in_share_context
 from lenskit.util.parallel import is_mp_worker
@@ -228,10 +228,13 @@ class ItemItem(Predictor):
             (``None`` for unlimited)
         center(bool):
             whether to normalize (mean-center) rating vectors prior to computing similarities
-            and aggregating user rating values.  Turn this off when working with unary data
-            and other data types that don't respond well to centering.
+            and aggregating user rating values.  Defaults to ``True``; turn this off when working
+            with unary data and other data types that don't respond well to centering.
         aggregate:
-            the type of aggregation to do. Can be ``weighted-average`` or ``sum``.
+            the type of aggregation to do. Can be ``weighted-average`` (the default) or ``sum``.
+        use_ratings:
+            whether or not to use the rating values. If ``False``, it ignores rating values and
+            considers an implicit feedback signal of 1 for every (user,item) pair present.
 
     Attributes:
         item_index_(pandas.Index): the index of item IDs.
@@ -246,7 +249,7 @@ class ItemItem(Predictor):
     RATING_AGGS = [AGG_WA]  # the aggregates that use rating values
 
     def __init__(self, nnbrs, min_nbrs=1, min_sim=1.0e-6, save_nbrs=None,
-                 center=True, aggregate='weighted-average'):
+                 center=True, aggregate='weighted-average', use_ratings=True):
         self.nnbrs = nnbrs
         if self.nnbrs is not None and self.nnbrs < 1:
             self.nnbrs = -1
@@ -257,6 +260,20 @@ def __init__(self, nnbrs, min_nbrs=1, min_sim=1.0e-6, save_nbrs=None,
         self.save_nbrs = save_nbrs
         self.center = center
         self.aggregate = aggregate
+        self.use_ratings = use_ratings
+        if not use_ratings:
+            if center:
+                _logger.warning('item-item configured to ignore ratings, but ``center=True`` - likely bug')
+                warnings.warn(util.clean_str('''
+                    item-item configured to ignore ratings, but ``center=True``.  This configuration
+                    is unlikely to work well.
+                '''), ConfigWarning)
+            if aggregate == 'weighted-average':
+                _logger.warning('item-item configured to ignore ratings, but using weighted averages - likely bug')
+                warnings.warn(util.clean_str('''
+                    item-item configured to ignore ratings, but use weighted averages.  This configuration
+                    is unlikely to work well.
+                '''), ConfigWarning)
 
     def fit(self, ratings, **kwargs):
         """

diff --git a/tests/test_knn_item_item.py b/tests/test_knn_item_item.py
@@ -1,4 +1,4 @@
-from lenskit import DataWarning
+from lenskit import ConfigWarning, DataWarning
 from lenskit.algorithms import Recommender
 from lenskit.algorithms.basic import Fallback
 from lenskit.algorithms.bias import Bias
@@ -160,6 +160,18 @@ def test_ii_warns_center():
         algo.fit(data)
 
 
+def test_ii_warns_center_with_no_use_ratings():
+    "Item-item must emit a ConfigWarning when ratings are ignored but centering is left on."
+    with pytest.warns(ConfigWarning):
+        knn.ItemItem(nnbrs=5, aggregate='sum', use_ratings=False)
+
+
+def test_ii_warns_wa_with_no_use_ratings():
+    "Test that item-item warns if you configure to ignore ratings but use weighted-average."
+    with pytest.warns(ConfigWarning):
+        knn.ItemItem(5, use_ratings=False, center=False)
+
+
 @lktu.wantjit
 @mark.skip("redundant with large_models")
 def test_ii_train_big():
@@ -456,7 +468,7 @@ def test_ii_implicit_save_load(tmp_path, ml_subset):
 
 @lktu.wantjit
 @mark.slow
-def test_ii_implicit():
+def test_ii_old_implicit():
     algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
     data = ml_ratings.loc[:, ['user', 'item']]
 
@@ -469,6 +481,26 @@ def test_ii_implicit():
     assert all(preds[preds.notna()] > 0)
 
 
+@lktu.wantjit
+@mark.slow
+def test_ii_no_ratings():
+    "Test that ``use_ratings=False`` predicts the same as a model fit without a rating column."
+    a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
+    a1.fit(ml_ratings.loc[:, ['user', 'item']])
+
+    algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum', use_ratings=False)
+    algo.fit(ml_ratings)
+    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
+    assert all(algo.sim_matrix_.values > 0)
+    assert all(algo.item_counts_ <= 100)
+
+    preds = algo.predict_for_user(50, [1, 2, 42])
+    assert all(preds[preds.notna()] > 0)
+    p2 = a1.predict_for_user(50, [1, 2, 42])  # reference model, not ``algo`` itself
+    preds, p2 = preds.align(p2)
+    assert preds.values == approx(p2.values, nan_ok=True)
+
+
 @mark.slow
 def test_ii_implicit_fast_ident():
     algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')