Skip to content

Commit

Permalink
Merge pull request #296 from mdekstrand/feature/ii-ignore-ratings
Browse files Browse the repository at this point in the history
Add use_ratings switch to item-item k-NN
  • Loading branch information
mdekstrand committed Feb 2, 2022
2 parents 837f074 + dfa1b2d commit 3a8b62f
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 7 deletions.
7 changes: 7 additions & 0 deletions lenskit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,10 @@ class DataWarning(UserWarning):
Warning raised for detectable problems with input data.
"""
pass


class ConfigWarning(UserWarning):
    """
    Warning category for detectable problems with an algorithm's configuration.

    Emitted through :func:`warnings.warn`, so callers can filter it or assert on it
    like any other :class:`UserWarning` subclass.
    """
27 changes: 22 additions & 5 deletions lenskit/algorithms/item_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from numba import njit, prange
from numba.typed import List

from lenskit import util, DataWarning
from lenskit import util, DataWarning, ConfigWarning
from lenskit.data import sparse_ratings
from lenskit.sharing import in_share_context
from lenskit.util.parallel import is_mp_worker
Expand Down Expand Up @@ -228,10 +228,13 @@ class ItemItem(Predictor):
(``None`` for unlimited)
center(bool):
whether to normalize (mean-center) rating vectors prior to computing similarities
and aggregating user rating values. Turn this off when working with unary data
and other data types that don't respond well to centering.
and aggregating user rating values. Defaults to ``True``; turn this off when working
with unary data and other data types that don't respond well to centering.
aggregate:
the type of aggregation to do. Can be ``weighted-average`` or ``sum``.
the type of aggregation to do. Can be ``weighted-average`` (the default) or ``sum``.
use_ratings(bool):
whether or not to use the rating values. Defaults to ``True``; if ``False``, rating values
are ignored and every (user, item) pair present is treated as an implicit feedback signal of 1.
Attributes:
item_index_(pandas.Index): the index of item IDs.
Expand All @@ -246,7 +249,7 @@ class ItemItem(Predictor):
RATING_AGGS = [AGG_WA] # the aggregates that use rating values

def __init__(self, nnbrs, min_nbrs=1, min_sim=1.0e-6, save_nbrs=None,
center=True, aggregate='weighted-average'):
center=True, aggregate='weighted-average', use_ratings=True):
self.nnbrs = nnbrs
if self.nnbrs is not None and self.nnbrs < 1:
self.nnbrs = -1
Expand All @@ -257,6 +260,20 @@ def __init__(self, nnbrs, min_nbrs=1, min_sim=1.0e-6, save_nbrs=None,
self.save_nbrs = save_nbrs
self.center = center
self.aggregate = aggregate
self.use_ratings = use_ratings
if not use_ratings:
if center:
_logger.warning('item-item configured to ignore ratings, but ``center=True`` - likely bug')
warnings.warn(util.clean_str('''
item-item configured to ignore ratings, but ``center=True``. This configuration
is unlikely to work well.
'''), ConfigWarning)
if aggregate == 'weighted-average':
_logger.warning('item-item configured to ignore ratings, but using weighted averages - likely bug')
warnings.warn(util.clean_str('''
item-item configured to ignore ratings, but use weighted averages. This configuration
is unlikely to work well.
'''), ConfigWarning)

def fit(self, ratings, **kwargs):
"""
Expand Down
36 changes: 34 additions & 2 deletions tests/test_knn_item_item.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from lenskit import DataWarning
from lenskit import ConfigWarning, DataWarning
from lenskit.algorithms import Recommender
from lenskit.algorithms.basic import Fallback
from lenskit.algorithms.bias import Bias
Expand Down Expand Up @@ -160,6 +160,18 @@ def test_ii_warns_center():
algo.fit(data)


def test_ii_warns_center_with_no_use_ratings():
    """
    Item-item must raise a ConfigWarning when told to ignore ratings while centering.
    """
    # ``center`` is left at its default, so only ``use_ratings`` and ``aggregate`` are
    # overridden; ``aggregate='sum'`` keeps the weighted-average check out of the picture.
    config = dict(aggregate='sum', use_ratings=False)
    with pytest.warns(ConfigWarning):
        knn.ItemItem(5, **config)


def test_ii_warns_wa_with_no_use_ratings():
    """
    Test that item-item warns if you configure it to ignore ratings but keep the
    weighted-average aggregate.
    """
    # ``center=False`` disables the centering check, and ``aggregate`` is left at its
    # default, so any ConfigWarning here comes from the aggregate check.
    with pytest.warns(ConfigWarning):
        # Only construction matters; the instance is intentionally not kept.
        knn.ItemItem(5, use_ratings=False, center=False)


@lktu.wantjit
@mark.skip("redundant with large_models")
def test_ii_train_big():
Expand Down Expand Up @@ -456,7 +468,7 @@ def test_ii_implicit_save_load(tmp_path, ml_subset):

@lktu.wantjit
@mark.slow
def test_ii_implicit():
def test_ii_old_implicit():
algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
data = ml_ratings.loc[:, ['user', 'item']]

Expand All @@ -469,6 +481,26 @@ def test_ii_implicit():
assert all(preds[preds.notna()] > 0)


@lktu.wantjit
@mark.slow
def test_ii_no_ratings():
    """
    Check that ``use_ratings=False`` on rated data agrees with the older way of
    training an implicit-feedback model, i.e. fitting on only the user and item columns.
    """
    # Reference model: same configuration, trained on data with no rating column.
    a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
    a1.fit(ml_ratings.loc[:, ['user', 'item']])

    # Model under test: sees the full rating frame but is told to ignore the values.
    algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum', use_ratings=False)
    algo.fit(ml_ratings)

    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert all(algo.sim_matrix_.values > 0)
    assert all(algo.item_counts_ <= 100)

    preds = algo.predict_for_user(50, [1, 2, 42])
    assert all(preds[preds.notna()] > 0)

    # Compare against the reference model. The original queried ``algo`` a second time,
    # which made this assertion compare a model with itself and left ``a1`` unused.
    p2 = a1.predict_for_user(50, [1, 2, 42])
    preds, p2 = preds.align(p2)
    assert preds.values == approx(p2.values, nan_ok=True)


@mark.slow
def test_ii_implicit_fast_ident():
algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
Expand Down

0 comments on commit 3a8b62f

Please sign in to comment.