Commit
Merge branch 'feature/implicit'
mdekstrand committed Nov 28, 2018
2 parents 75bc90b + 59f6c2a commit 16d72f8
Showing 10 changed files with 203 additions and 19 deletions.
20 changes: 12 additions & 8 deletions azure-pipelines.yml
@@ -1,16 +1,14 @@
# Python package
# Create and test a Python package on multiple Python versions.
# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
# https://docs.microsoft.com/vsts/pipelines/languages/python
variables:
conda.deps: >
python=$(python.version)
pandas scipy pytables fastparquet python-snappy numba cffi
invoke coverage pytest pytest-cov
invoke coverage pytest pytest-cov cython
pip.deps: >
invoke pytest coverage pytest-cov
pandas scipy pyarrow
numba
numba cython
pip.extras: >
hpfrec==0.2.2.5 implicit
jobs:

@@ -31,7 +29,10 @@ jobs:
createCustomEnvironment: true
environmentName: lkpy
packageSpecs: $(conda.deps)
updateConda: false

- script: |
sudo /usr/envs/lkpy/bin/pip install $(pip.extras)
displayName: 'Extra PyPI deps'
- script: |
if [ ! -r ~/ml-100k/u.data ]; then
@@ -98,7 +99,7 @@ jobs:
python -m pip install --upgrade pip
pip install $(pip.deps)
displayName: 'Install dependencies'
- script: |
if [ ! -r ~/ml-100k/u.data ]; then
wget --no-verbose -O ml-100k.zip http://files.grouplens.org/datasets/movielens/ml-100k.zip
@@ -247,6 +248,9 @@ jobs:
packageSpecs: $(conda.deps)
updateConda: false

- script: sudo conda install -y llvm-openmp
displayName: 'Install OpenMP'

- script: |
if [ ! -r ~/ml-100k/u.data ]; then
wget --no-verbose -O ml-100k.zip http://files.grouplens.org/datasets/movielens/ml-100k.zip
1 change: 1 addition & 0 deletions doc/algorithms.rst
@@ -13,3 +13,4 @@ algorithms.
knn
mf
hpf
implicit
3 changes: 2 additions & 1 deletion doc/conf.py
@@ -180,5 +180,6 @@
'pandas': ('http://pandas.pydata.org/pandas-docs/stable/', None),
'numpy': ('https://docs.scipy.org/doc/numpy/', None),
'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None),
'hpfrec': ('https://hpfrec.readthedocs.io/en/latest/', None)
'hpfrec': ('https://hpfrec.readthedocs.io/en/latest/', None),
'implicit': ('https://implicit.readthedocs.io/en/latest/', None)
}
15 changes: 15 additions & 0 deletions doc/implicit.rst
@@ -0,0 +1,15 @@
Implicit
========

.. module:: lenskit.algorithms.implicit

This module provides a LensKit bridge to the implicit_ library, which implements
several implicit-feedback recommenders.

.. _implicit: https://implicit.readthedocs.io/en/latest/

.. autoclass:: ALS
    :members:

.. autoclass:: BPR
    :members:
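
For orientation, a minimal usage sketch of this bridge (not part of the commit; it assumes a ratings DataFrame with the usual LensKit user, item, and rating columns, and the user ID is made up):

    from lenskit.algorithms.implicit import ALS

    algo = ALS(25)                           # arguments pass straight through to AlternatingLeastSquares
    model = algo.train(ratings)              # returns an ImplicitModel namedtuple
    recs = algo.recommend(model, 10, n=10)   # DataFrame of 'item' and 'score' for user 10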
84 changes: 84 additions & 0 deletions lenskit/algorithms/implicit.py
@@ -0,0 +1,84 @@
from collections import namedtuple
import pandas as pd
import numpy as np

from implicit.als import AlternatingLeastSquares
from implicit.bpr import BayesianPersonalizedRanking

from ..matrix import sparse_ratings
from . import Trainable, Recommender

ImplicitModel = namedtuple('ImplicitModel', [
    'algo', 'matrix', 'users', 'items'
])
ImplicitModel.__doc__ = '''
Model for *implicit*-backed recommenders.

Attributes:
    algo(implicit.RecommenderBase): the underlying algorithm.
    matrix(scipy.sparse.csr_matrix): the user-item matrix.
    users(pandas.Index): the user ID to user position index.
    items(pandas.Index): the item ID to item position index.
'''


class BaseRec(Trainable, Recommender):
    """
    Base class for Implicit-backed recommenders.
    """
    def __init__(self, algo, *args, **kwargs):
        self.algo_class = algo
        self.algo_args = args
        self.algo_kwargs = kwargs

    def train(self, ratings):
        matrix, users, items = sparse_ratings(ratings, scipy=True)
        # implicit's fit() takes an item-user matrix, so transpose the user-item matrix
        iur = matrix.T.tocsr()

        algo = self.algo_class(*self.algo_args, **self.algo_kwargs)
        algo.fit(iur)

        return ImplicitModel(algo, matrix, users, items)

    def recommend(self, model: ImplicitModel, user, n=None, candidates=None, ratings=None):
        try:
            uid = model.users.get_loc(user)
        except KeyError:
            # unknown user - return an empty recommendation frame
            return pd.DataFrame({'item': []})

        if candidates is None:
            recs = model.algo.recommend(uid, model.matrix, N=n)
        else:
            cands = model.items.get_indexer(candidates)
            # drop candidate items that did not appear in the training data
            cands = cands[cands >= 0]
            recs = model.algo.rank_items(uid, model.matrix, cands)

        if n is not None:
            recs = recs[:n]
        rec_df = pd.DataFrame.from_records(recs, columns=['item_pos', 'score'])
        rec_df['item'] = model.items[rec_df.item_pos]
        return rec_df.loc[:, ['item', 'score']]


class ALS(BaseRec):
    """
    LensKit interface to :py:mod:`implicit.als`.
    """
    def __init__(self, *args, **kwargs):
        """
        Construct an ALS recommender. The arguments are passed as-is to
        :py:class:`implicit.als.AlternatingLeastSquares`.
        """
        super().__init__(AlternatingLeastSquares, *args, **kwargs)


class BPR(BaseRec):
    """
    LensKit interface to :py:mod:`implicit.bpr`.
    """
    def __init__(self, *args, **kwargs):
        """
        Construct a BPR recommender. The arguments are passed as-is to
        :py:class:`implicit.bpr.BayesianPersonalizedRanking`.
        """
        super().__init__(BayesianPersonalizedRanking, *args, **kwargs)
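
Candidate restriction works through the same recommend() call; a hypothetical sketch continuing the example above (the candidate item IDs are invented, and any ID absent from training is dropped before ranking):

    cands = [31, 64, 150]                               # hypothetical item IDs to rank
    recs = algo.recommend(model, 10, candidates=cands)  # scores only the surviving candidates via rank_items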
4 changes: 2 additions & 2 deletions lenskit/algorithms/mf_common.py
@@ -43,12 +43,12 @@ def n_features(self):
@property
def n_users(self):
"The number of users."
return len(self.users)
return len(self.user_index)

@property
def n_items(self):
"The number of items."
return len(self.items)
return len(self.item_index)

def lookup_user(self, user):
"""
6 changes: 6 additions & 0 deletions setup.py
@@ -41,6 +41,12 @@
'sphinx >= 1.8',
'sphinx_rtd_theme',
'nbsphinx'
],
'hpf': [
'hpfrec'
],
'implicit': [
'implicit'
]
},
packages=find_packages()
10 changes: 2 additions & 8 deletions tests/test_batch_recommend.py
@@ -34,10 +34,7 @@ def test_recommend_single(mlb):
assert len(res) == 1
assert all(res['user'] == 1)
assert all(res['rank'] == 1)
if sys.version_info >= (3, 6):
assert list(res.columns) == ['user', 'rank', 'item', 'score']
else:
warnings.warn('Python 3.5 loses column order')
assert set(res.columns) == set(['user', 'rank', 'item', 'score'])

expected = mlb.model.mean + mlb.model.items.loc[31] + mlb.model.users.loc[1]
assert res.score.iloc[0] == pytest.approx(expected)
@@ -54,10 +51,7 @@ def candidates(user):
res = lkb.recommend(mlb.algo, mlb.model, [5], 10, candidates)

assert len(res) == 10
if sys.version_info >= (3, 6):
assert list(res.columns) == ['user', 'rank', 'item', 'score']
else:
warnings.warn('Python 3.5 loses column order')
assert set(res.columns) == set(['user', 'rank', 'item', 'score'])
assert all(res['user'] == uid)
assert all(res['rank'] == np.arange(10) + 1)
# they should be in decreasing order
1 change: 1 addition & 0 deletions tests/test_hpf.py
@@ -27,6 +27,7 @@
def test_hpf_train_large():
algo = hpf.HPF(20)
ratings = lktu.ml_pandas.renamed.ratings
ratings = ratings.assign(rating=ratings.rating + 0.5)
model = algo.train(ratings)

assert model is not None
78 changes: 78 additions & 0 deletions tests/test_implicit.py
@@ -0,0 +1,78 @@
import logging

import pandas as pd
import numpy as np

from pytest import mark

import lk_test_utils as lktu

try:
    from lenskit.algorithms import implicit
    have_implicit = True
except ImportError:
    have_implicit = False

_log = logging.getLogger(__name__)

simple_df = pd.DataFrame({'item': [1, 1, 2, 3],
                          'user': [10, 12, 10, 13],
                          'rating': [4.0, 3.0, 5.0, 2.0]})


@mark.slow
@mark.skipif(not have_implicit, reason='implicit not installed')
def test_implicit_als_train_rec():
    algo = implicit.ALS(25)
    ratings = lktu.ml_pandas.renamed.ratings

    model = algo.train(ratings)
    assert model is not None

    recs = algo.recommend(model, 100, n=20)
    assert len(recs) == 20


@mark.slow
@mark.eval
@mark.skipif(not have_implicit, reason='implicit not installed')
@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present')
def test_implicit_als_batch_accuracy():
    import lenskit.crossfold as xf
    from lenskit import batch, topn
    import lenskit.metrics.topn as lm

    ratings = lktu.ml100k.load_ratings()

    algo = implicit.ALS(25)

    def eval(train, test):
        _log.info('running training')
        train['rating'] = train.rating.astype(np.float_)
        model = algo.train(train)
        users = test.user.unique()
        _log.info('testing %d users', len(users))
        candidates = topn.UnratedCandidates(train)
        recs = batch.recommend(algo, model, users, 100, candidates, test)
        return recs

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    recs = pd.concat(eval(train, test) for (train, test) in folds)

    _log.info('analyzing recommendations')
    ndcg = recs.groupby('user').rating.apply(lm.ndcg)
    _log.info('ndcg for users is %.4f', ndcg.mean())
    assert ndcg.mean() > 0


@mark.slow
@mark.skipif(not have_implicit, reason='implicit not installed')
def test_implicit_bpr_train_rec():
    algo = implicit.BPR(25)
    ratings = lktu.ml_pandas.renamed.ratings

    model = algo.train(ratings)
    assert model is not None

    recs = algo.recommend(model, 100, n=20)
    assert len(recs) == 20
