Merge pull request #33 from lenskit/feature/als
improved tests and refactorings
mdekstrand committed Oct 10, 2018
2 parents 5a2c5da + 5ede376 commit c9164dc
Showing 8 changed files with 4,181 additions and 26 deletions.
14 changes: 14 additions & 0 deletions doc/knn.rst
@@ -6,6 +6,19 @@ implementations. These lightly-configurable implementations are intended
to capture the behavior of the Java-based LensKit implementations to provide
a good upgrade path and enable basic experiments out of the box.

.. contents::
   :local:

Item-based k-NN
---------------

.. module:: lenskit.algorithms.item_knn

.. autoclass:: ItemItem
   :members:
   :show-inheritance:

.. autoclass:: IIModel

User-based k-NN
---------------

@@ -16,3 +29,4 @@ User-based k-NN
   :show-inheritance:

.. autoclass:: UUModel

14 changes: 14 additions & 0 deletions doc/mf.rst
@@ -3,6 +3,20 @@ Classic Matrix Factorization

LKPY provides classical matrix factorization implementations.

.. contents::
   :local:

Common Support
--------------

.. module:: lenskit.algorithms.mf_common

The :py:mod:`mf_common` module contains common support code for matrix factorization
algorithms.

.. autoclass:: BiasMFModel
   :members:

FunkSVD
-------

32 changes: 7 additions & 25 deletions lenskit/algorithms/funksvd.py
@@ -2,7 +2,6 @@
 FunkSVD (biased MF).
 """

-from collections import namedtuple
 import logging

 import pandas as pd
@@ -11,13 +10,10 @@

 from . import Trainable, Predictor
 from . import basic
+from .mf_common import BiasMFModel

 _logger = logging.getLogger(__package__)

-BiasMFModel = namedtuple('BiasMFModel', ['user_index', 'item_index',
-                                         'global_bias', 'user_bias', 'item_bias',
-                                         'user_features', 'item_features'])
-

 @n.jitclass([
     ('user_features', n.double[:, :]),
@@ -271,36 +267,22 @@ def train(self, ratings, bias=None):
                           model.user_features, model.item_features)

    def predict(self, model, user, items, ratings=None):
-        if user not in model.user_index:
+        # look up user index
+        uidx = model.lookup_user(user)
+        if uidx < 0:
            _logger.debug('user %s not in model', user)
            return pd.Series(np.nan, index=items)

-        # get user index
-        uidx = model.user_index.get_loc(user)
-        assert uidx >= 0
-
        # get item index & limit to valid ones
        items = np.array(items)
-        iidx = model.item_index.get_indexer(items)
+        iidx = model.lookup_items(items)
        good = iidx >= 0
        good_items = items[good]
        good_iidx = iidx[good]

-        # get user vector
-        uv = model.user_features[uidx, :]
-        # get item matrix
-        im = model.item_features[good_iidx, :]
-
-        # multiply
        _logger.debug('scoring %d items for user %s', len(good_items), user)
-        rv = np.matmul(im, uv)
-        assert rv.shape[0] == len(good_items)
-        assert len(rv.shape) == 1
-        # add bias back in
-        rv = rv + model.global_bias
-        if model.user_bias is not None:
-            rv = rv + model.user_bias.iloc[uidx]
-        if model.item_bias is not None:
-            rv = rv + model.item_bias.iloc[good_iidx].values
+        rv = model.score(uidx, good_iidx)

        # clamp if suitable
        if self.range is not None:
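The net effect of this hunk is that FunkSVD's predict now delegates user/item index lookups and the bias-plus-dot-product scoring to the shared model object. A minimal sketch of the resulting flow (simplified and hypothetical, not the committed function; the final reindex step is assumed from code outside this hunk):

import numpy as np
import pandas as pd

def predict_sketch(model, user, items):
    # Simplified illustration of the refactored path: the algorithm
    # now asks the shared BiasMFModel for indices and scores.
    uidx = model.lookup_user(user)
    if uidx < 0:
        return pd.Series(np.nan, index=items)      # unknown user
    items = np.array(items)
    iidx = model.lookup_items(items)                # -1 marks unknown items
    good = iidx >= 0
    scores = model.score(uidx, iidx[good])          # biased dot products
    return pd.Series(scores, index=items[good]).reindex(items)

Centralizing the scoring in BiasMFModel presumably lets other matrix-factorization algorithms on this branch (such as the ALS work) reuse the same logic.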
94 changes: 94 additions & 0 deletions lenskit/algorithms/mf_common.py
@@ -0,0 +1,94 @@
"""
Common utilities & implementations for matrix factorization.
"""

from collections import namedtuple
import logging

import pandas as pd
import numpy as np

_logger = logging.getLogger(__package__)


class BiasMFModel:
"""
Common model for biased matrix factorization.
Attributes:
user_index(pandas.Index): Users in the model (length=:math:`m`).
item_index(pandas.Index): Items in the model (length=:math:`n`).
global_bias(double): The global bias term.
user_bias(numpy.ndarray): The user bias terms.
item_bias(numpy.ndarray): The item bias terms.
user_features(numpy.ndarray): The :math:`m \\times k` user-feature matrix.
item_features(numpy.ndarray): The :math:`n \\times k` item-feature matrix.
"""

def __init__(self, users, items, gbias, ubias, ibias, umat, imat):
self.user_index = users
self.item_index = items
self.global_bias = gbias
self.user_bias = ubias
self.item_bias = ibias
self.user_features = umat
self.item_features = imat

def lookup_user(self, user):
"""
Look up the index for a user.
Args:
user: the user ID to look up
Returns:
int: the user index.
"""
try:
return self.user_index.get_loc(user)
except KeyError:
return -1

def lookup_items(self, items):
"""
Look up the indices for a set of items.
Args:
items(array-like): the item IDs to look up.
Returns:
numpy.ndarray: the item indices. Unknown items will have negative indices.
"""
return self.item_index.get_indexer(items)

def score(self, user, items, raw=False):
"""
Score a set of items for a user. User and item parameters must be indices
into the matrices.
Args:
user(int): the user index
items(array-like of int): the item indices
raw(bool): if ``True``, do return raw scores without biases added back.
Returns:
numpy.ndarray: the scores for the items.
"""

# get user vector
uv = self.user_features[user, :]
# get item matrix
im = self.item_features[items, :]
rv = np.matmul(im, uv)
assert rv.shape[0] == len(items)
assert len(rv.shape) == 1

if not raw:
# add bias back in
rv = rv + self.global_bias
if self.user_bias is not None:
rv = rv + self.user_bias.iloc[user]
if self.item_bias is not None:
rv = rv + self.item_bias.iloc[items].values

return rv
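For orientation, a small usage sketch of the class above (hypothetical user/item IDs and random factor matrices; both bias vectors left as None, which score() permits):

import numpy as np
import pandas as pd
from lenskit.algorithms.mf_common import BiasMFModel

# hypothetical model: 2 users, 3 items, 4 latent features, global bias 3.5
users = pd.Index(['u1', 'u2'])
items = pd.Index(['i1', 'i2', 'i3'])
umat = np.random.randn(2, 4)
imat = np.random.randn(3, 4)
model = BiasMFModel(users, items, 3.5, None, None, umat, imat)

uidx = model.lookup_user('u1')             # 0; unknown users return -1
iidx = model.lookup_items(['i2', 'i9'])    # array([1, -1]); 'i9' is unknown
scores = model.score(uidx, iidx[iidx >= 0])   # dot products plus global bias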
