Skip to content

Commit

Permalink
Merge pull request #270 from mdekstrand/feature/split-hpf
Browse files Browse the repository at this point in the history
Split HPF into a separate package
  • Loading branch information
mdekstrand committed Oct 22, 2021
2 parents 338b7fc + 4b235a5 commit 1ccdc3e
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 159 deletions.
10 changes: 10 additions & 0 deletions doc/addons.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Add-On Algorithms
=================

Several add-on packages to LensKit provide additional collections of algorithms or bridges
to implementations in other packages.

.. toctree::
:caption: External Algorithms

Poisson factorization <https://lkpy.lenskit.org/projects/lenskit-hpf/>
10 changes: 6 additions & 4 deletions doc/algorithms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,13 @@ TensorFlow
tf.IntegratedBiasMF
tf.BPR

External Library Wrappers
~~~~~~~~~~~~~~~~~~~~~~~~~
Add-On Packages
~~~~~~~~~~~~~~~

See :doc:`add-on algorithms <addons>` for additional algorithm families and bridges to other
packages.

.. autosummary::

implicit.BPR
implicit.ALS
hpf.HPF
12 changes: 0 additions & 12 deletions doc/hpf.rst

This file was deleted.

4 changes: 2 additions & 2 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Resources
.. toctree::
:maxdepth: 2
:caption: Running Experiments

datasets
crossfold
batch
Expand All @@ -52,8 +52,8 @@ Resources
bias
knn
mf
addons
tf
hpf
implicit

.. toctree::
Expand Down
57 changes: 1 addition & 56 deletions lenskit/algorithms/hpf.py
Original file line number Diff line number Diff line change
@@ -1,56 +1 @@
import logging

import pandas as pd

from .mf_common import MFPredictor

_logger = logging.getLogger(__name__)


class HPF(MFPredictor):
    """
    Hierarchical Poisson factorization recommender, backed by the
    `hpfrec <https://hpfrec.readthedocs.io/en/latest/>`_ package.

    Args:
        features(int): the number of latent features.
        **kwargs: extra options forwarded to :py:class:`hpfrec.HPF`.
    """

    def __init__(self, features, **kwargs):
        # Keep the extra options so they can be handed to hpfrec when fitting.
        self._kwargs = kwargs
        self.features = features

    def fit(self, ratings, **kwargs):
        """
        Fit the HPF model to a frame of interaction data.

        Args:
            ratings: data frame with ``user`` and ``item`` columns and an
                optional ``rating`` column; when ``rating`` is absent every
                row is given a count of 1.0.
            **kwargs: accepted but not used by this method.

        Returns:
            HPF: this object, with the learned indexes and feature matrices
            stored on it.
        """
        # hpfrec is imported inside fit rather than at module import time.
        import hpfrec

        user_index = pd.Index(ratings.user.unique())
        item_index = pd.Index(ratings.item.unique())

        if 'rating' not in ratings.columns:
            _logger.info('no ratings found, assuming 1.0')
            counts = 1.0
        else:
            counts = ratings.rating.values.copy()

        # hpfrec is given positional codes (reindex=False below), so translate
        # the user/item identifiers through the indexes built above.
        frame = pd.DataFrame({
            'UserId': user_index.get_indexer(ratings.user),
            'ItemId': item_index.get_indexer(ratings.item),
            'Count': counts,
        })

        model = hpfrec.HPF(self.features, reindex=False, **self._kwargs)

        _logger.info('fitting HPF model with %d features', self.features)
        model.fit(frame)

        self.user_index_ = user_index
        self.item_index_ = item_index
        self.user_features_ = model.Theta
        self.item_features_ = model.Beta

        return self

    def predict_for_user(self, user, items, ratings=None):
        """
        Score ``items`` for ``user`` by delegating to ``score_by_ids``.

        ``ratings`` is accepted but not used here.
        """
        # NOTE(review): score_by_ids is presumably inherited from MFPredictor -- confirm.
        return self.score_by_ids(user, items)
from lenskit_hpf import HPF # noqa: F401
35 changes: 35 additions & 0 deletions lkbuild/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
"""

import sys
from pathlib import Path
from invoke import task
from . import env
import yaml

__ALL__ = [
'dev_lock',
Expand Down Expand Up @@ -39,6 +41,39 @@ def dev_lock(c, platform=None, extras=None, version=None, blas=None, env_file=Fa
c.run(cmd)


@task(iterable=['extras'])
def env_file(c, platform=None, extras=None, version=None, blas=None, dev_deps=True,
             output=None, name=None):
    "Create an unresolved environment file"
    # Builds a Conda environment spec (channels + dependencies, optionally a
    # name) from pyproject.toml plus the optional Python-version and BLAS spec
    # files, and writes it as YAML to `output` or, if no output is given, to
    # standard output.
    from conda_lock.conda_lock import aggregate_lock_specs, parse_source_files

    # Fall back to the platform reported by the build environment helper.
    platform = platform or env.conda_platform()

    sources = [Path('pyproject.toml')]
    if version:
        sources += [Path(f'lkbuild/python-{version}-spec.yml')]
    if blas:
        sources += [Path(f'lkbuild/{blas}-spec.yml')]

    # Parse every source file, then merge the per-file specs into one.
    merged = aggregate_lock_specs(parse_source_files(sources, platform, dev_deps, extras))
    env_spec = {'channels': merged.channels, 'dependencies': merged.specs}
    if name:
        env_spec['name'] = name

    if not output:
        yaml.dump(env_spec, sys.stdout)
        return

    # The progress message goes to stderr, separate from the YAML output.
    print('writing environment to', output, file=sys.stderr)
    with Path(output).open('w') as stream:
        yaml.dump(env_spec, stream)


@task
def conda_platform(c, gh_output=False):
plat = env.conda_platform()
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ classifiers = [
]
requires-python = ">= 3.7"
description-file = "README.md"
license = "MIT"
requires = [
"pandas >=1.0, ==1.*",
"numpy >= 1.17",
Expand Down Expand Up @@ -65,7 +66,7 @@ demo = [
"nbval >= 0.9",
"matplotlib ~= 3.4",
]
hpf = ["hpfrec"]
hpf = ["lenskit-hpf"]
implicit = ["implicit"]
sklearn = ["scikit-learn >= 0.22"]
tf = ["tensorflow >=2.1,<2.6"]
Expand Down
84 changes: 0 additions & 84 deletions tests/test_hpf.py

This file was deleted.

0 comments on commit 1ccdc3e

Please sign in to comment.