Merge pull request #299 from mdekstrand/feature/doc-cleanups

Clean up missing documentation & bad links
lenskit · Feb 11, 2022 · ea900a1 · ea900a1
2 parents 6220d1a + b5b6cd8
commit ea900a1
Show file tree

Hide file tree

Showing 15 changed files with 105 additions and 59 deletions.
diff --git a/docs/batch.rst b/docs/batch.rst
@@ -47,9 +47,10 @@ to reduce the total memory consumption.
 
 Example usage::
 
+    from contextlib import closing
     algo = BiasedMF(50)
     algo = Recommender.adapt(algo)
-    algo = batch.train_isolated(algo, train_ratings)
-    preds = batch.predict(algo, test_ratings)
+    with closing(batch.train_isolated(algo, train_ratings)) as algo:
+        preds = batch.predict(algo, test_ratings)
 
 .. autofunction:: train_isolated
diff --git a/docs/conf.py b/docs/conf.py
@@ -206,12 +206,12 @@
 
 # -- Module Canonicalization ------------------------------------------------
 
-cleanups = {
-    'lenskit': ['Algorithm', 'Recommender', 'Predictor', 'CandidateSelector']
-}
-
-for module, objects in cleanups.items():
-    mod = import_module(module)
-    for name in objects:
-        obj = getattr(mod, name)
-        obj.__module__ = module
+# cleanups = {
+#     'lenskit': ['Algorithm', 'Recommender', 'Predictor', 'CandidateSelector']
+# }
+
+# for module, objects in cleanups.items():
+#     mod = import_module(module)
+#     for name in objects:
+#         obj = getattr(mod, name)
+#         obj.__module__ = module
diff --git a/docs/diagnostics.rst b/docs/diagnostics.rst
@@ -20,3 +20,5 @@ LensKit uses the following warning classes to report anomalous problems in
 use of LensKit.
 
 .. autoclass:: lenskit.DataWarning
+
+.. autoclass:: lenskit.ConfigWarning
diff --git a/docs/evaluation/topn-metrics.rst b/docs/evaluation/topn-metrics.rst
@@ -139,6 +139,6 @@ The :py:func:`bulk_impl` function registers a bulk implementation of a metric::
         # bulk metric implementation
 
 If a bulk implementation of a metric is available, and it is possible to use it, it will be used automatically
-when the corresponding metric is passed to :py:meth:`RecListAnalysis.add_metric`.
+when the corresponding metric is passed to :py:meth:`~lenskit.topn.RecListAnalysis.add_metric`.
 
-.. autofunction: bulk_impl
+.. autofunction:: bulk_impl
diff --git a/docs/lenskit.bib b/docs/lenskit.bib
@@ -132,7 +132,7 @@ @INPROCEEDINGS{Ekstrand2011-bp
   doi        = "10.1145/2043932.2043958"
 }
 
-@ARTICLE{Harper2015-cx,
+@ARTICLE{movielens,
   title    = "The {MovieLens} Datasets: History and Context",
   author   = "Harper, F Maxwell and Konstan, Joseph A",
   journal  = "ACM Transactions on Interactive Intelligent Systems",
@@ -214,22 +214,20 @@ @INPROCEEDINGS{Zhou2008-bj
   doi       = "10.1007/978-3-540-68880-8\_32"
 }
 
-@ARTICLE{Buitinck2013-ks,
-  title         = "{API} design for machine learning software: experiences from
-                   the scikit-learn project",
-  author        = "Buitinck, Lars and Louppe, Gilles and Blondel, Mathieu and
-                   Pedregosa, Fabian and Mueller, Andreas and Grisel, Olivier
-                   and Niculae, Vlad and Prettenhofer, Peter and Gramfort,
-                   Alexandre and Grobler, Jaques and Layton, Robert and
-                   Vanderplas, Jake and Joly, Arnaud and Holt, Brian and
-                   Varoquaux, Ga{\"e}l",
-  month         =  sep,
-  year          =  2013,
-  url           = "http://arxiv.org/abs/1309.0238",
-  archivePrefix = "arXiv",
-  eprint        = "1309.0238",
-  primaryClass  = "cs.LG",
-  arxivid       = "1309.0238"
+@INPROCEEDINGS{Buitinck2013-ks,
+  title      = "{API} design for machine learning software: experiences from
+                the scikit-learn project",
+  booktitle  = "Workshop on Languages for Data Mining and Machine Learning at
+                {ECMLPKDD} 2013",
+  author     = "Buitinck, Lars and Louppe, Gilles and Blondel, Mathieu and
+                Pedregosa, Fabian and Mueller, Andreas and Grisel, Olivier and
+                Niculae, Vlad and Prettenhofer, Peter and Gramfort, Alexandre
+                and Grobler, Jaques and Layton, Robert and Vanderplas, Jake and
+                Joly, Arnaud and Holt, Brian and Varoquaux, Ga{\"e}l",
+  month      =  sep,
+  year       =  2013,
+  url        = "http://arxiv.org/abs/1309.0238",
+  conference = "ECMLPKDD 2013"
 }
 
 @INPROCEEDINGS{Takacs2012-mr,
@@ -262,6 +260,23 @@ @INPROCEEDINGS{Takacs2011-ix
   doi             = "10.1145/2043932.2043987"
 }
 
+@INPROCEEDINGS{Tamm2021-hz,
+  title     = "Quality Metrics in Recommender Systems: Do We Calculate Metrics
+               Consistently?",
+  booktitle = "Fifteenth {ACM} Conference on Recommender Systems",
+  author    = "Tamm, Yan-Martin and Damdinov, Rinchin and Vasilev, Alexey",
+  publisher = "Association for Computing Machinery",
+  pages     = "708--713",
+  series    = "RecSys '21",
+  month     =  sep,
+  year      =  2021,
+  url       = "https://doi.org/10.1145/3460231.3478848",
+  address   = "New York, NY, USA",
+  location  = "Amsterdam, Netherlands",
+  isbn      = "9781450384582",
+  doi       = "10.1145/3460231.3478848"
+}
+
 @ARTICLE{scipy,
   title    = "{SciPy} 1.0: fundamental algorithms for scientific computing in
               Python",
@@ -356,7 +371,7 @@ @ARTICLE{Deshpande2004-ht
   doi       = "10.1145/963770.963776"
 }
 
-@INPROCEEDINGS{Ekstrand2020-if,
+@INPROCEEDINGS{lkpy,
   title           = "{LensKit} for {Python}: {Next-Generation} Software for
                      Recommender System Experiments",
   booktitle       = "Proceedings of the 29th {ACM} International Conference on
@@ -368,6 +383,24 @@ @INPROCEEDINGS{Ekstrand2020-if
   doi             = "10.1145/3340531.3412778"
 }
 
+@ARTICLE{Dacrema2021-hl,
+  title     = "A Troubling Analysis of Reproducibility and Progress in
+               Recommender Systems Research",
+  author    = "Dacrema, Maurizio Ferrari and Boglio, Simone and Cremonesi,
+               Paolo and Jannach, Dietmar",
+  journal   = "ACM Transactions on Information Systems",
+  publisher = "Association for Computing Machinery",
+  volume    =  39,
+  number    =  2,
+  pages     = "1--49",
+  month     =  jan,
+  year      =  2021,
+  url       = "https://doi.org/10.1145/3434185",
+  address   = "New York, NY, USA",
+  issn      = "1094-9224, 1046-8188",
+  doi       = "10.1145/3434185"
+}
+
 @INPROCEEDINGS{Grover2019-nc,
   title      = "Stochastic Optimization of Sorting Networks via Continuous
                 Relaxations",

diff --git a/docs/ranking.rst b/docs/ranking.rst
@@ -1,7 +1,7 @@
 Ranking Methods
 ===============
 
-.. :py:module:: lenskit.algorithms.ranking
+.. module:: lenskit.algorithms.ranking
 
 The :py:mod:`lenskit.algorithms.ranking` module contains various *ranking methods*:
 algorithms that can use scores to produce ranks.  This includes primary rankers, like

diff --git a/lenskit/algorithms/__init__.py b/lenskit/algorithms/__init__.py
@@ -21,15 +21,16 @@ class Algorithm(metaclass=ABCMeta):
     :canonical: lenskit.Algorithm
     """
 
+    IGNORED_PARAMS = []
     """
     Names of parameters to ignore in :meth:`get_params`.
     """
-    IGNORED_PARAMS = []
+
+    EXTRA_PARAMS = []
     """
     Names of extra parameters to include in :meth:`get_params`.  Useful when the
     constructor takes ``**kwargs``.
     """
-    EXTRA_PARAMS = []
 
     @abstractmethod
     def fit(self, ratings, **kwargs):
@@ -53,8 +54,8 @@ def get_params(self, deep=True):
         should match constructor argument names.
 
         The default implementation returns all attributes that match a constructor parameter
-        name.  It should be compatible with :py:meth:`scikit.base.BaseEstimator.get_params`
-        method so that LensKit alogrithms can be cloned with :py:func:`scikit.base.clone`
+        name.  It should be compatible with :py:meth:`sklearn.base.BaseEstimator.get_params`
+        method so that LensKit algorithms can be cloned with :py:func:`sklearn.base.clone`
         as well as :py:func:`lenskit.util.clone`.
 
         Returns:
@@ -80,6 +81,8 @@ class Predictor(Algorithm, metaclass=ABCMeta):
     Predicts user ratings of items.  Predictions are really estimates of the user's like or
     dislike, and the ``Predictor`` interface makes no guarantees about their scale or
     granularity.
+
+    :canonical: lenskit.Predictor
     """
 
     def predict(self, pairs, ratings=None):

diff --git a/lenskit/algorithms/als.py b/lenskit/algorithms/als.py
@@ -313,16 +313,18 @@ def _train_implicit_row_lu(items, ratings, other, otOr):
 
 class BiasedMF(MFPredictor):
     """
-    Biased matrix factorization trained with alternating least squares :cite:p:`Zhou2008-bj`.  This is a
-    prediction-oriented algorithm suitable for explicit feedback data.
+    Biased matrix factorization trained with alternating least squares :cite:p:`Zhou2008-bj`.  This
+    is a prediction-oriented algorithm suitable for explicit feedback data, using the alternating
+    least squares approach to compute :math:`P` and :math:`Q` to minimize the regularized squared
+    reconstruction error of the ratings matrix.
 
     It provides two solvers for the optimization step (the `method` parameter):
 
     ``'cd'`` (the default)
-        Coordinate descent :cite:p:`Takacs2011-ix`, adapted for a separately-trained bias model and to use
-        weighted regularization as in the original ALS paper :cite:p:`Zhou2008-bj`.
+        Coordinate descent :cite:p:`Takacs2011-ix`, adapted for a separately-trained bias model and
+        to use weighted regularization as in the original ALS paper :cite:p:`Zhou2008-bj`.
     ``'lu'``
-        A direct implementation of the original ALS concept :cite:p:`Zhou2008-bj` using LU-decomposition
+        A direct implementation of the original ALS :cite:p:`Zhou2008-bj` using LU-decomposition
         to solve for the optimized matrices.
 
     See the base class :class:`.MFPredictor` for documentation on
@@ -525,8 +527,8 @@ class ImplicitMF(MFPredictor):
     See the base class :class:`.MFPredictor` for documentation on the estimated parameters
     you can extract from a trained model.
 
-    With weight :math:`w`, this function decomposes the matrix :matrix:`\\mathbb{1}^* + Rw`, where
-    $\\mathbb{1}^*$ is an $m \\times n$ matrix of all 1s.
+    With weight :math:`w`, this function decomposes the matrix :math:`\\mathbb{1}^* + Rw`, where
+    :math:`\\mathbb{1}^*` is an :math:`m \\times n` matrix of all 1s.
 
     .. versionchanged:: 0.14
         By default, ``ImplicitMF`` ignores a ``rating`` column if one is present in the training
@@ -546,7 +548,7 @@ class ImplicitMF(MFPredictor):
             Whether to use the `rating` column, if present.  Defaults to ``False``; when ``True``,
             the values from the ``rating`` column are used, and multiplied by ``weight``; if ``False``,
             ImplicitMF treats every rated user-item pair as having a rating of 1.
-        method(string):
+        method(str):
             the training method.
 
             ``'cg'`` (the default)

diff --git a/lenskit/algorithms/ranking.py b/lenskit/algorithms/ranking.py
@@ -18,14 +18,24 @@ class TopN(Recommender, Predictor):
 
     .. note::
         This class does not do anything of its own in :meth:`fit`.  If its
-        predictor and candidate selector are both fit, the top-N recommender
-        does not need to be fit.
+        predictor and candidate selector are both fit separately, the top-N recommender
+        does not need to be fit.  This can be useful when reusing a predictor in other
+        contexts::
+
+            pred = item_knn.ItemItem(20, feedback='implicit')
+            select = UnratedItemCandidateSelector()
+            topn = TopN(pred, select)
+
+            pred.fit(ratings)
+            select.fit(ratings)
+            # topn.fit is unnecessary now
 
     Args:
         predictor(Predictor):
             The underlying predictor.
         selector(CandidateSelector):
-            The candidate selector.  If ``None``, uses :class:`UnratedItemCandidateSelector`.
+            The candidate selector.  If ``None``, uses
+            :class:`UnratedItemCandidateSelector`.
     """
 
     def __init__(self, predictor, selector=None):

diff --git a/lenskit/algorithms/user_knn.py b/lenskit/algorithms/user_knn.py
@@ -168,9 +168,6 @@ def fit(self, ratings, **kwargs):
 
         Args:
             ratings(pandas.DataFrame): (user, item, rating) data for collaborative filtering.
-
-        Returns:
-            UUModel: a memorized model for efficient user-based CF computation.
         """
         util.check_env()
         uir, users, items = sparse_ratings(ratings)

diff --git a/lenskit/metrics/topn.py b/lenskit/metrics/topn.py
@@ -10,6 +10,9 @@
 
 
 def bulk_impl(metric):
+    """
+    Decorator to register a bulk implementation for a metric.
+    """
     def wrap(impl):
         metric.bulk_score = impl
         return impl

diff --git a/lenskit/sharing/__init__.py b/lenskit/sharing/__init__.py
@@ -90,8 +90,8 @@ def persist(model, *, method=None):
     """
     Persist a model for cross-process sharing.
 
-    This will return a persiste dmodel that can be used to reconstruct the model
-    in a worker process (using :func:`reconstruct`).
+    This will return a persisted model that can be used to reconstruct the model
+    in a worker process (using :meth:`PersistedModel.get`).
 
     If no method is provided, this function automatically selects a model persistence
     strategy from the following, in order:

diff --git a/lenskit/topn.py b/lenskit/topn.py
@@ -37,11 +37,6 @@ class RecListAnalysis:
     contains a ``rating`` column, that is used as the users' rating for
     metrics that require it; otherwise, a rating value of 1 is assumed.
 
-    .. warning::
-       Currently, RecListAnalysis will silently drop users who received
-       no recommendations.  We are working on an ergonomic API for fixing
-       this problem.
-
     Args:
         group_cols(list):
             The columns to group by, or ``None`` to use the default.

diff --git a/lenskit/util/__init__.py b/lenskit/util/__init__.py
@@ -32,7 +32,7 @@
 def clone(algo):
     """
     Clone an algorithm, but not its fitted data.  This is like
-    :py:func:`scikit.base.clone`, but may not work on arbitrary SciKit estimators.
+    :func:`sklearn.base.clone`, but may not work on arbitrary SciKit estimators.
     LensKit algorithms are compatible with SciKit clone, however, so feel free
     to use that if you need more general capabilities.
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -59,7 +59,7 @@ dev = [
     "sphinx-autobuild >= 2021",
 ]
 doc = [
-    "sphinx >= 3",
+    "sphinx >= 4.2",
     "sphinxcontrib-bibtex >= 2.0",
     "sphinx_rtd_theme >= 0.5",
     "nbsphinx >= 0.8",