Merge pull request #303 from mdekstrand/feature/246-hit
Add a hit metric to compute hit rates
mdekstrand committed Feb 12, 2022
2 parents d88b41b + ef4a930 commit fdb5d0d
Showing 3 changed files with 195 additions and 0 deletions.
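
For orientation, here is a minimal sketch of how the new metric plugs into the top-N evaluation API exercised by the tests below; the tiny frames and the 'hit3' column name are made up for the example, while the RecListAnalysis usage mirrors test_hit_bulk_k in the new test file:

import pandas as pd
from lenskit import topn
from lenskit.metrics.topn import hit

# two users' top-3 recommendation lists
recs = pd.DataFrame({
    'user': [1, 1, 1, 2, 2, 2],
    'item': [10, 11, 12, 20, 21, 22],
    'rank': [1, 2, 3, 1, 2, 3],
})
# held-out relevant items: user 1's list contains item 11, user 2's contains none
test = pd.DataFrame({'user': [1, 2], 'item': [11, 99]})

rla = topn.RecListAnalysis()
rla.add_metric(hit, name='hit3', k=3)
res = rla.compute(recs, test)

# per-user scores are 1/0; their mean is the hit rate (here 0.5)
print(res['hit3'].mean())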
1 change: 1 addition & 0 deletions docs/evaluation/topn-metrics.rst
@@ -88,6 +88,7 @@ These metrics treat the recommendation list as a classification of relevant item

.. autofunction:: precision
.. autofunction:: recall
.. autofunction:: hit

Ranked List Metrics
-------------------
54 changes: 54 additions & 0 deletions lenskit/metrics/topn.py
@@ -103,6 +103,60 @@ def _bulk_recall(recs, truth, k=None):
    return scores['ngood'] / scores['nrel']


def hit(recs, truth, k=None):
    """
    Compute whether or not a list is a hit; any list with at least one relevant item in the
    first :math:`k` positions (:math:`L_{\\le k} \\cap I_u^{\\mathrm{test}} \\ne \\emptyset`)
    is scored as 1, and lists with no relevant items as 0. When averaged over the recommendation
    lists, this computes the *hit rate* :cite:p:`Deshpande2004-ht`.

    .. math::
        \\mathrm{hit}(L, u) =
        \\begin{cases}
            1 & \\text{if } L_{\\le k} \\cap I_u^{\\mathrm{test}} \\ne \\emptyset \\\\
            0 & \\text{otherwise}
        \\end{cases}

    This metric has a bulk implementation.
    """
    nrel = len(truth)
    if nrel == 0:
        return None

    if k is not None:
        nrel = min(nrel, k)
        recs = recs.iloc[:k]

    good = recs['item'].isin(truth.index)
    if np.any(good):
        return 1
    else:
        return 0
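
A quick per-list illustration of the behavior defined above, using small hand-made frames in the same layout as the _test_hit helper in the new test file:

import pandas as pd
from lenskit.metrics.topn import hit

recs = pd.DataFrame({'item': [1, 2, 3, 4]})
truth = pd.DataFrame({'item': [3, 5, 7]}).set_index('item')

hit(recs, truth)           # 1: item 3 appears in the list
hit(recs, truth, k=2)      # 0: no relevant item in the first two positions
hit(recs, truth.iloc[:0])  # None: the user has no relevant items at all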


@bulk_impl(hit)
def _bulk_hit(recs, truth, k=None):
    tcounts = truth.reset_index().groupby('LKTruthID')['item'].count()

    if k is not None:
        _log.debug('truncating to k for hit rate')
        tcounts = np.minimum(tcounts, k)
        recs = recs[recs['rank'] <= k]

    good = recs.join(truth, on=['LKTruthID', 'item'], how='inner')
    gcounts = good.groupby('LKRecID')['item'].count()

    # we need all lists, because some might have no truth (oops), some no recs (also oops)
    lists = recs[['LKRecID', 'LKTruthID']].drop_duplicates()

    scores = lists.join(gcounts.to_frame('ngood'), on='LKRecID', how='left')
    scores['ngood'].fillna(0, inplace=True)

    scores = scores.join(tcounts.to_frame('nrel'), on='LKTruthID', how='left')
    scores = scores.set_index('LKRecID')

    # a list is a hit if it matched at least one test item
    good = scores['ngood'] > 0
    good = good.astype('f4')
    # lists with no truth items score NaN, not 0, so they drop out of averages
    good[scores['nrel'] == 0] = np.nan
    return good


def recip_rank(recs, truth, k=None):
    """
    Compute the reciprocal rank :cite:p:`Kantor1997-lm` of the first relevant
140 changes: 140 additions & 0 deletions tests/test_topn_hit.py
@@ -0,0 +1,140 @@
import logging
import numpy as np
import pandas as pd

from pytest import approx

from lenskit.metrics.topn import hit
from lenskit.util.test import demo_recs
from lenskit import topn

_log = logging.getLogger(__name__)


def _test_hit(items, rel, **kwargs):
    recs = pd.DataFrame({'item': items})
    truth = pd.DataFrame({'item': rel}).set_index('item')
    return hit(recs, truth, **kwargs)


def test_hit_empty_zero():
    hr = _test_hit([], [1, 3])
    assert hr == 0


def test_hit_norel_na():
    hr = _test_hit([1, 3], [])
    assert hr is None


def test_hit_simple_cases():
    hr = _test_hit([1, 3], [1, 3])
    assert hr == 1

    hr = _test_hit([1], [1, 3])
    assert hr == 1

    hr = _test_hit([1, 2, 3, 4], [1, 3])
    assert hr == 1

    hr = _test_hit([1, 2, 3, 4], range(5, 10))
    assert hr == 0

    hr = _test_hit([1, 2, 3, 4], range(4, 9))
    assert hr == 1


def test_hit_series():
    hr = _test_hit(pd.Series([1, 3]), pd.Series([1, 3]))
    assert hr == 1

    hr = _test_hit(pd.Series([1, 2, 3]), pd.Series([1, 3, 5, 7]))
    assert hr == 1

    hr = _test_hit(pd.Series([1, 2, 3]), pd.Series([5, 7]))
    assert hr == 0


def test_hit_series_set():
    hr = _test_hit(pd.Series([1, 2, 3, 4]), [1, 3, 5, 7])
    assert hr == 1

    hr = _test_hit(pd.Series([1, 2, 3]), range(4, 9))
    assert hr == 0


def test_hit_series_index():
    hr = _test_hit(pd.Series([1, 3]), pd.Index([1, 3]))
    assert hr == 1

    hr = _test_hit(pd.Series([1, 2, 3, 4]), pd.Index([1, 3, 5, 7]))
    assert hr == 1

    hr = _test_hit(pd.Series([1, 2, 3]), pd.Index(range(4, 9)))
    assert hr == 0


def test_hit_series_array():
    hr = _test_hit(pd.Series([1, 3]), np.array([1, 3]))
    assert hr == 1

    hr = _test_hit(pd.Series([1, 2, 3, 4]), np.array([1, 3, 5, 7]))
    assert hr == 1

    hr = _test_hit(pd.Series([1, 2, 3]), np.arange(4, 9, 1, 'u4'))
    assert hr == 0


def test_hit_array():
    hr = _test_hit(np.array([1, 3]), np.array([1, 3]))
    assert hr == 1

    hr = _test_hit(np.array([1, 2, 3, 4]), np.array([1, 3, 5, 7]))
    assert hr == 1

    hr = _test_hit(np.array([1, 2, 3]), np.arange(4, 9, 1, 'u4'))
    assert hr == 0


def test_hit_long_items():
    rel = np.arange(100)
    items = [1, 0, 150, 3, 10, 30, 120, 4, 17]
    items = np.array(items)

    r = _test_hit(items, rel, k=5)
    assert r == 1

    # shift everything out of the relevant range, then put a relevant item
    # just past the k=5 cutoff; truncation should keep this a miss
    items += 200
    items[5] = 5

    r = _test_hit(items, rel, k=5)
    assert r == 0


def test_hit_partial_rel():
    rel = np.arange(100)
    items = [1, 0, 150, 3, 10]

    r = _test_hit(items, rel, k=10)
    assert r == 1


def test_hit_bulk_k(demo_recs):
    "bulk and normal match"
    train, test, recs = demo_recs
    assert test['user'].value_counts().max() > 5

    rla = topn.RecListAnalysis()
    rla.add_metric(hit, name='hk', k=5)
    rla.add_metric(hit)
    # metrics without the bulk capabilities
    rla.add_metric(lambda *a, **k: hit(*a, **k), name='ind_hk', k=5)
    rla.add_metric(lambda *a: hit(*a), name='ind_h')
    res = rla.compute(recs, test)

    print(res)
    _log.info('hit mismatches:\n%s',
              res[res.hit != res.ind_h])

    assert res.hit.values == approx(res.ind_h.values)
    assert res.hk.values == approx(res.ind_hk.values)
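
Once the analysis has run, the hit rate itself is just the mean of the per-list scores; a hedged sketch continuing from the res frame in test_hit_bulk_k, using the column names added above:

# fraction of lists with at least one relevant item in the top 5
hit_rate_at_5 = res['hk'].mean()

# lists whose users had no test items score NaN, and pandas' mean() skips
# them, so they do not drag the hit rate down
_log.info('hit rate@5: %.3f', hit_rate_at_5)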
