Simplify and improve tests.

This commit does the following:
- simplify the code
- improve the docs
- make eliminating train interactions more efficient
maciejkula · Oct 13, 2017 · b14ddcd · b14ddcd
1 parent d022c05
commit b14ddcd
Show file tree

Hide file tree

Showing 2 changed files with 56 additions and 28 deletions.
diff --git a/spotlight/evaluation.py b/spotlight/evaluation.py
@@ -102,6 +102,14 @@ def sequence_mrr_score(model, test, exclude_preceding=False):
     return np.array(mrrs)
 
 
+def _get_precision_recall(predictions, targets, k):
+
+    predictions = predictions[:k]
+    num_hit = len(set(predictions).intersection(set(targets)))
+
+    return float(num_hit) / len(predictions), float(num_hit) / len(targets)
+
+
 def precision_recall_score(model, test, train=None, k=10):
     """
     Compute Precision@k and Recall@k scores. One score
@@ -118,61 +126,57 @@ def precision_recall_score(model, test, train=None, k=10):
         Test interactions.
     train: :class:`spotlight.interactions.Interactions`, optional
         Train interactions. If supplied, scores of known
-        interactions will be set to very low values and so not
-        affect the MRR.
+        interactions will not affect the computed metrics.
     k: int or array of int,
         The maximum number of predicted items
     Returns
     -------
 
-    (Precision@k, Recall@k): numpy array of shape (num_users,)
+    (Precision@k, Recall@k): numpy array of shape (num_users, len(k))
         A tuple of Precisions@k and Recalls@k for each user in test.
+        If k is a scalar, will return a tuple of vectors. If k is an
+        array, will return a tuple of arrays, where each row corresponds
+        to a user and each column corresponds to a value of k.
     """
 
     test = test.tocsr()
 
     if train is not None:
         train = train.tocsr()
 
-    if not isinstance(k, list):
-        ks = [k]
-    else:
-        ks = k
+    if np.isscalar(k):
+        k = np.array([k])
 
-    precisions = [list() for _ in range(len(ks))]
-    recalls = [list() for _ in range(len(ks))]
+    precision = []
+    recall = []
 
     for user_id, row in enumerate(test):
 
         if not len(row.indices):
             continue
 
         predictions = -model.predict(user_id)
-        predictions = predictions.argsort()
 
         if train is not None:
             rated = train[user_id].indices
-        else:
-            rated = []
+            predictions[rated] = FLOAT_MAX
 
-        predictions = [p for p in predictions if p not in rated]
+        predictions = predictions.argsort()
 
         targets = row.indices
 
-        for i, _k in enumerate(ks):
-            pred = predictions[:_k]
-            num_hit = len(set(pred).intersection(set(targets)))
-            precisions[i].append(float(num_hit) / len(pred))
-            recalls[i].append(float(num_hit) / len(targets))
+        user_precision, user_recall = zip(*[
+            _get_precision_recall(predictions, targets, x)
+            for x in k
+        ])
 
-    precisions = [np.array(i) for i in precisions]
-    recalls = [np.array(i) for i in recalls]
+        precision.append(user_precision)
+        recall.append(user_recall)
 
-    if not isinstance(k, list):
-        precisions = precisions[0]
-        recalls = recalls[0]
+    precision = np.array(precision).squeeze()
+    recall = np.array(recall).squeeze()
 
-    return precisions, recalls
+    return precision, recall
 
 
 def rmse_score(model, test):

diff --git a/tests/test_evaluation_metrics.py b/tests/test_evaluation_metrics.py
@@ -2,6 +2,8 @@
 
 import numpy as np
 
+import pytest
+
 from spotlight.evaluation import precision_recall_score
 from spotlight.cross_validation import random_train_test_split
 from spotlight.datasets import movielens
@@ -11,22 +13,44 @@
 CUDA = bool(os.environ.get('SPOTLIGHT_CUDA', False))
 
 
-def test_precision_recall():
+@pytest.fixture(scope='module')
+def data():
 
     interactions = movielens.get_movielens_dataset('100K')
 
     train, test = random_train_test_split(interactions,
                                           random_state=RANDOM_STATE)
 
     model = ImplicitFactorizationModel(loss='bpr',
-                                       n_iter=10,
+                                       n_iter=1,
                                        batch_size=1024,
                                        learning_rate=1e-2,
                                        l2=1e-6,
                                        random_state=RANDOM_STATE,
                                        use_cuda=CUDA)
     model.fit(train)
 
-    precision, recall = precision_recall_score(model, test, train)
+    return train, test, model
+
+
+@pytest.mark.parametrize('k', [
+    1,
+    [1, 1],
+    [1, 1, 1]
+])
+def test_precision_recall(data, k):
+
+    (train, test, model) = data
+
+    interactions = movielens.get_movielens_dataset('100K')
+    train, test = random_train_test_split(interactions,
+                                          random_state=RANDOM_STATE)
+
+    precision, recall = precision_recall_score(model, test, train, k=k)
+
+    assert precision.shape == recall.shape
 
-    print(np.mean(precision), np.mean(recall))
+    if not isinstance(k, list):
+        assert len(precision.shape) == 1
+    else:
+        assert precision.shape[1] == len(k)