From 435e7fb1e6abf8afbcfc9f7e8943bdd89845ca4a Mon Sep 17 00:00:00 2001
From: Rolf Jagerman
Date: Wed, 28 Jun 2017 15:04:46 +0200
Subject: [PATCH 1/5] Update ListPL to be more efficient on GPU with new cupy functions

---
 shoelace/loss/listwise.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/shoelace/loss/listwise.py b/shoelace/loss/listwise.py
index d6ef7c1..fdd36aa 100644
--- a/shoelace/loss/listwise.py
+++ b/shoelace/loss/listwise.py
@@ -91,12 +91,10 @@ def pl_sample(self, t):
         parameterized by the target labels
         """
         xp = cuda.get_array_module(t)
-        if not hasattr(xp, 'asnumpy'):
-            xp.asnumpy = lambda x: x
         t = t[:, 0]

         probs = xp.exp(t * self.α)
         probs /= xp.sum(probs)
-        return np.random.choice(probs.shape[0], probs.shape[0], replace=False,
-                                 p=xp.asnumpy(probs))
+        return xp.random.choice(probs.shape[0], probs.shape[0], replace=False,
+                                p=probs)

From 524ade8d43a5e6d81247183f958ab1d467b8950b Mon Sep 17 00:00:00 2001
From: Rolf Jagerman
Date: Wed, 28 Jun 2017 18:28:55 +0200
Subject: [PATCH 2/5] Simplify loss function code and make it more modular

---
 shoelace/loss/listwise.py            | 120 +++++++++++----------------
 test/examples/test_linear_network.py |  16 +++-
 test/functions/test_logcumsumexp.py  |   2 +-
 test/loss/test_listwise.py           |  42 ++--------
 4 files changed, 71 insertions(+), 109 deletions(-)

diff --git a/shoelace/loss/listwise.py b/shoelace/loss/listwise.py
index fdd36aa..a615026 100644
--- a/shoelace/loss/listwise.py
+++ b/shoelace/loss/listwise.py
@@ -1,100 +1,76 @@
-import numpy as np
-import chainer
 import chainer.functions as F
-from chainer import Chain, cuda
+from chainer import cuda
 from shoelace.functions.logcumsumexp import logcumsumexp


-class AbstractListLoss(Chain):
-    """
-    An abstract listwise loss function
-
-    This loss calls the prediction function on the target variable and calls
-    a local `AbstractListLoss.loss` function which should be implemented by
-    subclasses
-    """
-    def __init__(self, predictor):
-        super(AbstractListLoss, self).__init__(predictor=predictor)
-
-    def __call__(self, x, t):
-        x_hat = self.predictor(x)
-        loss = self.loss(x_hat, t)
-        return loss
-
-    def loss(self, x, t):
-        raise NotImplementedError
-
-
-class ListMLELoss(AbstractListLoss):
+def listmle(x, t):
     """
     The ListMLE loss as in Xia et al (2008), Listwise Approach to Learning to
     Rank - Theory and Algorithm.
+
+    :param x: The activation of the previous layer
+    :param t: The target labels
+    :return: The loss
     """
-    def __init__(self, predictor):
-        super(ListMLELoss, self).__init__(predictor=predictor)
-
-    def loss(self, x, t):
-        """
-        Assuming target labels are already sorted by relevance
-        :param x: The x variable
-        :param t: The target variable
-        :return: The loss
-        """
-        final = logcumsumexp(x)
-        return F.sum(final - x)
+
+    # Get the ground truth by sorting activations by the relevance labels
+    xp = cuda.get_array_module(t)
+    t_hat = t[:, 0]
+    x_hat = x[xp.flip(xp.argsort(t_hat), axis=0)]
+
+    # Compute MLE loss
+    final = logcumsumexp(x_hat)
+    return F.sum(final - x_hat)


-class ListNetLoss(AbstractListLoss):
+def listnet(x, t):
     """
     The Top-1 approximated ListNet loss as in Cao et al (2007), Learning to
     Rank: From Pairwise Approach to Listwise Approach
+
+    :param x: The activation of the previous layer
+    :param t: The target labels
+    :return: The loss
     """
-    def __init__(self, predictor):
-        super(ListNetLoss, self).__init__(predictor=predictor)
-
-    def loss(self, x, t):
-        """
-        ListNet top-1 reduces to a softmax and simple cross entropy
-        :param x: The x variable
-        :param t: The target variable
-        :return: The loss
-        """
-        st = F.softmax(t, axis=0)
-        sx = F.softmax(x, axis=0)
-        return -F.mean(st * F.log(sx))
+
+    # ListNet top-1 reduces to a softmax and simple cross entropy
+    st = F.softmax(t, axis=0)
+    sx = F.softmax(x, axis=0)
+    return -F.mean(st * F.log(sx))


-class ListPLLoss(AbstractListLoss):
+def listpl(x, t, α=15.0):
     """
     The ListPL loss, a stochastic variant of ListMLE that in expectation
     approximates the true ListNet loss.
+
+    :param x: The activation of the previous layer
+    :param t: The target labels
+    :param α: The smoothing factor
+    :return: The loss
     """
-    def __init__(self, predictor, α=15.0):
-        super(ListPLLoss, self).__init__(predictor=predictor)
-        self.α = α
-
-    def loss(self, x, t):
-        # Sample permutation from PL(t)
-        index = self.pl_sample(t)
-        x = x[index]
+
+    # Sample permutation from PL(t)
+    index = _pl_sample(t, α)
+    x = x[index]

-        # Compute MLE loss
-        final = logcumsumexp(x)
-        return F.sum(final - x)
+    # Compute MLE loss
+    final = logcumsumexp(x)
+    return F.sum(final - x)


-    def pl_sample(self, t):
-        """
-        Sample from the plackett luce distribution directly
-        :param t: The target labels
-        :return: A random permutation from the plackett-luce distribution
-            parameterized by the target labels
-        """
-        xp = cuda.get_array_module(t)
-        t = t[:, 0]
+def _pl_sample(t, α):
+    """
+    Sample from the Plackett-Luce distribution directly

-        probs = xp.exp(t * self.α)
-        probs /= xp.sum(probs)
-        return xp.random.choice(probs.shape[0], probs.shape[0], replace=False,
-                                p=probs)
+    :param t: The target labels
+    :return: A random permutation from the Plackett-Luce distribution
+        parameterized by the target labels
+    """
+    xp = cuda.get_array_module(t)
+    t = t[:, 0]
+    probs = xp.exp(t * α)
+    probs /= xp.sum(probs)
+    return xp.random.choice(probs.shape[0], probs.shape[0], replace=False,
+                            p=probs)
diff --git a/test/examples/test_linear_network.py b/test/examples/test_linear_network.py
index 59e6aa1..29d813a 100644
--- a/test/examples/test_linear_network.py
+++ b/test/examples/test_linear_network.py
@@ -1,13 +1,23 @@
 import numpy as np
-from chainer import training, optimizers, links
+from chainer import training, optimizers, links, Chain
 from chainer.dataset import convert
 from nose.tools import assert_almost_equal

 from shoelace.iterator import LtrIterator
-from shoelace.loss.listwise import ListNetLoss
+from shoelace.loss.listwise import listnet
 from test.utils import get_dataset


+class Ranker(Chain):
+    def __init__(self, predictor, loss):
+        super(Ranker, self).__init__(predictor=predictor)
+        self.loss = loss
+
+    def __call__(self, x, t):
+        x_hat = self.predictor(x)
+        return self.loss(x_hat, t)
+
+
 def test_linear_network():

     # To ensure repeatability of experiments
@@ -20,7 +30,7 @@ def test_linear_network():

     # Create neural network with chainer and apply our loss function
     predictor = links.Linear(None, 1)
-    loss = ListNetLoss(predictor)
+    loss = Ranker(predictor, listnet)

     # Build optimizer, updater and trainer
     optimizer = optimizers.Adam(alpha=0.2)
diff --git a/test/functions/test_logcumsumexp.py b/test/functions/test_logcumsumexp.py
index 74d2560..ca74bda 100644
--- a/test/functions/test_logcumsumexp.py
+++ b/test/functions/test_logcumsumexp.py
@@ -1,5 +1,5 @@
 import numpy as np
-from nose.tools import assert_equal, assert_true
+from nose.tools import assert_true
 from shoelace.functions.logcumsumexp import logcumsumexp, LogCumsumExp
 from chainer import Variable

diff --git a/test/loss/test_listwise.py b/test/loss/test_listwise.py
index a275ef2..1bf8d14 100644
--- a/test/loss/test_listwise.py
+++ b/test/loss/test_listwise.py
@@ -1,59 +1,37 @@
 import numpy as np
-from nose.tools import raises, assert_equal, assert_almost_equal
-from chainer import Variable, Link
-from shoelace.loss.listwise import ListNetLoss, ListMLELoss, ListPLLoss, \
-    AbstractListLoss
-
-
-class Identity(Link):
-    def __init__(self):
-        super(Identity, self).__init__()
-
-    def __call__(self, x):
-        return x
-
-
-@raises(NotImplementedError)
-def test_abstract_notimplemented():
-    x = np.array([[3., 3., 2., 0.]]).T
-    t = np.array([[3., 2., 0., 1.]]).T
-    loss = AbstractListLoss(Identity())
-    loss(x, t)
+from nose.tools import assert_equal, assert_almost_equal
+from shoelace.loss.listwise import listnet, listmle, listpl


 def test_listnet():
     x = np.array([[3., 3., 2., 0.]]).T
     t = np.array([[0.5, 1.0, 0.3, 0.5]]).T
-    loss = ListNetLoss(Identity())
-    result = loss(x, t)
+    result = listnet(x, t)
     assert_equal(result.data, 0.43439806229182915)


 def test_listnet_near_zero_loss():
     x = np.array([[600., 400., 200., 0.]]).T
     t = np.array([[600., 400., 200., 0.]]).T
-    loss = ListNetLoss(Identity())
-    result = loss(x, t)
+    result = listnet(x, t)
     assert_almost_equal(result.data, 0.0)


 def test_listmle():
     x = np.array([[3., 3., 2., 0.]]).T
     t = np.array([[0.5, 1.0, 0.3, 0.5]]).T
-    loss = ListMLELoss(Identity())
-    result = loss(x, t)
-    assert_equal(result.data, 1.3587430505329978)
+    result = listmle(x, t)
+    assert_equal(result.data, 4.545076727008247)


 def test_listmle_near_zero_loss():
     x = np.array([[600., 400., 200., 0.]]).T
     t = np.array([[600., 400., 200., 0.]]).T
-    loss = ListMLELoss(Identity())
-    result = loss(x, t)
+    result = listmle(x, t)
     assert_almost_equal(result.data, 0.0)


@@ -61,9 +39,8 @@ def test_listpl():
     np.random.seed(4101)
     x = np.array([[3., 3., 2., 0.]]).T
     t = np.array([[0.5, 1.0, 0.3, 0.5]]).T
-    loss = ListPLLoss(Identity())
-    result = loss(x, t)
+    result = listpl(x, t)
     assert_equal(result.data, 3.358743050532998)


@@ -71,7 +48,6 @@ def test_listpl_near_zero_loss():
     np.random.seed(4101)
     x = np.array([[40., 20., 0.]]).T
     t = np.array([[40., 20., 0.]]).T
-    loss = ListPLLoss(Identity())
-    result = loss(x, t)
+    result = listpl(x, t)
     assert_almost_equal(result.data, 0.0)

From 668150cf6649f9c9b628a63f58ac87cb1b603c0b Mon Sep 17 00:00:00 2001
From: Rolf Jagerman
Date: Wed, 28 Jun 2017 18:29:16 +0200
Subject: [PATCH 3/5] Add first version of nDCG evaluation
---
 shoelace/evaluation.py  |  62 ++++++++++++++++++++++++
 test/test_evaluation.py | 105 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 167 insertions(+)
 create mode 100644 shoelace/evaluation.py
 create mode 100644 test/test_evaluation.py

diff --git a/shoelace/evaluation.py b/shoelace/evaluation.py
new file mode 100644
index 0000000..77add21
--- /dev/null
+++ b/shoelace/evaluation.py
@@ -0,0 +1,62 @@
+from chainer import cuda, function
+
+
+class NDCG(function.Function):
+    def __init__(self, k=0):
+        self.k = k
+
+    def forward(self, inputs):
+        xp = cuda.get_array_module(*inputs)
+        y, t = inputs
+
+        # Assert arrays have the same shape
+        if t.shape != y.shape:
+            raise ValueError("Input arrays have different shapes")
+
+        # Computing nDCG on empty array should just return 0.0
+        if t.shape[0] == 0:
+            return xp.asarray(0.0),
+
+        # Compute predicted indices by arg sorting
+        predicted_indices = xp.argsort(y)
+        best_indices = xp.argsort(t)
+
+        # Predicted and theoretically best relevance labels
+        predicted_relevance = xp.flip(t[predicted_indices], axis=0)
+        best_relevance = xp.flip(t[best_indices], axis=0)
+
+        # Compute needed statistics
+        length = predicted_relevance.shape[0]
+        arange = xp.arange(length)
+        last = min(self.k, length)
+        if last < 1:
+            last = length
+
+        # Compute regular DCG
+        dcg_numerator = 2 ** predicted_relevance[:last] - 1
+        dcg_denominator = xp.log2(arange[:last] + 2)
+        dcg = xp.sum(dcg_numerator / dcg_denominator)
+
+        # Compute iDCG for normalization
+        idcg_numerator = (2 ** best_relevance[:last] - 1)
+        idcg_denominator = (xp.log2(arange[:last] + 2))
+        idcg = xp.sum(idcg_numerator / idcg_denominator)
+
+        if idcg == 0.0:
+            return xp.asarray(1.0),
+
+        return xp.asarray(dcg / idcg),
+
+
+def ndcg(y, t, k=0):
+    """
+    Computes the nDCG@k for a given list of predicted relevance scores (y)
+    and a given list of true relevance labels (t)
+
+    :param y: The predicted relevance scores
+    :param t: The ground truth relevance labels
+    :param k: The cut-off point (if set to 0 or smaller, no cut-off is
+        applied)
+    :return: The nDCG@k value
+    """
+    return NDCG(k=k)(y, t)
diff --git a/test/test_evaluation.py b/test/test_evaluation.py
new file mode 100644
index 0000000..0adbadf
--- /dev/null
+++ b/test/test_evaluation.py
@@ -0,0 +1,105 @@
+import numpy as np
+from nose.tools import raises, assert_equal
+
+from shoelace.evaluation import ndcg
+
+
+def test_ndcg():
+
+    # Set up data
+    prediction = np.array([0.1, 0.9, 0.2, 3.0, 0.15])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 0.73213389587665278)
+
+
+def test_ndcg_2():
+
+    # Set up data
+    prediction = np.array([0.1, 0.9, 0.2, 0.15, 3.0])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 0.73213389587665278)
+
+
+def test_ndcg_3():
+
+    # Set up data
+    prediction = np.array([0.1, 0.9, 0.2, 0.15, 3.0])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 2.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 0.8259562683091511)
+
+
+def test_ndcg_perfect():
+
+    # Set up data
+    prediction = np.array([4.0, 3.0, 2.0, 1.0, 0.0])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 1.0)
+
+
+def test_ndcg_minimal():
+
+    # Set up data
+    prediction = np.arange(10).astype(dtype=np.float32)
+    ground_truth = np.flip(prediction, axis=0)
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 0.39253964576233569)
+
+
+def test_ndcg_at_k():
+
+    # Set up data
+    prediction = np.array([0.3, 0.3, 0.2, 2.14, 0.23])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])
+
+    # Compute and assert nDCG@3 value
+    assert_equal(ndcg(prediction, ground_truth, k=3).data, 0.69031878315427031)
+
+
+def test_empty_ndcg():
+
+    # Set up data
+    prediction = np.array([])
+    ground_truth = np.array([])
+
+    # Assert nDCG of empty lists
+    assert_equal(ndcg(prediction, ground_truth).data, 0.0)
+
+
+def test_ndcg_no_preferences():
+
+    # Set up data
+    prediction = np.array([0.3, 0.3, 0.2, 2.14, 0.23])
+    ground_truth = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 1.0)
+
+
+def test_ndcg_negative_predictions():
+
+    # Set up data
+    prediction = np.array([-0.1, -0.3, 1.9, -0.9, -0.2])
+    ground_truth = np.array([0.0, 1.0, 1.0, 0.0, 0.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 0.8772153153380493)
+
+
+@raises(ValueError)
+def test_unequal_ndcg():
+
+    # Set up data
+    prediction = np.array([0.3, 0.3, 0.2])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0, 2.3])
+
+    # This should raise a ValueError because the lists aren't of equal length
+    ndcg(prediction, ground_truth)

From c38f0fd56d6124f65698f7177cb16f7ba87c36f4 Mon Sep 17 00:00:00 2001
From: Rolf Jagerman
Date: Wed, 28 Jun 2017 18:40:19 +0200
Subject: [PATCH 4/5] Update readme to reflect new loss code

---
 README.md | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index a98044d..c4c0c61 100644
--- a/README.md
+++ b/README.md
@@ -30,9 +30,9 @@ Additionally, we provide minibatch iterators for Learning to Rank datasets. Thes
 Currently we provide implementations for the following loss functions

- * Top-1 ListNet: `shoelace.loss.listwise.ListNetLoss`
- * ListMLE: `shoelace.loss.listwise.ListMLELoss`
- * ListPL: `shoelace.loss.listwise.ListPLLoss`
+ * Top-1 ListNet: `shoelace.loss.listwise.listnet`
+ * ListMLE: `shoelace.loss.listwise.listmle`
+ * ListPL: `shoelace.loss.listwise.listpl`

 ## Example

@@ -40,26 +40,28 @@ Here is an example script that will train up a single-layer linear neural networ

     from shoelace.dataset import LtrDataset
     from shoelace.iterator import LtrIterator
-    from shoelace.loss.listwise import ListNetLoss
-    from chainer import training, optimizers, links
+    from shoelace.loss.listwise import listnet
+    from chainer import training, optimizers, links, Chain
     from chainer.training import extensions

     # Load data and set up iterator
-    with open('./path/to/svmrank.txt', 'r') as f:
+    with open('./path/to/ranksvm.txt', 'r') as f:
         training_set = LtrDataset.load_txt(f)
     training_iterator = LtrIterator(training_set, repeat=True, shuffle=True)
-
-    # Create neural network with chainer and apply our loss function
+
+    # Create neural network with chainer and apply loss function
     predictor = links.Linear(None, 1)
-    loss = ListNetLoss(predictor)
-
+    class Ranker(Chain):
+        def __call__(self, x, t):
+            return listnet(self.predictor(x), t)
+    loss = Ranker(predictor=predictor)
+
     # Build optimizer, updater and trainer
     optimizer = optimizers.Adam()
     optimizer.setup(loss)
     updater = training.StandardUpdater(training_iterator, optimizer)
     trainer = training.Trainer(updater, (40, 'epoch'))
     trainer.extend(extensions.ProgressBar())
-
+
     # Train neural network
     trainer.run()
-

From 520ed58869fe614e4e3352a12b14ed875a8564b7 Mon Sep 17 00:00:00 2001
From: Rolf Jagerman
Date: Thu, 29 Jun 2017 11:46:21 +0200
Subject: [PATCH 5/5] Bump version to 0.2.0

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 8218beb..b7f0f99 100644
--- a/setup.py
+++ b/setup.py
@@ -2,10 +2,10 @@
 setup(
     name='shoelace',
-    version='0.1.0',
+    version='0.2.0',
     description='Neural Learning to Rank using Chainer',
     url='https://github.com/rjagerman/shoelace',
-    download_url = 'https://github.com/rjagerman/shoelace/archive/v0.1.0.tar.gz',
+    download_url = 'https://github.com/rjagerman/shoelace/archive/v0.2.0.tar.gz',
     author='Rolf Jagerman',
     author_email='rjagerman@gmail.com',
     license='MIT',
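Taken together, the five patches above make the following end-to-end sketch possible. It is not taken from any commit in this series: it reuses the `Ranker` wrapper added to `test/examples/test_linear_network.py`, the functional losses from PATCH 2 and the `ndcg` function from PATCH 3, and it assumes the existing `LtrDataset`/`LtrIterator` API, a placeholder data path, and that a minibatch produced by `LtrIterator` holds a single ranking list that `chainer.dataset.convert.concat_examples` can collate:

    from chainer import Chain, links, optimizers, training
    from chainer.dataset import convert

    from shoelace.dataset import LtrDataset
    from shoelace.iterator import LtrIterator
    from shoelace.loss.listwise import listmle
    from shoelace.evaluation import ndcg


    class Ranker(Chain):
        # Same wrapper as in test/examples/test_linear_network.py: a predictor
        # link combined with one of the listwise loss functions
        def __init__(self, predictor, loss):
            super(Ranker, self).__init__(predictor=predictor)
            self.loss = loss

        def __call__(self, x, t):
            return self.loss(self.predictor(x), t)


    # Load a RankSVM-formatted dataset (placeholder path) and train briefly
    with open('./path/to/ranksvm.txt', 'r') as f:
        training_set = LtrDataset.load_txt(f)
    training_iterator = LtrIterator(training_set, repeat=True, shuffle=True)

    loss = Ranker(links.Linear(None, 1), listmle)
    optimizer = optimizers.Adam(alpha=0.2)
    optimizer.setup(loss)
    updater = training.StandardUpdater(training_iterator, optimizer)
    training.Trainer(updater, (10, 'epoch')).run()

    # Score one ranking list and compare it to its relevance labels with nDCG@10
    x, t = convert.concat_examples(training_iterator.next())
    scores = loss.predictor(x)
    print(ndcg(scores.data[:, 0], t[:, 0], k=10).data)

Because the losses are plain functions after PATCH 2, the same `Ranker` wrapper works for `listnet`, `listmle` and `listpl` alike, and `ndcg` operates directly on the raw score arrays without touching the training loop.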