This repository has been archived by the owner on Jun 29, 2020. It is now read-only.
Commit
Merge pull request #1 from rjagerman/fix-updated-cupy
Update 0.2.0
Showing 8 changed files with 254 additions and 125 deletions.
@@ -0,0 +1,62 @@

from chainer import cuda, function


class NDCG(function.Function):
    def __init__(self, k=0):
        self.k = k

    def forward(self, inputs):
        xp = cuda.get_array_module(*inputs)
        y, t = inputs

        # Assert arrays have the same shape
        if t.shape != y.shape:
            raise ValueError("Input arrays have different shapes")

        # Computing nDCG on an empty array should just return 0.0
        if t.shape[0] == 0:
            return xp.asarray(0.0),

        # Compute predicted and ideal rankings by arg-sorting
        predicted_indices = xp.argsort(y)
        best_indices = xp.argsort(t)

        # Predicted and theoretically best relevance labels
        # (argsort is ascending, so flip to get descending order)
        predicted_relevance = xp.flip(t[predicted_indices], axis=0)
        best_relevance = xp.flip(t[best_indices], axis=0)

        # Compute needed statistics
        length = predicted_relevance.shape[0]
        arange = xp.arange(length)
        last = min(self.k, length)
        if last < 1:
            last = length

        # Compute regular DCG
        dcg_numerator = 2 ** predicted_relevance[:last] - 1
        dcg_denominator = xp.log2(arange[:last] + 2)
        dcg = xp.sum(dcg_numerator / dcg_denominator)

        # Compute iDCG for normalization
        idcg_numerator = (2 ** best_relevance[:last] - 1)
        idcg_denominator = (xp.log2(arange[:last] + 2))
        idcg = xp.sum(idcg_numerator / idcg_denominator)

        # If the ideal DCG is zero, every ranking is equally good
        if idcg == 0.0:
            return xp.asarray(1.0),

        return xp.asarray(dcg / idcg),


def ndcg(y, t, k=0):
    """
    Computes the nDCG@k for a given list of predicted relevance scores and
    a given list of true relevance labels

    :param y: The predicted relevance scores
    :param t: The ground truth relevance labels
    :param k: The cut-off point (if set to smaller or equal to 0, it does
              not cut off)
    :return: The nDCG@k value
    """
    return NDCG(k=k)(y, t)
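
A minimal usage sketch (not part of the diff; the import path shoelace.evaluation is an assumption based on the repository layout). NDCG is a Chainer function, so ndcg returns a Variable and the scalar value lives in its .data attribute:

import numpy as np
from shoelace.evaluation import ndcg  # assumed module path

y = np.array([0.9, 0.2, 0.5, 0.1])  # predicted relevance scores
t = np.array([2.0, 0.0, 1.0, 0.0])  # ground-truth relevance labels

print(ndcg(y, t, k=3).data)  # prints 1.0: the predicted ranking matches the ideal one

Sorting y in descending order ranks the documents with labels 2, 1, 0, 0 first, which is exactly the ideal ordering, so DCG@3 equals iDCG@3 here.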
@@ -1,102 +1,76 @@

The class-based losses (AbstractListLoss, ListMLELoss, ListNetLoss and
ListPLLoss) are replaced by plain loss functions, and the Plackett-Luce
sampler now draws directly via xp.random.choice instead of converting to
NumPy first. The new version of the file:

import chainer.functions as F
from chainer import cuda
from shoelace.functions.logcumsumexp import logcumsumexp


def listmle(x, t):
    """
    The ListMLE loss as in Xia et al (2008), Listwise Approach to Learning
    to Rank: Theory and Algorithm.

    :param x: The activation of the previous layer
    :param t: The target labels
    :return: The loss
    """
    # Get the ground truth by sorting activations by the relevance labels
    xp = cuda.get_array_module(t)
    t_hat = t[:, 0]
    x_hat = x[xp.flip(xp.argsort(t_hat), axis=0)]

    # Compute MLE loss
    final = logcumsumexp(x_hat)
    return F.sum(final - x_hat)


def listnet(x, t):
    """
    The Top-1 approximated ListNet loss as in Cao et al (2007), Learning to
    Rank: From Pairwise Approach to Listwise Approach.

    :param x: The activation of the previous layer
    :param t: The target labels
    :return: The loss
    """
    # ListNet top-1 reduces to a softmax and simple cross entropy
    st = F.softmax(t, axis=0)
    sx = F.softmax(x, axis=0)
    return -F.mean(st * F.log(sx))


def listpl(x, t, α=15.0):
    """
    The ListPL loss, a stochastic variant of ListMLE that in expectation
    approximates the true ListNet loss.

    :param x: The activation of the previous layer
    :param t: The target labels
    :param α: The smoothing factor
    :return: The loss
    """
    # Sample permutation from PL(t)
    index = _pl_sample(t, α)
    x = x[index]

    # Compute MLE loss
    final = logcumsumexp(x)
    return F.sum(final - x)


def _pl_sample(t, α):
    """
    Sample from the Plackett-Luce distribution directly

    :param t: The target labels
    :return: A random permutation from the Plackett-Luce distribution
             parameterized by the target labels
    """
    xp = cuda.get_array_module(t)
    t = t[:, 0]

    probs = xp.exp(t * α)
    probs /= xp.sum(probs)
    return xp.random.choice(probs.shape[0], probs.shape[0], replace=False,
                            p=probs)
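
A minimal training-style sketch (not part of the diff; the import path shoelace.loss.listwise and the toy linear predictor are assumptions). With the new functional API, the loss consumes the predictor's activations x and the labels t directly, rather than wrapping the predictor in a loss Chain as before:

import numpy as np
import chainer.links as L
from shoelace.loss.listwise import listmle, listnet, listpl  # assumed module path

predictor = L.Linear(None, 1)  # scores one document per feature row
x = predictor(np.random.randn(8, 16).astype(np.float32))  # activations, shape (8, 1)
t = np.random.randint(0, 5, (8, 1)).astype(np.float32)    # relevance labels, shape (8, 1)

loss = listmle(x, t)  # or listnet(x, t), or listpl(x, t, α=15.0)
predictor.cleargrads()
loss.backward()  # gradients flow back into the predictor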