From 435e7fb1e6abf8afbcfc9f7e8943bdd89845ca4a Mon Sep 17 00:00:00 2001
From: Rolf Jagerman
Date: Wed, 28 Jun 2017 15:04:46 +0200
Subject: [PATCH 1/5] Update ListPL to be more efficient on GPU with new cupy functions

---
 shoelace/loss/listwise.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/shoelace/loss/listwise.py b/shoelace/loss/listwise.py
index d6ef7c1..fdd36aa 100644
--- a/shoelace/loss/listwise.py
+++ b/shoelace/loss/listwise.py
@@ -91,12 +91,10 @@ def pl_sample(self, t):
         parameterized by the target labels
         """
         xp = cuda.get_array_module(t)
-        if not hasattr(xp, 'asnumpy'):
-            xp.asnumpy = lambda x: x
         t = t[:, 0]

         probs = xp.exp(t * self.α)
         probs /= xp.sum(probs)
-        return np.random.choice(probs.shape[0], probs.shape[0], replace=False,
-                                 p=xp.asnumpy(probs))
+        return xp.random.choice(probs.shape[0], probs.shape[0], replace=False,
+                                p=probs)

From 524ade8d43a5e6d81247183f958ab1d467b8950b Mon Sep 17 00:00:00 2001
From: Rolf Jagerman
Date: Wed, 28 Jun 2017 18:28:55 +0200
Subject: [PATCH 2/5] Simplify loss function code and make it more modular

---
 shoelace/loss/listwise.py            | 120 +++++++++++----------------
 test/examples/test_linear_network.py |  16 +++-
 test/functions/test_logcumsumexp.py  |   2 +-
 test/loss/test_listwise.py           |  42 ++--------
 4 files changed, 71 insertions(+), 109 deletions(-)

diff --git a/shoelace/loss/listwise.py b/shoelace/loss/listwise.py
index fdd36aa..a615026 100644
--- a/shoelace/loss/listwise.py
+++ b/shoelace/loss/listwise.py
@@ -1,100 +1,76 @@
-import numpy as np
-import chainer
 import chainer.functions as F
-from chainer import Chain, cuda
+from chainer import cuda
 from shoelace.functions.logcumsumexp import logcumsumexp


-class AbstractListLoss(Chain):
-    """
-    An abstract listwise loss function
-
-    This loss calls the prediction function on the target variable and calls
-    a local `AbstractListLoss.loss` function which should be implemented by
-    subclasses
-    """
-    def __init__(self, predictor):
-        super(AbstractListLoss, self).__init__(predictor=predictor)
-
-    def __call__(self, x, t):
-        x_hat = self.predictor(x)
-        loss = self.loss(x_hat, t)
-        return loss
-
-    def loss(self, x, t):
-        raise NotImplementedError
-
-
-class ListMLELoss(AbstractListLoss):
+def listmle(x, t):
     """
     The ListMLE loss as in Xia et al (2008), Listwise Approach to Learning to
     Rank - Theory and Algorithm.
+
+    :param x: The activation of the previous layer
+    :param t: The target labels
+    :return: The loss
     """
-    def __init__(self, predictor):
-        super(ListMLELoss, self).__init__(predictor=predictor)
-
-    def loss(self, x, t):
-        """
-        Assuming target labels are already sorted by relevance
-        :param x: The x variable
-        :param t: The target variable
-        :return: The loss
-        """
-        final = logcumsumexp(x)
-        return F.sum(final - x)
+
+    # Get the ground truth by sorting activations by the relevance labels
+    xp = cuda.get_array_module(t)
+    t_hat = t[:, 0]
+    x_hat = x[xp.flip(xp.argsort(t_hat), axis=0)]
+
+    # Compute MLE loss
+    final = logcumsumexp(x_hat)
+    return F.sum(final - x_hat)


-class ListNetLoss(AbstractListLoss):
+def listnet(x, t):
     """
     The Top-1 approximated ListNet loss as in Cao et al (2007), Learning to
     Rank: From Pairwise Approach to Listwise Approach
+
+    :param x: The activation of the previous layer
+    :param t: The target labels
+    :return: The loss
     """
-    def __init__(self, predictor):
-        super(ListNetLoss, self).__init__(predictor=predictor)
-
-    def loss(self, x, t):
-        """
-        ListNet top-1 reduces to a softmax and simple cross entropy
-        :param x: The x variable
-        :param t: The target variable
-        :return: The loss
-        """
-        st = F.softmax(t, axis=0)
-        sx = F.softmax(x, axis=0)
-        return -F.mean(st * F.log(sx))
+
+    # ListNet top-1 reduces to a softmax and simple cross entropy
+    st = F.softmax(t, axis=0)
+    sx = F.softmax(x, axis=0)
+    return -F.mean(st * F.log(sx))


-class ListPLLoss(AbstractListLoss):
+def listpl(x, t, α=15.0):
     """
     The ListPL loss, a stochastic variant of ListMLE that in expectation
     approximates the true ListNet loss.
+
+    :param x: The activation of the previous layer
+    :param t: The target labels
+    :param α: The smoothing factor
+    :return: The loss
     """
-    def __init__(self, predictor, α=15.0):
-        super(ListPLLoss, self).__init__(predictor=predictor)
-        self.α = α
-
-    def loss(self, x, t):
-        # Sample permutation from PL(t)
-        index = self.pl_sample(t)
-        x = x[index]
+
+    # Sample permutation from PL(t)
+    index = _pl_sample(t, α)
+    x = x[index]

-        # Compute MLE loss
-        final = logcumsumexp(x)
-        return F.sum(final - x)
+    # Compute MLE loss
+    final = logcumsumexp(x)
+    return F.sum(final - x)


-    def pl_sample(self, t):
-        """
-        Sample from the plackett luce distribution directly
-        :param t: The target labels
-        :return: A random permutation from the plackett-luce distribution
-            parameterized by the target labels
-        """
-        xp = cuda.get_array_module(t)
-        t = t[:, 0]
+def _pl_sample(t, α):
+    """
+    Sample from the Plackett-Luce distribution directly

-        probs = xp.exp(t * self.α)
-        probs /= xp.sum(probs)
-        return xp.random.choice(probs.shape[0], probs.shape[0], replace=False,
-                                p=probs)
+    :param t: The target labels
+    :return: A random permutation from the Plackett-Luce distribution
+        parameterized by the target labels
+    """
+    xp = cuda.get_array_module(t)
+    t = t[:, 0]
+    probs = xp.exp(t * α)
+    probs /= xp.sum(probs)
+    return xp.random.choice(probs.shape[0], probs.shape[0], replace=False,
+                            p=probs)
diff --git a/test/examples/test_linear_network.py b/test/examples/test_linear_network.py
index 59e6aa1..29d813a 100644
--- a/test/examples/test_linear_network.py
+++ b/test/examples/test_linear_network.py
@@ -1,13 +1,23 @@
 import numpy as np
-from chainer import training, optimizers, links
+from chainer import training, optimizers, links, Chain
 from chainer.dataset import convert
 from nose.tools import assert_almost_equal

 from shoelace.iterator import LtrIterator
-from shoelace.loss.listwise import ListNetLoss
+from shoelace.loss.listwise import listnet
 from test.utils import get_dataset


+class Ranker(Chain):
+    def __init__(self, predictor, loss):
+        super(Ranker, self).__init__(predictor=predictor)
+        self.loss = loss
+
+    def __call__(self, x, t):
+        x_hat = self.predictor(x)
+        return self.loss(x_hat, t)
+
+
 def test_linear_network():

     # To ensure repeatability of experiments
@@ -20,7 +30,7 @@ def test_linear_network():

     # Create neural network with chainer and apply our loss function
     predictor = links.Linear(None, 1)
-    loss = ListNetLoss(predictor)
+    loss = Ranker(predictor, listnet)

     # Build optimizer, updater and trainer
     optimizer = optimizers.Adam(alpha=0.2)
diff --git a/test/functions/test_logcumsumexp.py b/test/functions/test_logcumsumexp.py
index 74d2560..ca74bda 100644
--- a/test/functions/test_logcumsumexp.py
+++ b/test/functions/test_logcumsumexp.py
@@ -1,5 +1,5 @@
 import numpy as np
-from nose.tools import assert_equal, assert_true
+from nose.tools import assert_true
 from shoelace.functions.logcumsumexp import logcumsumexp, LogCumsumExp
 from chainer import Variable

diff --git a/test/loss/test_listwise.py b/test/loss/test_listwise.py
index a275ef2..1bf8d14 100644
--- a/test/loss/test_listwise.py
+++ b/test/loss/test_listwise.py
@@ -1,59 +1,37 @@
 import numpy as np
-from nose.tools import raises, assert_equal, assert_almost_equal
-from chainer import Variable, Link
-from shoelace.loss.listwise import ListNetLoss, ListMLELoss, ListPLLoss, \
-    AbstractListLoss
-
-
-class Identity(Link):
-    def __init__(self):
-        super(Identity, self).__init__()
-
-    def __call__(self, x):
-        return x
-
-
-@raises(NotImplementedError)
-def test_abstract_notimplemented():
-    x = np.array([[3., 3., 2., 0.]]).T
-    t = np.array([[3., 2., 0., 1.]]).T
-    loss = AbstractListLoss(Identity())
-    loss(x, t)
+from nose.tools import assert_equal, assert_almost_equal
+from shoelace.loss.listwise import listnet, listmle, listpl


 def test_listnet():
     x = np.array([[3., 3., 2., 0.]]).T
     t = np.array([[0.5, 1.0, 0.3, 0.5]]).T
-    loss = ListNetLoss(Identity())
-    result = loss(x, t)
+    result = listnet(x, t)
     assert_equal(result.data, 0.43439806229182915)


 def test_listnet_near_zero_loss():
     x = np.array([[600., 400., 200., 0.]]).T
     t = np.array([[600., 400., 200., 0.]]).T
-    loss = ListNetLoss(Identity())
-    result = loss(x, t)
+    result = listnet(x, t)
     assert_almost_equal(result.data, 0.0)


 def test_listmle():
     x = np.array([[3., 3., 2., 0.]]).T
     t = np.array([[0.5, 1.0, 0.3, 0.5]]).T
-    loss = ListMLELoss(Identity())
-    result = loss(x, t)
-    assert_equal(result.data, 1.3587430505329978)
+    result = listmle(x, t)
+    assert_equal(result.data, 4.545076727008247)


 def test_listmle_near_zero_loss():
     x = np.array([[600., 400., 200., 0.]]).T
     t = np.array([[600., 400., 200., 0.]]).T
-    loss = ListMLELoss(Identity())
-    result = loss(x, t)
+    result = listmle(x, t)
     assert_almost_equal(result.data, 0.0)


@@ -61,9 +39,8 @@ def test_listpl():
     np.random.seed(4101)
     x = np.array([[3., 3., 2., 0.]]).T
     t = np.array([[0.5, 1.0, 0.3, 0.5]]).T
-    loss = ListPLLoss(Identity())
-    result = loss(x, t)
+    result = listpl(x, t)
     assert_equal(result.data, 3.358743050532998)


@@ -71,7 +48,6 @@ def test_listpl_near_zero_loss():
     np.random.seed(4101)
     x = np.array([[40., 20., 0.]]).T
     t = np.array([[40., 20., 0.]]).T
-    loss = ListPLLoss(Identity())
-    result = loss(x, t)
+    result = listpl(x, t)
     assert_almost_equal(result.data, 0.0)

From 668150cf6649f9c9b628a63f58ac87cb1b603c0b Mon Sep 17 00:00:00 2001
From: Rolf Jagerman
Date: Wed, 28 Jun 2017 18:29:16 +0200
Subject: [PATCH 3/5] Add first version of nDCG evaluation
---
 shoelace/evaluation.py  |  62 ++++++++++++++++++++++++
 test/test_evaluation.py | 105 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 167 insertions(+)
 create mode 100644 shoelace/evaluation.py
 create mode 100644 test/test_evaluation.py

diff --git a/shoelace/evaluation.py b/shoelace/evaluation.py
new file mode 100644
index 0000000..77add21
--- /dev/null
+++ b/shoelace/evaluation.py
@@ -0,0 +1,62 @@
+from chainer import cuda, function
+
+
+class NDCG(function.Function):
+    def __init__(self, k=0):
+        self.k = k
+
+    def forward(self, inputs):
+        xp = cuda.get_array_module(*inputs)
+        y, t = inputs
+
+        # Assert arrays have the same shape
+        if t.shape != y.shape:
+            raise ValueError("Input arrays have different shapes")
+
+        # Computing nDCG on empty array should just return 0.0
+        if t.shape[0] == 0:
+            return xp.asarray(0.0),
+
+        # Compute predicted indices by arg sorting
+        predicted_indices = xp.argsort(y)
+        best_indices = xp.argsort(t)
+
+        # Predicted and theoretically best relevance labels
+        predicted_relevance = xp.flip(t[predicted_indices], axis=0)
+        best_relevance = xp.flip(t[best_indices], axis=0)
+
+        # Compute needed statistics
+        length = predicted_relevance.shape[0]
+        arange = xp.arange(length)
+        last = min(self.k, length)
+        if last < 1:
+            last = length
+
+        # Compute regular DCG
+        dcg_numerator = 2 ** predicted_relevance[:last] - 1
+        dcg_denominator = xp.log2(arange[:last] + 2)
+        dcg = xp.sum(dcg_numerator / dcg_denominator)
+
+        # Compute iDCG for normalization
+        idcg_numerator = (2 ** best_relevance[:last] - 1)
+        idcg_denominator = (xp.log2(arange[:last] + 2))
+        idcg = xp.sum(idcg_numerator / idcg_denominator)
+
+        if idcg == 0.0:
+            return xp.asarray(1.0),
+
+        return xp.asarray(dcg / idcg),
+
+
+def ndcg(y, t, k=0):
+    """
+    Computes the nDCG@k for a given list of predicted relevance scores (y)
+    and a given list of true relevance labels (t)
+
+    :param y: The predicted relevance scores
+    :param t: The ground truth relevance labels
+    :param k: The cut-off point (if set to 0 or smaller, no cut-off is
+        applied)
+    :return: The nDCG@k value
+    """
+    return NDCG(k=k)(y, t)
diff --git a/test/test_evaluation.py b/test/test_evaluation.py
new file mode 100644
index 0000000..0adbadf
--- /dev/null
+++ b/test/test_evaluation.py
@@ -0,0 +1,105 @@
+import numpy as np
+from nose.tools import raises, assert_equal
+
+from shoelace.evaluation import ndcg
+
+
+def test_ndcg():
+
+    # Set up data
+    prediction = np.array([0.1, 0.9, 0.2, 3.0, 0.15])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 0.73213389587665278)
+
+
+def test_ndcg_2():
+
+    # Set up data
+    prediction = np.array([0.1, 0.9, 0.2, 0.15, 3.0])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 0.73213389587665278)
+
+
+def test_ndcg_3():
+
+    # Set up data
+    prediction = np.array([0.1, 0.9, 0.2, 0.15, 3.0])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 2.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 0.8259562683091511)
+
+
+def test_ndcg_perfect():
+
+    # Set up data
+    prediction = np.array([4.0, 3.0, 2.0, 1.0, 0.0])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 1.0)
+
+
+def test_ndcg_minimal():
+
+    # Set up data
+    prediction = np.arange(10).astype(dtype=np.float32)
+    ground_truth = np.flip(prediction, axis=0)
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 0.39253964576233569)
+
+
+def test_ndcg_at_k():
+
+    # Set up data
+    prediction = np.array([0.3, 0.3, 0.2, 2.14, 0.23])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])
+
+    # Compute and assert nDCG@3 value
+    assert_equal(ndcg(prediction, ground_truth, k=3).data, 0.69031878315427031)
+
+
+def test_empty_ndcg():
+
+    # Set up data
+    prediction = np.array([])
+    ground_truth = np.array([])
+
+    # Assert nDCG of empty lists
+    assert_equal(ndcg(prediction, ground_truth).data, 0.0)
+
+
+def test_ndcg_no_preferences():
+
+    # Set up data
+    prediction = np.array([0.3, 0.3, 0.2, 2.14, 0.23])
+    ground_truth = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 1.0)
+
+
+def test_ndcg_negative_predictions():
+
+    # Set up data
+    prediction = np.array([-0.1, -0.3, 1.9, -0.9, -0.2])
+    ground_truth = np.array([0.0, 1.0, 1.0, 0.0, 0.0])
+
+    # Compute and assert nDCG value
+    assert_equal(ndcg(prediction, ground_truth).data, 0.8772153153380493)
+
+
+@raises(ValueError)
+def test_unequal_ndcg():
+
+    # Set up data
+    prediction = np.array([0.3, 0.3, 0.2])
+    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0, 2.3])
+
+    # This should raise a ValueError because the lists aren't of equal length
+    ndcg(prediction, ground_truth)

From c38f0fd56d6124f65698f7177cb16f7ba87c36f4 Mon Sep 17 00:00:00 2001
From: Rolf Jagerman
Date: Wed, 28 Jun 2017 18:40:19 +0200
Subject: [PATCH 4/5] Update readme to reflect new loss code

---
 README.md | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index a98044d..c4c0c61 100644
--- a/README.md
+++ b/README.md
@@ -30,9 +30,9 @@ Additionally, we provide minibatch iterators for Learning to Rank datasets. Thes
 Currently we provide implementations for the following loss functions

- * Top-1 ListNet: `shoelace.loss.listwise.ListNetLoss`
- * ListMLE: `shoelace.loss.listwise.ListMLELoss`
- * ListPL: `shoelace.loss.listwise.ListPLLoss`
+ * Top-1 ListNet: `shoelace.loss.listwise.listnet`
+ * ListMLE: `shoelace.loss.listwise.listmle`
+ * ListPL: `shoelace.loss.listwise.listpl`

 ## Example

@@ -40,26 +40,28 @@ Here is an example script that will train up a single-layer linear neural networ

     from shoelace.dataset import LtrDataset
     from shoelace.iterator import LtrIterator
-    from shoelace.loss.listwise import ListNetLoss
-    from chainer import training, optimizers, links
+    from shoelace.loss.listwise import listnet
+    from chainer import training, optimizers, links, Chain
     from chainer.training import extensions

     # Load data and set up iterator
-    with open('./path/to/svmrank.txt', 'r') as f:
+    with open('./path/to/ranksvm.txt', 'r') as f:
         training_set = LtrDataset.load_txt(f)
     training_iterator = LtrIterator(training_set, repeat=True, shuffle=True)
-
-    # Create neural network with chainer and apply our loss function
+
+    # Create neural network with chainer and apply loss function
     predictor = links.Linear(None, 1)
-    loss = ListNetLoss(predictor)
-
+    class Ranker(Chain):
+        def __call__(self, x, t):
+            return listnet(self.predictor(x), t)
+    loss = Ranker(predictor=predictor)
+
     # Build optimizer, updater and trainer
     optimizer = optimizers.Adam()
     optimizer.setup(loss)
     updater = training.StandardUpdater(training_iterator, optimizer)
     trainer = training.Trainer(updater, (40, 'epoch'))
     trainer.extend(extensions.ProgressBar())
-
+
     # Train neural network
     trainer.run()
-

From 520ed58869fe614e4e3352a12b14ed875a8564b7 Mon Sep 17 00:00:00 2001
From: Rolf Jagerman
Date: Thu, 29 Jun 2017 11:46:21 +0200
Subject: [PATCH 5/5] Bump version to 0.2.0

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 8218beb..b7f0f99 100644
--- a/setup.py
+++ b/setup.py
@@ -2,10 +2,10 @@
 setup(
     name='shoelace',
-    version='0.1.0',
+    version='0.2.0',
     description='Neural Learning to Rank using Chainer',
     url='https://github.com/rjagerman/shoelace',
-    download_url = 'https://github.com/rjagerman/shoelace/archive/v0.1.0.tar.gz',
+    download_url = 'https://github.com/rjagerman/shoelace/archive/v0.2.0.tar.gz',
     author='Rolf Jagerman',
     author_email='rjagerman@gmail.com',
     license='MIT',
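Taken together, the five patches above make the following end-to-end sketch possible. It is not taken from any commit in this series: it reuses the `Ranker` wrapper added to `test/examples/test_linear_network.py`, the functional losses from PATCH 2 and the `ndcg` function from PATCH 3, and it assumes the existing `LtrDataset`/`LtrIterator` API, a placeholder data path, and that a minibatch produced by `LtrIterator` holds a single ranking list that `chainer.dataset.convert.concat_examples` can collate:

    from chainer import Chain, links, optimizers, training
    from chainer.dataset import convert

    from shoelace.dataset import LtrDataset
    from shoelace.iterator import LtrIterator
    from shoelace.loss.listwise import listmle
    from shoelace.evaluation import ndcg


    class Ranker(Chain):
        # Same wrapper as in test/examples/test_linear_network.py: a predictor
        # link combined with one of the listwise loss functions
        def __init__(self, predictor, loss):
            super(Ranker, self).__init__(predictor=predictor)
            self.loss = loss

        def __call__(self, x, t):
            return self.loss(self.predictor(x), t)


    # Load a RankSVM-formatted dataset (placeholder path) and train briefly
    with open('./path/to/ranksvm.txt', 'r') as f:
        training_set = LtrDataset.load_txt(f)
    training_iterator = LtrIterator(training_set, repeat=True, shuffle=True)

    loss = Ranker(links.Linear(None, 1), listmle)
    optimizer = optimizers.Adam(alpha=0.2)
    optimizer.setup(loss)
    updater = training.StandardUpdater(training_iterator, optimizer)
    training.Trainer(updater, (10, 'epoch')).run()

    # Score one ranking list and compare it to its relevance labels with nDCG@10
    x, t = convert.concat_examples(training_iterator.next())
    scores = loss.predictor(x)
    print(ndcg(scores.data[:, 0], t[:, 0], k=10).data)

Because the losses are plain functions after PATCH 2, the same `Ranker` wrapper works for `listnet`, `listmle` and `listpl` alike, and `ndcg` operates directly on the raw score arrays without touching the training loop.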