Skip to content
This repository has been archived by the owner on Jun 29, 2020. It is now read-only.

Commit

Permalink
Add first version of nDCG evaluation
Browse files Browse the repository at this point in the history
  • Loading branch information
rjagerman committed Jun 28, 2017
1 parent 524ade8 commit 668150c
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 0 deletions.
62 changes: 62 additions & 0 deletions shoelace/evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from chainer import cuda, function


class NDCG(function.Function):
    """
    Chainer function that evaluates nDCG@k for one list of items.

    The function takes two arrays of identical shape: the predicted scores
    ``y`` and the relevance labels ``t``. Only the forward computation is
    defined in this class.

    :param k: The cut-off point (a value below 1 means the whole list is used)
    """

    def __init__(self, k=0):
        self.k = k

    def forward(self, inputs):
        """
        Computes nDCG@k from a pair of (scores, labels) arrays.

        :param inputs: A pair ``(y, t)`` holding the predicted scores and the
                       relevance labels
        :return: A 1-tuple containing the nDCG@k value as an array
        :raises ValueError: If the two arrays have different shapes
        """
        xp = cuda.get_array_module(*inputs)
        y, t = inputs

        # Scores and labels have to line up element by element
        if y.shape != t.shape:
            raise ValueError("Input arrays have different shapes")

        # An empty list is scored as 0.0
        n_items = t.shape[0]
        if n_items == 0:
            return xp.asarray(0.0),

        # Labels ordered by descending predicted score, and labels in the
        # ideal (descending label) order; argsort is ascending, so reverse
        ranked_labels = t[xp.argsort(y)][::-1]
        ideal_labels = t[xp.argsort(t)][::-1]

        # Effective cut-off: never past the end of the list, and a value
        # below 1 selects the whole list
        cutoff = min(self.k, n_items)
        if cutoff < 1:
            cutoff = n_items

        # Position discounts log2(position + 2) for 0-based positions
        discounts = xp.log2(xp.arange(n_items)[:cutoff] + 2)

        def discounted_gain(labels):
            # Sum of (2^label - 1) / discount over the first `cutoff` items
            return xp.sum((2 ** labels[:cutoff] - 1) / discounts)

        dcg = discounted_gain(ranked_labels)
        idcg = discounted_gain(ideal_labels)

        # A zero ideal DCG would make the ratio undefined; 1.0 is returned
        # instead
        if idcg == 0.0:
            return xp.asarray(1.0),

        return xp.asarray(dcg / idcg),


def ndcg(y, t, k=0):
    """
    Computes the nDCG@k for a given list of predicted relevance scores (y)
    and a given list of true relevance labels (t)

    Note the argument order: the predicted scores come first, the ground
    truth labels second.

    :param y: The predicted relevance scores
    :param t: The ground truth relevance labels (same shape as y)
    :param k: The cut-off point (if set to smaller or equal to 0, it does not
              cut-off)
    :return: The nDCG@k value, as produced by calling the NDCG function
             (the numeric value is available through its ``data`` attribute)
    :raises ValueError: If y and t have different shapes
    """
    return NDCG(k=k)(y, t)
105 changes: 105 additions & 0 deletions test/test_evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import numpy as np
from nose.tools import raises, assert_equal

from shoelace.evaluation import ndcg


def test_ndcg():
    """Checks nDCG over the full list for a fixed score/label pair."""

    # Set up data
    prediction = np.array([0.1, 0.9, 0.2, 3.0, 0.15])
    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])

    # The expected value comes out of floating-point log/sum arithmetic, so
    # compare within a tolerance instead of requiring bit-for-bit equality
    np.testing.assert_allclose(
        ndcg(prediction, ground_truth).data, 0.73213389587665278)


def test_ndcg_2():
    """Checks nDCG when the top score is on the last (label 1.0) item."""

    # Set up data
    prediction = np.array([0.1, 0.9, 0.2, 0.15, 3.0])
    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])

    # The expected value comes out of floating-point log/sum arithmetic, so
    # compare within a tolerance instead of requiring bit-for-bit equality
    np.testing.assert_allclose(
        ndcg(prediction, ground_truth).data, 0.73213389587665278)


def test_ndcg_3():
    """Checks nDCG when the top-scored item carries label 2.0."""

    # Set up data
    prediction = np.array([0.1, 0.9, 0.2, 0.15, 3.0])
    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 2.0])

    # The expected value comes out of floating-point log/sum arithmetic, so
    # compare within a tolerance instead of requiring bit-for-bit equality
    np.testing.assert_allclose(
        ndcg(prediction, ground_truth).data, 0.8259562683091511)


def test_ndcg_perfect():
    """Scores that follow the label order must give an nDCG of exactly 1.0."""

    # Scores are strictly decreasing, labels are non-increasing
    scores = np.array([4.0, 3.0, 2.0, 1.0, 0.0])
    labels = np.array([3.0, 3.0, 2.0, 1.0, 1.0])

    # Evaluate, then compare against the perfect score
    result = ndcg(scores, labels)
    assert_equal(result.data, 1.0)


def test_ndcg_minimal():
    """Checks nDCG when the scores are the exact reverse of the labels."""

    # Set up data
    prediction = np.arange(10).astype(dtype=np.float32)
    ground_truth = np.flip(prediction, axis=0)

    # The expected value comes out of floating-point log/sum arithmetic, so
    # compare within a tolerance instead of requiring bit-for-bit equality
    np.testing.assert_allclose(
        ndcg(prediction, ground_truth).data, 0.39253964576233569)


def test_ndcg_at_k():
    """Checks nDCG with an explicit cut-off of k=3."""

    # Set up data
    prediction = np.array([0.3, 0.3, 0.2, 2.14, 0.23])
    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])

    # The expected nDCG@3 value comes out of floating-point log/sum
    # arithmetic, so compare within a tolerance instead of requiring
    # bit-for-bit equality
    np.testing.assert_allclose(
        ndcg(prediction, ground_truth, k=3).data, 0.69031878315427031)


def test_empty_ndcg():
    """Empty score and label arrays must give an nDCG of 0.0."""

    # Two empty arrays stand in for an empty list of items
    no_scores = np.array([])
    no_labels = np.array([])

    # Evaluate, then compare against the value defined for empty input
    result = ndcg(no_scores, no_labels)
    assert_equal(result.data, 0.0)


def test_ndcg_no_preferences():
    """All-zero relevance labels must give an nDCG of 1.0."""

    # Arbitrary scores paired with labels that are all 0.0
    scores = np.array([0.3, 0.3, 0.2, 2.14, 0.23])
    labels = np.zeros(5)

    # Evaluate, then compare against the value defined for a zero ideal DCG
    result = ndcg(scores, labels)
    assert_equal(result.data, 1.0)


def test_ndcg_negative_predictions():
    """Checks nDCG when most predicted scores are negative."""

    # Set up data
    prediction = np.array([-0.1, -0.3, 1.9, -0.9, -0.2])
    ground_truth = np.array([0.0, 1.0, 1.0, 0.0, 0.0])

    # The expected value comes out of floating-point log/sum arithmetic, so
    # compare within a tolerance instead of requiring bit-for-bit equality
    np.testing.assert_allclose(
        ndcg(prediction, ground_truth).data, 0.8772153153380493)


@raises(ValueError)
def test_unequal_ndcg():
    """Score and label arrays of different lengths must raise ValueError."""

    # Three scores against six labels
    short_scores = np.array([0.3, 0.3, 0.2])
    long_labels = np.array([3.0, 3.0, 2.0, 1.0, 1.0, 2.3])

    # The decorator expects this call to fail with a ValueError
    ndcg(short_scores, long_labels)

0 comments on commit 668150c

Please sign in to comment.