This repository has been archived by the owner on Jun 29, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add first version of nDCG evaluation
- Loading branch information
Showing
2 changed files
with
167 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
from chainer import cuda, function | ||
|
||
|
||
class NDCG(function.Function):
    """Chainer function that evaluates nDCG@k for one list of items.

    The gain of an item with relevance ``r`` is ``2 ** r - 1`` and the
    discount at (1-based) rank ``i`` is ``log2(i + 1)``.
    """

    def __init__(self, k=0):
        # Cut-off rank; any value below 1 means "use the whole list".
        self.k = k

    def forward(self, inputs):
        """Compute nDCG@k of the scores ``y`` against the labels ``t``.

        :param inputs: Pair ``(y, t)`` of equally shaped arrays holding the
                       predicted scores and the relevance labels
        :return: 1-tuple holding the nDCG value as a 0-dim array; 0.0 for
                 empty input and 1.0 when the ideal DCG is zero
        :raises ValueError: If ``y`` and ``t`` have different shapes
        """
        xp = cuda.get_array_module(*inputs)
        y, t = inputs

        if y.shape != t.shape:
            raise ValueError("Input arrays have different shapes")

        # An empty list has nothing to rank; its score is defined as 0.0
        if t.shape[0] == 0:
            return xp.asarray(0.0),

        # Labels ordered by descending predicted score, and by descending
        # label (the best achievable ordering)
        ranked = xp.flip(t[xp.argsort(y)], axis=0)
        ideal = xp.flip(t[xp.argsort(t)], axis=0)

        # Resolve the cut-off: clamp to the list length, and fall back to
        # the full list when k is not positive
        n = ranked.shape[0]
        cutoff = min(self.k, n)
        if cutoff < 1:
            cutoff = n

        # The positional discount is shared by DCG and iDCG
        discount = xp.log2(xp.arange(n)[:cutoff] + 2)

        dcg = xp.sum((2 ** ranked[:cutoff] - 1) / discount)
        idcg = xp.sum((2 ** ideal[:cutoff] - 1) / discount)

        # Without any relevant item every ordering counts as perfect
        if idcg == 0.0:
            return xp.asarray(1.0),

        return xp.asarray(dcg / idcg),
|
||
|
||
def ndcg(y, t, k=0):
    """
    Computes the nDCG@k for given list of predicted relevance scores (y) and
    given list of true relevance labels (t)

    :param y: The predicted relevance scores
    :param t: The ground truth relevance labels
    :param k: The cut-off point (if set to smaller or equal to 0, it does not
              cut-off)
    :return: The nDCG@k value
    """
    return NDCG(k=k)(y, t)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
import numpy as np | ||
from nose.tools import raises, assert_equal | ||
|
||
from shoelace.evaluation import ndcg | ||
|
||
|
||
def test_ndcg():
    """nDCG of an imperfect ranking over graded labels."""

    # Set up data
    prediction = np.array([0.1, 0.9, 0.2, 3.0, 0.15])
    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])

    # Compare with a tight tolerance rather than bit-exact equality, so the
    # test does not depend on the platform's log2/summation rounding
    np.testing.assert_allclose(ndcg(prediction, ground_truth).data,
                               0.73213389587665278, rtol=1e-12)
|
||
|
||
def test_ndcg_2():
    """Swapping the scores of two equally labelled items keeps the nDCG."""

    # Set up data
    prediction = np.array([0.1, 0.9, 0.2, 0.15, 3.0])
    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])

    # Compare with a tight tolerance rather than bit-exact equality, so the
    # test does not depend on the platform's log2/summation rounding
    np.testing.assert_allclose(ndcg(prediction, ground_truth).data,
                               0.73213389587665278, rtol=1e-12)
|
||
|
||
def test_ndcg_3():
    """nDCG when the top-scored item carries a mid-level label."""

    # Set up data
    prediction = np.array([0.1, 0.9, 0.2, 0.15, 3.0])
    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 2.0])

    # Compare with a tight tolerance rather than bit-exact equality, so the
    # test does not depend on the platform's log2/summation rounding
    np.testing.assert_allclose(ndcg(prediction, ground_truth).data,
                               0.8259562683091511, rtol=1e-12)
|
||
|
||
def test_ndcg_perfect():
    """Scores that already order the labels ideally give an nDCG of 1.0."""
    scores = np.array([4.0, 3.0, 2.0, 1.0, 0.0])
    labels = np.array([3.0, 3.0, 2.0, 1.0, 1.0])

    result = ndcg(scores, labels)

    assert_equal(result.data, 1.0)
|
||
|
||
def test_ndcg_minimal():
    """nDCG of the fully reversed ranking of ten distinct labels."""

    # Set up data: labels are the scores in reverse order
    prediction = np.arange(10).astype(dtype=np.float32)
    ground_truth = np.flip(prediction, axis=0)

    # Compare with a tight tolerance rather than bit-exact equality, so the
    # test does not depend on the platform's log2/summation rounding
    np.testing.assert_allclose(ndcg(prediction, ground_truth).data,
                               0.39253964576233569, rtol=1e-12)
|
||
|
||
def test_ndcg_at_k():
    """nDCG restricted to the top three ranks (k=3)."""

    # Set up data
    prediction = np.array([0.3, 0.3, 0.2, 2.14, 0.23])
    ground_truth = np.array([3.0, 3.0, 2.0, 1.0, 1.0])

    # Compare nDCG@3 with a tight tolerance rather than bit-exact equality,
    # so the test does not depend on the platform's log2/summation rounding
    np.testing.assert_allclose(ndcg(prediction, ground_truth, k=3).data,
                               0.69031878315427031, rtol=1e-12)
|
||
|
||
def test_empty_ndcg():
    """Empty score and label arrays give an nDCG of 0.0."""
    no_scores = np.array([])
    no_labels = np.array([])

    result = ndcg(no_scores, no_labels)

    assert_equal(result.data, 0.0)
|
||
|
||
def test_ndcg_no_preferences():
    """All-zero labels give an nDCG of 1.0 whatever the scores are."""
    scores = np.array([0.3, 0.3, 0.2, 2.14, 0.23])
    labels = np.array([0.0, 0.0, 0.0, 0.0, 0.0])

    result = ndcg(scores, labels)

    assert_equal(result.data, 1.0)
|
||
|
||
def test_ndcg_negative_predictions():
    """Negative scores are ranked like any other score."""

    # Set up data
    prediction = np.array([-0.1, -0.3, 1.9, -0.9, -0.2])
    ground_truth = np.array([0.0, 1.0, 1.0, 0.0, 0.0])

    # Compare with a tight tolerance rather than bit-exact equality, so the
    # test does not depend on the platform's log2/summation rounding
    np.testing.assert_allclose(ndcg(prediction, ground_truth).data,
                               0.8772153153380493, rtol=1e-12)
|
||
|
||
@raises(ValueError)
def test_unequal_ndcg():
    """Score and label arrays of different lengths must raise ValueError."""
    short_scores = np.array([0.3, 0.3, 0.2])
    long_labels = np.array([3.0, 3.0, 2.0, 1.0, 1.0, 2.3])

    # The length mismatch is expected to surface as the ValueError that the
    # decorator checks for
    ndcg(short_scores, long_labels)