In [None]:
# default_exp metrics.ndcg

# NDCG
> Normalized Discounted Cumulative Gain.

NDCG is a metric that evaluates how well a recommender ranks the items it recommends to users. Both whether relevant items are hit and whether those items are ranked correctly matter to the NDCG evaluation. The total NDCG score is averaged over the total number of users.

nDCG has three parts. The first is ‘CG’, which stands for Cumulative Gain. It captures the fact that highly relevant items are more useful than somewhat relevant items, which in turn are more useful than irrelevant items. It sums the relevance scores of the items in the list, hence the term cumulative.

But CG doesn’t account for the position of the items in the list, so changing an item's position won’t change the CG. This is where the second part of nDCG comes into play, i.e. ‘D’. Discounted Cumulative Gain, DCG for short, penalizes relevant items that appear lower in the list. A relevant item appearing at the end of the list is the result of a bad recommender system, and hence that item's gain should be discounted to reflect the poor performance of the model.

nDCG normalizes the DCG so that scores are comparable across item lists of different lengths. To do so, we sort the item list by relevancy and calculate the DCG for that sorted list. This is the ideal (perfect) DCG score, since the items are ordered by their relevancy score. nDCG is then the actual DCG divided by this ideal DCG.

<img src='https://github.com/recohut/reco-static/raw/master/media/images/metrics/ndcg.png'>

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import numpy as np

In [None]:
#export
def ndcg_at_k(y_true_list, y_reco_list, users=None, k=10, next_item=False,
              all_item=False):
    """Mean NDCG over a batch of recommendation lists.

    The two flags select how the inputs are laid out (``next_item`` takes
    precedence when both are set):

    * ``next_item=True``: ``y_true_list[i]`` is the single ground-truth item
      for row ``i`` of ``y_reco_list``. A hit at 0-based position ``p``
      scores ``1 / log2(p + 2)`` and a miss scores 0. The whole
      recommendation row is searched; ``k`` and ``users`` are not used.
    * ``all_item=True``: ``y_reco_list[i]`` holds the recommendations for
      ``users[i]``, and ``y_true_list`` is indexed by user.
    * neither flag: ``y_true_list`` and ``y_reco_list`` are both indexed by
      each entry of ``users``.

    Args:
        y_true_list: Ground truth. Must provide ``.tolist()`` in
            ``next_item`` mode; otherwise only needs to support indexing by
            user.
        y_reco_list: Recommendations. Must provide ``.tolist()`` in the
            ``next_item`` and ``all_item`` modes.
        users: Users to evaluate. Required unless ``next_item`` is true;
            must provide ``.tolist()`` in ``all_item`` mode.
        k: Cut-off forwarded to ``ndcg_one`` (unused in ``next_item`` mode).
        next_item: Select the single-ground-truth layout.
        all_item: Select the row-aligned-with-``users`` layout.

    Returns:
        ``np.mean`` of the per-row NDCG values (NaN when there are no rows).

    Raises:
        ValueError: If ``users`` is None in a mode that needs it.
    """
    if next_item:
        scores = []
        true_items = y_true_list.tolist()
        reco_rows = y_reco_list.tolist()
        for y_true, y_reco in zip(true_items, reco_rows):
            if y_true in y_reco:
                # +2: positions are 0-based and the top rank has discount log2(2) = 1.
                scores.append(1. / np.log2(y_reco.index(y_true) + 2))
            else:
                scores.append(0.)
        return np.mean(scores)

    # Both remaining modes look users up; fail with a clear message instead of
    # an AttributeError/TypeError on None further down.
    if users is None:
        raise ValueError("`users` must be provided unless next_item=True.")

    if all_item:
        user_ids = users.tolist()
        reco_rows = y_reco_list.tolist()
        scores = [ndcg_one(y_true_list[user_ids[i]], reco_rows[i], k)
                  for i in range(len(reco_rows))]
        return np.mean(scores)

    scores = [ndcg_one(y_true_list[u], y_reco_list[u], k) for u in users]
    return np.mean(scores)

In [None]:
#export
def ndcg_one(y_true, y_reco, k):
    """Binary-relevance NDCG@k for a single recommendation list.

    Every recommended item that also appears in ``y_true`` gets a gain of 1
    at its position; all other positions get 0. The ideal DCG is built from
    the hits found within the top ``k`` (the same gains moved to the front),
    not from ``len(y_true)``, so a list whose hits all sit at the top scores
    1.0 even if other relevant items were missed.

    Args:
        y_true: Relevant items (array-like).
        y_reco: Recommended items in rank order, best first (array-like).
            Only the first ``k`` entries are scored.
        k: Number of rank positions to evaluate.

    Returns:
        DCG / ideal DCG, or 0. when none of the top-``k`` recommendations is
        relevant.
    """
    rank_list = np.zeros(k)
    # Only the first k recommendations have a slot in `rank_list`; scoring the
    # untruncated list would index past its end for any hit at position >= k.
    top_k = np.asarray(y_reco).ravel()[:k]
    _, _, hit_positions = np.intersect1d(
        y_true, top_k, assume_unique=False, return_indices=True)

    if hit_positions.size == 0:
        return 0.

    rank_list[hit_positions] = 1
    discounts = np.log2(np.arange(2, k + 2))
    dcg = np.sum(rank_list / discounts)
    # Best possible ordering of the same gains: every hit moved to the front.
    idcg = np.sum(np.sort(rank_list)[::-1] / discounts)
    return dcg / idcg

In [None]:
#export
def dcg_at_k(r, k, method=0):
    """Discounted cumulative gain of the first ``k`` relevance scores.

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Discounted cumulative gain (0. when there are no scores to sum)
    Raises:
        ValueError: If ``method`` is neither 0 nor 1 and ``r`` is non-empty.
    """
    # np.asfarray was removed in NumPy 2.0; asarray with dtype=float gives the
    # same float64 array on both old and new releases.
    r = np.asarray(r, dtype=float)[:k]
    if not r.size:
        return 0.
    if method == 0:
        # First item is undiscounted; item i (1-based, i >= 2) is divided by log2(i).
        return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
    if method == 1:
        # Item i (1-based) is divided by log2(i + 1).
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
    raise ValueError('method must be 0 or 1.')

In [None]:
# Worked example: DCG of six graded relevance scores, using the default method=0 weighting.
result = dcg_at_k([3, 2, 3, 0, 1, 2], 6)
result

8.097171433256849

In [None]:
# Regression check: the DCG example must round to 8.0972 at four decimals.
assert np.round(result,4)==8.0972

In [None]:
#export
def ndcg_at_k_v2(r, k, method=0):
    """Normalized DCG of the first ``k`` relevance scores.

    Args:
        r: Relevance scores in rank order (first element is the first item).
        k: Number of results to consider.
        method: Discount scheme, passed through to ``dcg_at_k``.

    Returns:
        ``dcg_at_k`` of ``r`` divided by ``dcg_at_k`` of the same scores
        sorted in descending order, or 0. when that ideal DCG is zero.
    """
    ideal_order = sorted(r, reverse=True)
    best_dcg = dcg_at_k(ideal_order, k, method)
    if best_dcg:
        return dcg_at_k(r, k, method) / best_dcg
    # A zero ideal DCG means there is nothing to gain; avoid dividing by zero.
    return 0.

In [None]:
# Worked example: NDCG of the same six relevance scores, using the method=1 weighting.
result = ndcg_at_k_v2([3, 2, 3, 0, 1, 2], 6, method=1)
result

0.9608081943360617

In [None]:
# Regression check: the NDCG example must round to 0.9608 at four decimals.
assert np.round(result,4)==0.9608

In [None]:
#hide
%reload_ext watermark
%watermark -a "Sparsh A." -m -iv -u -t -d

Author: Sparsh A.

Last updated: 2021-12-24 06:57:01

Compiler    : GCC 7.5.0
OS          : Linux
Release     : 5.4.144+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 2
Architecture: 64bit

IPython: 5.5.0
numpy  : 1.19.5

