In [3]:
import json
import numpy as np

In [10]:
# Read the JSON fixture inside a context manager so the file handle is closed
# as soon as parsing finishes, rather than whenever the garbage collector
# happens to reclaim the anonymous file object.
with open('minimal_data.json') as data_file:
    data = json.load(data_file)
# Array that the evaluation cell indexes by integer item id to get item scores.
magic_weights = np.array(data['magic_weights'])
# Mapping iterated per key in the evaluation cell; each value is a sequence of
# (item, relevance) pairs.
test_data = data['test']

In [13]:
# copy-paste from ../letor_metrics.py

def dcg_from_ranking(y_true, ranking):
    """Discounted cumulative gain (DCG) of a given ranking.

    Each ranked document contributes ``(2 ** relevance - 1)`` discounted by
    ``log2(position + 2)``, where ``position`` is the zero-based rank.
    The whole ``ranking`` is scored; truncate it before calling to obtain
    DCG at a particular cut-off.

    Parameters
    ----------
    y_true : array-like, shape = [n_samples]
        Ground truth (true relevance labels).

    ranking : array-like of int, shape = [n_ranked]
        Document indices into ``y_true``, i.e.,
            ranking[0] is the index of top-ranked document,
            ranking[1] is the index of second-ranked document,
            ...
        May be empty, in which case the DCG is 0.

    Returns
    -------
    DCG : float
    """
    y_true = np.asarray(y_true)
    ranking = np.asarray(ranking)
    # An empty list converts to a float64 array, which NumPy refuses to use
    # as an index; an empty ranking simply accumulates no gain.
    if ranking.size == 0:
        return 0.0
    rel = y_true[ranking]
    gains = 2 ** rel - 1
    # Positions are zero-based, so the top document is discounted by log2(2) = 1.
    discounts = np.log2(np.arange(len(ranking)) + 2)
    return np.sum(gains / discounts)


def ndcg_from_ranking(y_true, ranking, k=None):
    """Normalized discounted cumulative gain (NDCG) at rank k

    The DCG of the first ``k`` entries of ``ranking`` is divided by the DCG
    of the ideal ordering (documents sorted by decreasing true relevance),
    also truncated to ``k`` entries.

    Parameters
    ----------
    y_true : array-like, shape = [n_samples]
        Ground truth (true relevance labels).

    ranking : sequence of int
        Document indices into ``y_true``, i.e.,
            ranking[0] is the index of top-ranked document,
            ranking[1] is the index of second-ranked document,
            ...
        Must support ``len()`` and slicing.

    k : int, optional
        Rank cut-off. Defaults to ``len(ranking)``.

    Returns
    -------
    NDCG @k : float
        0.0 when the ideal DCG is zero (e.g. every relevance label is 0),
        where the ratio would otherwise be the undefined 0/0.
    """
    if k is None:
        k = len(ranking)
    # Ideal ordering: indices of y_true sorted by decreasing relevance.
    best_ranking = np.argsort(y_true)[::-1]
    best = dcg_from_ranking(y_true, best_ranking[:k])
    # With no attainable gain the ratio is 0/0; report 0.0 rather than a
    # NaN that would silently poison any mean taken over many queries.
    if best == 0:
        return 0.0
    return dcg_from_ranking(y_true, ranking[:k]) / best



In [15]:
# Per-user NDCG@10 of the ranking induced by magic_weights, averaged over
# every user in test_data.
ndcg_vals = []
for user, pairs in test_data.items():
    # Ground-truth relevance of each (item, relevance) pair, in stored order.
    true_relevances = np.array([relevance for (item, relevance) in pairs])

    # Score each of the user's items by looking up its weight, then rank the
    # pair positions from highest to lowest score.
    item_ids = np.array([item for (item, relevance) in pairs]).astype(np.int32)
    scores = magic_weights[item_ids]
    predicted_ranking = np.argsort(-scores)

    # Positions in predicted_ranking index into true_relevances because both
    # follow the order of `pairs`.
    ndcg_vals.append(ndcg_from_ranking(true_relevances, predicted_ranking, 10))

print(np.mean(ndcg_vals))

0.740121606531
