In [31]:
import numpy as np
import heapq
import itertools
import sys
import torch
from utils import myout
import numpy as np
from scipy import stats
from sklearn import metrics

In [4]:
def argmax_top_k(a, top_k=50):
    """Return the indices of the ``top_k`` largest entries of ``a``.

    Args:
        a: Iterable of mutually comparable scores.
        top_k: Maximum number of indices to return (default 50).

    Returns:
        1-D ``np.intc`` array of indices ordered from the largest score to the
        smallest. Equal scores are ordered by descending index, because the
        selection compares ``(score, index)`` tuples.
    """
    # Pair each score with its position so the heap can hand back indices.
    scored = ((value, position) for position, value in enumerate(a))
    best = heapq.nlargest(top_k, scored)
    indices = [pair[1] for pair in best]
    return np.array(indices, dtype=np.intc)

def precision(rank, ground_truth):
    """Compute precision@k for every cutoff ``k = 1..len(rank)``.

    Args:
        rank: Ranked sequence of predicted items, best first.
        ground_truth: Container of relevant items; only ``in`` is applied to it.

    Returns:
        1-D float64 array whose entry ``k-1`` is the fraction of the first
        ``k`` ranked items that are contained in ``ground_truth``. An empty
        ``rank`` yields an empty array.
    """
    hits = [1 if item in ground_truth else 0 for item in rank]
    # np.float64 replaces the alias np.float, which was removed in NumPy 1.24.
    result = np.cumsum(hits, dtype=np.float64) / np.arange(1, len(rank) + 1)
    return result


def recall(rank, ground_truth):
    """Compute recall@k for every cutoff ``k = 1..len(rank)``.

    Args:
        rank: Ranked sequence of predicted items, best first.
        ground_truth: Sized container of relevant items. Its length is the
            divisor, so it must be non-empty.

    Returns:
        1-D float64 array whose entry ``k-1`` is the number of hits among the
        first ``k`` ranked items divided by ``len(ground_truth)``.
    """
    hits = [1 if item in ground_truth else 0 for item in rank]
    # np.float64 replaces the alias np.float, which was removed in NumPy 1.24.
    result = np.cumsum(hits, dtype=np.float64) / len(ground_truth)
    return result


def map(rank, ground_truth):
    """Compute average precision at every cutoff ``k = 1..len(rank)``.

    Note: this definition shadows the builtin ``map`` in this namespace.

    Args:
        rank: Ranked sequence of predicted items, best first.
        ground_truth: Sized container of relevant items.

    Returns:
        1-D float32 array. Entry ``k-1`` is the sum of precision@i over the
        hit positions ``i <= k``, divided by ``len(ground_truth)`` (the full
        ground-truth size, not ``min(k, len(ground_truth))``).
    """
    precision_at_k = precision(rank, ground_truth)

    # Precision only contributes at positions where the ranked item is a hit.
    contributions = []
    for position, item in enumerate(rank):
        if item in ground_truth:
            contributions.append(precision_at_k[position])
        else:
            contributions.append(0)

    running_total = np.cumsum(contributions, dtype=np.float32)
    return running_total / len(ground_truth)


def ndcg(rank, ground_truth):
    """Compute binary-relevance NDCG at every cutoff ``k = 1..len(rank)``.

    Args:
        rank: Ranked sequence of predicted items, best first.
        ground_truth: Sized container of relevant items.

    Returns:
        1-D float64 array of DCG@k / IDCG@k. A hit at 0-based position ``i``
        contributes ``1 / log2(i + 2)``. The ideal DCG stops growing after
        ``min(len(ground_truth), len(rank))`` positions. An empty ``rank``
        yields an empty array, and an empty ``ground_truth`` yields all zeros.
    """
    len_rank = len(rank)
    len_gt = len(ground_truth)

    # Without this guard an empty rank hits idcg[-1] on an empty array
    # (IndexError). With no relevant items every DCG term is zero anyway.
    if len_rank == 0 or len_gt == 0:
        return np.zeros(len_rank, dtype=np.float64)

    idcg_len = min(len_gt, len_rank)

    # Ideal DCG: all relevant items first, flat once they are exhausted.
    idcg = np.cumsum(1.0 / np.log2(np.arange(2, len_rank + 2)))
    idcg[idcg_len:] = idcg[idcg_len - 1]

    dcg = np.cumsum([1.0 / np.log2(idx + 2) if item in ground_truth else 0.0
                     for idx, item in enumerate(rank)])
    result = dcg / idcg
    return result


def mrr(rank, ground_truth):
    """Compute the reciprocal rank at every cutoff ``k = 1..len(rank)``.

    Args:
        rank: Ranked sequence of predicted items, best first.
        ground_truth: Container of relevant items; only ``in`` is applied to it.

    Returns:
        1-D float32 array that is 0 before the first hit and
        ``1 / (first_hit_index + 1)`` from the first hit onwards. If no ranked
        item is relevant the array is all zeros.
    """
    scores = np.zeros(len(rank), dtype=np.float32)

    # 0-based position of the first relevant item, or None if there is none.
    first_hit = next(
        (position for position, item in enumerate(rank) if item in ground_truth),
        None,
    )
    if first_hit is not None:
        scores[first_hit:] = 1.0 / (first_hit + 1)
    return scores

In [13]:
# Sanity check for mrr(): item 5 is not in gt, so the first hit is item 4 at
# 0-based index 2; entries before it are 0 and the rest are 1/3.
pred = [5, 5, 4, 0, 1, 2, 3]
gt = [0, 1, 2, 3, 4]
mrr(pred, gt)

array([0.        , 0.        , 0.33333334, 0.33333334, 0.33333334,
       0.33333334, 0.33333334], dtype=float32)

In [24]:
# Random scores in [0, 1): a vector of 2 values and a 2 x 10 matrix.
# Presumably one positive score and 10 negative scores per example (from the names).
# NOTE(review): no torch.manual_seed is set, so these values change on every
# run and the printed outputs will not reproduce. Confirm whether that matters.
y_pred_pos = torch.rand(2)
y_pred_neg = torch.rand(2, 10)
myout(y_pred_pos, y_pred_neg)

y_pred_pos : shape=torch.Size([2]), tensor([0.7000, 0.8801])
y_pred_neg : shape=torch.Size([2, 10])
tensor([[0.2917, 0.7910, 0.7456, 0.5775, 0.8039, 0.7220, 0.5861, 0.8793, 0.3455,
         0.1295],
        [0.2793, 0.3745, 0.3292, 0.0028, 0.1132, 0.3431, 0.5361, 0.2225, 0.9813,
         0.8049]])


## MRR

In [25]:
# Put the positive score in column 0 and the negative scores after it.
y_pred = torch.cat([y_pred_pos.view(-1,1), y_pred_neg], dim = 1) # [b, k+1]
# Per row, column indices ordered from the highest score to the lowest.
argsort = torch.argsort(y_pred, dim = 1, descending = True) # [b, k+1]
myout(y_pred, argsort)

y_pred : shape=torch.Size([2, 11])
tensor([[0.7000, 0.2917, 0.7910, 0.7456, 0.5775, 0.8039, 0.7220, 0.5861, 0.8793,
         0.3455, 0.1295],
        [0.8801, 0.2793, 0.3745, 0.3292, 0.0028, 0.1132, 0.3431, 0.5361, 0.2225,
         0.9813, 0.8049]])
argsort : shape=torch.Size([2, 11])
tensor([[ 8,  5,  2,  3,  6,  0,  7,  4,  9,  1, 10],
        [ 9,  0, 10,  7,  2,  6,  3,  1,  8,  5,  4]])


In [26]:
# Locate column index 0 (the positive score) in each sorted row; nonzero
# returns one [row, position] pair per match.
ranking_list = torch.nonzero(argsort == 0, as_tuple=False) 
# Keep the position and shift it to a 1-based rank.
ranking_list = ranking_list[:, 1] + 1
# Hits@5 indicator: 1.0 where the positive is ranked in the top 5, else 0.0.
hits5_list = (ranking_list <= 5).to(torch.float)
myout(ranking_list, hits5_list)

ranking_list : shape=torch.Size([2]), tensor([6, 2])
hits5_list : shape=torch.Size([2]), tensor([0., 1.])


In [27]:
# Reciprocal of each 1-based rank, then the mean over the batch.
mrr_list = 1./ranking_list.to(torch.float)
# Stored as `mrr_mean` rather than `mrr` so the `mrr` metric function defined
# earlier in this notebook is not rebound to a tensor (which would make any
# later call to mrr(...) fail).
mrr_mean = mrr_list.mean()
myout(mrr_mean)

mrr : shape=torch.Size([]), 0.3333333432674408


## NDCG

In [33]:
# Positive score in column 0, negative scores after it.
y_pred = torch.cat([y_pred_pos.view(-1,1), y_pred_neg], dim = 1) # [b, k+1]
# Binary relevance labels: only column 0 (the positive) is relevant.
true_rel = torch.zeros_like(y_pred) # [b, k+1]
true_rel[:, 0] = 1
# Stored as `ndcg_sklearn` rather than `ndcg` so the `ndcg` metric function
# defined earlier in this notebook is not rebound to a float.
ndcg_sklearn = metrics.ndcg_score(true_rel, y_pred)
myout(y_pred, true_rel, ndcg_sklearn)

y_pred : shape=torch.Size([2, 11])
tensor([[0.7000, 0.2917, 0.7910, 0.7456, 0.5775, 0.8039, 0.7220, 0.5861, 0.8793,
         0.3455, 0.1295],
        [0.8801, 0.2793, 0.3745, 0.3292, 0.0028, 0.1132, 0.3431, 0.5361, 0.2225,
         0.9813, 0.8049]])
true_rel : shape=torch.Size([2, 11])
tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
ndcg = 0.49356847033973983


## Spearman

In [41]:
# Spearman rank correlation between the flattened relevance labels and the
# flattened scores (all rows pooled into one vector); the second return value
# of spearmanr is discarded.
cor, _ = stats.spearmanr(true_rel.view(-1), y_pred.view(-1))
myout(cor)

cor = 0.2990668717675361
