In [42]:
"""
Evaluation metrics R-precision and Normalized Discounted Cumulative Gain (NDCG)
as defined by the Spotify Million Playlist Dataset Competition:
https://www.aicrowd.com/challenges/spotify-million-playlist-dataset-challenge

Code and methods follow those described by Vagliano et al:
https://pure.mpg.de/rest/items/item_3367572_1/component/file_3367573/content
"""

from collections import OrderedDict
from collections import namedtuple
import numpy as np
from scipy import stats
import pandas as pd
from numpy import genfromtxt


def r_precision(target, prediction, max_n_prediction=500):
    '''Return the R-precision of ``prediction`` against ``target``.

    R is the number of unique items in ``target``. The prediction list is
    first capped at ``max_n_prediction`` items, then its first R items are
    compared with the unique targets; the result is the number of matches
    divided by R.

    Raises ZeroDivisionError when ``target`` is empty.
    '''
    capped = prediction[:max_n_prediction]
    unique_targets = set(target)
    r = len(unique_targets)

    # Only the top-R predictions count towards the score.
    hits = unique_targets & set(capped[:r])
    return float(len(hits)) / r

def dcg(relevant, retrieved, k, *args, **kwargs):
    '''Discounted Cumulative Gain of the top-k retrieved items.

    Relevance is binary: a retrieved item scores 1.0 if it occurs in
    ``relevant`` and 0.0 otherwise. The item at (1-based) rank i is
    discounted by log2(i + 1).

    Parameters:
        relevant: iterable of hashable ground-truth items (duplicates ignored).
        retrieved: sliceable sequence of hashable predicted items in rank
            order; only the first ``k`` are used and duplicates are dropped,
            keeping the first occurrence.
        k: cut-off applied to ``retrieved`` before de-duplication.
        *args, **kwargs: accepted and ignored.

    Returns:
        0.0 if either list is empty after truncation/de-duplication,
        otherwise the DCG as a NumPy float.
    '''
    ranked = list(OrderedDict.fromkeys(retrieved[:k]))
    # A set gives O(1) membership tests; order of `relevant` is irrelevant here.
    relevant_set = set(relevant)

    if not ranked or not relevant_set:
        return 0.0

    gains = np.array([float(el in relevant_set) for el in ranked])
    # Ranks 1..n map to discounts log2(2)..log2(n + 1).
    discounts = np.log2(np.arange(2, len(ranked) + 2))
    return np.sum(gains / discounts)
    
def idcg(relevant, retrieved, k, *args, **kwargs):
    '''Ideal Discounted Cumulative Gain.

    This is the DCG obtained when the unique relevant items themselves are
    returned as the ranking, cut off at ``k``.

    Parameters:
        relevant: iterable of hashable ground-truth items.
        retrieved: unused; kept so the signature parallels ``dcg``/``ndcg``.
        k: cut-off for the ideal ranking.
        *args, **kwargs: accepted and ignored.

    Returns:
        The DCG of the ideal ranking (0.0 when ``relevant`` is empty or the
        cut-off leaves no items).
    '''
    # De-duplicate BEFORE applying the cut-off: otherwise duplicates inside
    # the first k entries shrink the ideal list and NDCG can exceed 1.
    unique_relevant = list(OrderedDict.fromkeys(relevant))
    k_min = min(k, len(unique_relevant))
    return dcg(unique_relevant, unique_relevant, k_min)

def ndcg(relevant, retrieved, k, *args, **kwargs):
    '''Normalized Discounted Cumulative Gain.

    Divides the DCG of the top-k ``retrieved`` items by the ideal DCG for
    ``relevant`` at the same cut-off.

    Parameters:
        relevant: ground-truth items.
        retrieved: predicted items in rank order.
        k: cut-off passed to both ``dcg`` and ``idcg``.
        *args, **kwargs: accepted and ignored.

    Returns:
        dcg / idcg.

    Raises:
        ValueError: if the ideal DCG is 0, which makes the ratio undefined.
    '''
    dcg_val = dcg(relevant, retrieved, k)
    idcg_val = idcg(relevant, retrieved, k)

    if idcg_val == 0:
        # IDCG is 0 both for an empty ground truth and for a zero cut-off.
        raise ValueError(
            "IDCG is 0 (relevant is empty or k is 0); NDCG is undefined"
        )

    return dcg_val / idcg_val


# Record holding one R-precision value and one NDCG value.
Metrics = namedtuple('Metrics', 'r_precision ndcg')

def get_all_metrics(target, prediction, k):
    '''Compute every evaluation metric for one target/prediction pair.

    ``k`` is used both as the prediction cap for R-precision and as the
    cut-off for NDCG. Returns a ``Metrics`` namedtuple.
    '''
    return Metrics(
        r_precision=r_precision(target, prediction, k),
        ndcg=ndcg(target, prediction, k),
    )

# Aggregate result: mean of each metric plus the share of candidates that had a prediction.
Metrics_Summary = namedtuple('MetricsSummary', ['mean_r_precision', 'mean_ndcg', 'coverage'])

def aggregate_metrics(ground_truth, sub, k, candidates):
    '''Return the mean of each evaluation metric over ``candidates``.

    Parameters:
        ground_truth: mapping from candidate id to its target items.
        sub: mapping from candidate id to its predicted items.
        k: cut-off forwarded to ``get_all_metrics``.
        candidates: iterable of candidate ids to evaluate.

    Returns:
        A ``Metrics_Summary`` with the mean R-precision, the mean NDCG and
        the coverage (fraction of candidates present in ``sub``).

    Raises:
        ValueError: if ``candidates`` is empty.
    '''
    r_precision_vals = []
    ndcg_vals = []
    miss_vals = 0

    for i in candidates:
        if i not in sub:
            # A candidate without a prediction scores zero on every metric.
            miss_vals += 1
            r_precision_vals.append(0.0)
            ndcg_vals.append(0.0)
            continue
        m = get_all_metrics(ground_truth[i], sub[i], k)
        r_precision_vals.append(m.r_precision)
        ndcg_vals.append(m.ndcg)

    # Count from the collected values so `candidates` may be a one-shot iterator.
    counts = len(r_precision_vals)
    if counts == 0:
        raise ValueError("candidates is empty, cannot aggregate metrics")

    coverage_val = 1 - miss_vals / float(counts)
    r_prec_mean = np.mean(r_precision_vals)
    ndcg_mean = np.mean(ndcg_vals)
    return Metrics_Summary(r_prec_mean, ndcg_mean, coverage_val)



In [38]:
# Load the ground-truth labels (NumPy .npy file) and the predicted labels
# (text file parsed with genfromtxt, using newline as the delimiter).
true_labels = np.load("datasets/testlabels.npy")
pred_labels = genfromtxt("densenet_predictions.csv", delimiter='\n')

In [31]:
# Show the shape of the loaded ground-truth array.
true_labels.shape

(100,)

In [32]:
# NOTE(review): `my_data` is not defined in any visible cell — presumably an
# earlier name for the predictions now held in `pred_labels`; confirm.
np.array(my_data).shape

(100,)

In [33]:
# R-precision with the default cap of 500 predictions.
r_precision(true_labels, pred_labels)

0.018867924528301886

In [35]:
# DCG over the first 100 predictions (unnormalized).
dcg(true_labels, pred_labels, 100)

1.0

In [44]:
# Compute R-precision and NDCG together, using 100 as the cut-off for both.
metrics = get_all_metrics(true_labels, pred_labels, 100)

In [46]:
# Display the Metrics namedtuple computed in the previous cell.
metrics

Metrics(r_precision=0.018867924528301886, ndcg=0.07450728404752761)