In [1]:
# for full explanations of diversity metrics, see this paper: 
# "Diversity in recommender systems - A survey"
# https://www.sciencedirect.com/science/article/abs/pii/S0950705117300680

# every song first has to be converted to a vector for most of these metrics to work
# for now I'm just going to use randomized vectors to test
# moving forward song vectorization will be a big task for us

In [4]:
import numpy as np

In [9]:
# for now, let the list of songs be this list of n random placeholder vectors:

n = 1000              # number of placeholder songs
song_dimension = 100  # number of components in each song vector

# each song is a (song_dimension, 1) column vector of uniform samples from [0, 1);
# the vector size comes from song_dimension instead of a separate hard-coded literal
song_list = [np.random.rand(song_dimension, 1) for _ in range(n)]

In [None]:
# average dissimilarity

# very straightforward assuming there are good song vectorizations as input

def avg_dis(song_list):
    """Return the average pairwise dissimilarity of a list of song vectors.

    The dissimilarity of two songs is the norm of the difference of their
    vectors (np.linalg.norm with default arguments), so a larger value means
    a more diverse list. Each unordered pair of distinct songs is counted
    exactly once and the total is divided by the number of such pairs,
    n * (n - 1) / 2.

    Parameters
    ----------
    song_list : sequence of array-likes with a common shape
        One vector per song.

    Returns
    -------
    float
        Mean distance over all pairs of distinct songs; 0.0 when the list
        holds fewer than two songs (there are no pairs to compare).
    """
    n = len(song_list)

    # no pairs exist, and the denominator below would be zero
    if n < 2:
        return 0.0

    vectors = [np.asarray(song) for song in song_list]

    total = 0.0
    for i in range(n):
        # start at i + 1: skips self-comparisons and visits each pair once,
        # which is what the n * (n - 1) / 2 denominator assumes
        for j in range(i + 1, n):
            total += np.linalg.norm(vectors[i] - vectors[j])

    return float(total / (n * (n - 1) / 2))

In [None]:
# Gini coefficient

# can only be implemented for a simulated environment (e.g. during a large user study)
# therefore this metric is likely not applicable at this point


In [None]:
# nDCG measure

# returns the gain G[k] at rank k as a single number (one entry of the "gain vector" G)

# needs to be fed a vector containing the value of J(song_list[i]) for each song, whereby J(song_list[i])
# is 1 if the user finds it relevant and 0 otherwise

# also fed a tunable hyperparameter, alpha

# k is the index of the specific song whose diversity score you want to find

# note this algorithm was originally intended for a pretty different purpose (document retrieval)
# so I'm not sure how well this mod will work

def nDCG(song_list, J_song_list, alpha, k):
    """Return a discounted relevance gain for rank k.

    For every song i the discount exponent is
        r[i] = (k - 1) * J_song_list[i]
    (J_song_list[i] added once for each rank before k), and the result is
        sum_i J_song_list[i] * (1 - alpha) ** r[i].

    Parameters
    ----------
    song_list : sequence
        The recommended songs; only its length is used.
    J_song_list : sequence of numbers
        J_song_list[i] is the relevance judgment for song_list[i]
        (1 if the user finds it relevant, 0 otherwise).
    alpha : float
        Tunable discount hyperparameter.
    k : int
        Rank whose gain is computed; k <= 1 means no discount is applied.

    Returns
    -------
    number
        The accumulated gain.

    NOTE(review): this keeps the structure of the original loops. A faithful
    alpha-nDCG would presumably need a per-song, per-aspect judgment matrix
    rather than a single relevance flag per song -- confirm the intended
    adaptation.
    """
    n_songs = len(song_list)

    # range(k - 1) is empty for k <= 1, so clamp the repeat count at zero
    ranks_before_k = max(k - 1, 0)

    # r must be one counter per song because it is indexed per song below;
    # a single scalar accumulator cannot be subscripted
    r = [ranks_before_k * J_song_list[i] for i in range(n_songs)]

    score = 0
    for i in range(n_songs):
        score += J_song_list[i] * (1 - alpha) ** r[i]

    return score

In [None]:
# product of relevance, similarity, places in ranked list






In [None]:
# user-perceived diversity

# cannot be coded here obviously - requires user testing

In [None]:
# modification of nDCG measure







In [None]:
# modification of average dissimilarity

# exact same as original version except using user feedback as measure of similarity

# pass in matrix of user-defined similarity scores (e.g. based on average of Likert scale responses)

def avg_dis(song_list, user_similarity_scores=None):
    """Return the average pairwise dissimilarity of a list of songs.

    When user_similarity_scores is given, the dissimilarity of songs i and j
    is 1 - user_similarity_scores[i][j]. Every ordered pair with i != j is
    visited and the total is divided by n * (n - 1), so a matrix that is not
    perfectly symmetric is averaged over both directions; for a symmetric
    matrix this equals the mean over unordered pairs.

    This definition reuses the name of the one-argument vector-based avg_dis
    defined in an earlier cell and replaces it when the notebook runs top to
    bottom. To keep one-argument calls working, user_similarity_scores
    defaults to None, in which case the dissimilarity of two songs is the
    norm of the difference of their vectors.

    Parameters
    ----------
    song_list : sequence
        The songs. Only its length is used when scores are supplied; without
        scores each entry must be an array-like vector of a common shape.
    user_similarity_scores : indexable as [i][j], optional
        User-defined similarity for each pair of songs (e.g. averaged
        Likert-scale responses).

    Returns
    -------
    float
        Mean pairwise dissimilarity; 0.0 when the list holds fewer than two
        songs (there are no pairs to compare).
    """
    n = len(song_list)

    # no pairs exist, and the denominator below would be zero
    if n < 2:
        return 0.0

    if user_similarity_scores is None:
        vectors = [np.asarray(song) for song in song_list]

        def dissimilarity(i, j):
            return np.linalg.norm(vectors[i] - vectors[j])
    else:
        def dissimilarity(i, j):
            return 1 - user_similarity_scores[i][j]

    total = 0.0
    for i in range(n):
        for j in range(n):
            # a song's similarity to itself says nothing about list diversity
            if i != j:
                total += dissimilarity(i, j)

    return float(total / (n * (n - 1)))

In [None]:
# combination of genre coverage and non-redundancy

# Number of distinct items in recommendation list

def num_distinct(song_list):
    """Return the number of distinct songs in the recommendation list.

    Each song is flattened to one row and rows are compared as a whole, so
    two songs count as the same item only when all of their components match.

    Parameters
    ----------
    song_list : sequence of array-likes with a common shape (or of scalars)
        One entry per recommended song.

    Returns
    -------
    int
        Count of unique songs; 0 for an empty list.
    """
    n = len(song_list)
    if n == 0:
        return 0

    # one row per song; without axis=0 np.unique flattens its input and would
    # count distinct scalar components instead of distinct songs
    rows = np.asarray(song_list).reshape(n, -1)
    return len(np.unique(rows, axis=0))