**TODO**
- Dividir la partición de test de las playlists
- Definir las métricas
- Obtener el embedding de todos los tracks (sets de train y test unidos)
    All tracks in the challenge set appear in the MPD
    All holdout tracks appear in the MPD
- Obtener recomendaciones según cada categoría de challenge (title, first track, 5, etc.) (10 o 15 recs para cada playlist de test)
- Obtener las letras de las canciones y analizarlas

In [1]:
from transformers import BertModel, BertTokenizer
import torch
import numpy as np
from sklearn.metrics import pairwise_distances
import json
from google.colab import drive
import heapq
import math
import matplotlib.pyplot as plt
import random
from itertools import combinations

In [2]:
# Mount Google Drive in the Colab runtime; the data directory used below lives under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


### Cargamos el modelo

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# The tokenizer and the model are both loaded from the same pretrained checkpoint name.
model_name = "bert-base-cased"
tokenizer = BertTokenizer.from_pretrained(model_name)
# The model is moved to `device` so inference runs where the inputs will be sent.
bert_model = BertModel.from_pretrained(model_name).to(device)

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

### Cargamos los datos necesarios

In [5]:
# Base directory (inside the mounted Drive) that every data file below is read from.
datadir = '/content/drive/MyDrive/UC/Ramos/Sis. Rec./project/data'

In [6]:
# Build the training structures from the "<playlist_id> <track_id>" rows of
# the training file.

# Dictionary mapping each playlist id to the list of its track ids (file order).
playlists_tracks = {}

# Set of every track id that appears in the training file.
tracks = set()

# Dictionary mapping each track id to the number of training rows it appears in
# (used later as the popularity of the track).
popularities = {}

with open(datadir + '/playlistid_itemid_1000p_train.txt', 'r') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. an extra newline at the end of the file).
            continue
        # Parse each id once instead of re-converting it on every use.
        playlist_id, track_id = int(fields[0]), int(fields[1])
        playlists_tracks.setdefault(playlist_id, []).append(track_id)
        tracks.add(track_id)
        popularities[track_id] = popularities.get(track_id, 0) + 1

In [7]:
# Build the test structure from the "<playlist_id> <track_id>" rows of the
# challenge file.

# Dictionary mapping each test playlist id to the list of its track ids (file order).
test_playlists_tracks = {}

with open(datadir + '/test_challenge_200p.txt', 'r') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. an extra newline at the end of the file).
            continue
        # Parse each id once instead of re-converting it on every use.
        playlist_id, track_id = int(fields[0]), int(fields[1])
        test_playlists_tracks.setdefault(playlist_id, []).append(track_id)

In [8]:
# Leemos los embeddings de cada track
track_embeding_matrix_file = f"{datadir}/track_names_embedding_matrix.pt"
tracks_embedding_matrix = torch.load(track_embeding_matrix_file)
tracks_embedding_matrix.shape

  tracks_embedding_matrix = torch.load(track_embeding_matrix_file)


(34443, 768)

In [9]:
# Read the precomputed embeddings built from the track lyrics.
track_lyrics_embedding_matrix_file = f"{datadir}/track_lyrics_embedding_matrix.pt"
# weights_only is passed explicitly instead of relying on the library default,
# which differs between PyTorch releases (recent ones default to True).
# NOTE(review): the shape displayed below is a plain tuple, so the stored object
# is presumably a NumPy array rather than a tensor and needs full unpickling.
# SECURITY: full unpickling can execute arbitrary code; only load trusted files.
track_lyrics_embedding_matrix = torch.load(track_lyrics_embedding_matrix_file, weights_only=False)
track_lyrics_embedding_matrix.shape

  track_lyrics_embedding_matrix = torch.load(track_lyrics_embedding_matrix_file)


(34443, 768)

In [10]:
# Read the precomputed embeddings built from track names and lyrics together.
track_names_and_lyrics_embedding_matrix_file = f"{datadir}/track_names_and_lyrics_embedding_matrix.pt"
# weights_only is passed explicitly instead of relying on the library default,
# which differs between PyTorch releases (recent ones default to True).
# NOTE(review): the shape displayed below is a plain tuple, so the stored object
# is presumably a NumPy array rather than a tensor and needs full unpickling.
# SECURITY: full unpickling can execute arbitrary code; only load trusted files.
track_names_and_lyrics_embedding_matrix = torch.load(track_names_and_lyrics_embedding_matrix_file, weights_only=False)
track_names_and_lyrics_embedding_matrix.shape

  track_names_and_lyrics_embedding_matrix = torch.load(track_names_and_lyrics_embedding_matrix_file)


(34443, 768)

### Definimos las funciones que usaremos para las métricas

In [11]:
def id2genre(idx):
  """Return the genre entry for a track index, or the string 'None' if unknown.

  The index is stringified and resolved to a URI through the module-level
  `idx2uri` mapping; the URI is then looked up in the module-level `genres`
  mapping. A missing `idx2uri` entry still raises KeyError.
  """
  track_uri = idx2uri[str(idx)]
  # The fallback is the literal string 'None', not the None object.
  return genres.get(track_uri, 'None')

In [12]:
def dcg_at_k(r, k):
    """Return the discounted cumulative gain of the first k relevance scores.

    Parameters:
        r (sequence of numbers): Relevance scores in ranking order.
        k (int): Number of leading scores to take into account.

    Returns:
        float: sum of (2**rel - 1) / log2(position + 1) over the first k
        scores (positions start at 1), or 0.0 when there are no scores.
    """
    # np.asfarray was removed in NumPy 2.0; np.asarray(..., dtype=float) is
    # the equivalent that works on both old and new releases.
    gains = np.asarray(r, dtype=float)[:k]
    if not gains.size:
        return 0.0
    discounts = np.log2(np.arange(2, gains.size + 2))
    return np.sum((np.power(2, gains) - 1) / discounts)

def idcg_at_k(k):
    """Return the DCG of an ideal ranking made of k relevant (score 1) items."""
    return dcg_at_k(np.ones(k), k)

def ndcg_at_k(r, k, max_relevant):
    """Return DCG@k of `r` normalised by the ideal DCG.

    Parameters:
        r (sequence of numbers): Relevance scores in ranking order.
        k (int): Cut-off position.
        max_relevant (int): Number of relevant items that exist; the ideal
            ranking holds min(k, max_relevant) relevant items.

    Returns:
        float: The normalised score, or 0.0 when the ideal DCG is zero.
    """
    idcg = idcg_at_k(min(k, max_relevant))
    if not idcg:
        return 0.0
    return dcg_at_k(r, k) / idcg

def calculate_ndcg(recommendations, relevant_items, k=10):
    """
    Calculate ndcg@k given recommendations and relevant items.

    Parameters:
        recommendations (list): List of recommended items (ordered by ranking).
        relevant_items (list): List of relevant items.
        k (int): The value of k for ndcg@k (default is 10).

    Returns:
        float: The ndcg@k score.
    """
    # Membership is tested against a set so each lookup does not rescan the list.
    relevant_lookup = set(relevant_items)

    # Create a relevance vector: 1 if the item is relevant, 0 otherwise
    relevance = [1 if item in relevant_lookup else 0 for item in recommendations]

    # The ideal ranking is sized from the original list length, as before.
    return ndcg_at_k(relevance, k, len(relevant_items))

def recall_at_k(relevant_items, recommended_items, k):
    """Return the fraction of distinct relevant items found in the top k.

    Parameters:
        relevant_items (list): Relevant items (duplicates are collapsed).
        recommended_items (list): Ordered list of recommended items.
        k (int): Number of leading recommendations to consider.

    Returns:
        float: Recall@k, or 0.0 when there are no relevant items (this case
        previously raised ZeroDivisionError).
    """
    relevant_items = set(relevant_items)
    if not relevant_items:
        return 0.0
    recommended_items = set(recommended_items[:k])
    intersection = relevant_items.intersection(recommended_items)
    return len(intersection) / len(relevant_items)

def precision_at_k(relevant_items, recommended_items, k):
    """
    Compute the precision over the first k recommended items.

    Parameters:
        relevant_items (list): List of relevant items.
        recommended_items (list): Ordered list of recommended items.
        k (int): Number of recommended items to consider.

    Returns:
        float: Precision@k.
    """
    relevant_lookup = set(relevant_items)
    # Every top-k entry that is relevant counts as a hit; a repeated
    # recommendation is counted each time it appears.
    hits = sum(1 for item in recommended_items[:k] if item in relevant_lookup)
    # The denominator is always k, even when fewer than k items were supplied.
    return hits / k

In [13]:
def dcg(recommended_items, rel_items_labels):
  """Return the discounted cumulative gain of a sequence of relevance labels.

  `recommended_items` is accepted but not used; only the labels (in ranking
  order) contribute, each as (2**label - 1) / log2(position + 1) with
  positions starting at 1.
  """
  total = 0
  for position, label in enumerate(rel_items_labels, start=1):
    gain = 2**label - 1
    total = total + gain / math.log2(position + 1)
  return total

def ndcg_eq(recommended_items, relevant_items, k=10):
  """Return the DCG of the top-k recommendations divided by an ideal DCG.

  The ideal ranking is built from the hits found inside the top k (all hits
  moved to the front), not from the total number of relevant items.
  Returns 0 when the top k contains no relevant item.
  """
  top_k = recommended_items[:k]
  hit_labels = np.isin(top_k, relevant_items)
  n_hits = np.sum(hit_labels)
  n_misses = len(hit_labels) - n_hits
  # Best possible ordering of the same labels: every hit first, then the misses.
  ideal_labels = np.concatenate([np.ones(n_hits, dtype=int), np.zeros(n_misses, dtype=int)])
  actual_dcg = dcg(top_k, hit_labels)
  ideal_dcg = dcg(top_k, ideal_labels)
  if actual_dcg == 0:
    return 0
  return actual_dcg / ideal_dcg

In [14]:
# NOTE: this cell repeats the definitions of `dcg` and `ndcg_eq` from the
# preceding cell; being the later one, it is the definition left in effect.
def dcg(recommended_items, rel_items_labels):
  """Accumulate (2**label - 1) / log2(rank + 1) over the labels, ranks from 1.

  The first argument is not used by the computation.
  """
  running_total = 0
  rank = 0
  for label in rel_items_labels:
    rank += 1
    running_total += (2**label - 1) / math.log2(rank + 1)
  return running_total

def ndcg_eq(recommended_items, relevant_items, k=10):
  """Normalised DCG of the first k recommendations.

  The normaliser is the DCG of the same hit/miss labels with all hits moved
  to the front. The result is 0 when none of the first k items is relevant.
  """
  considered = recommended_items[:k]
  labels = np.isin(considered, relevant_items)
  hit_count = np.sum(labels)
  # Same labels reordered so every hit precedes every miss.
  best_case = np.concatenate([
      np.ones(hit_count, dtype=int),
      np.zeros(len(labels) - hit_count, dtype=int),
  ])
  achieved = dcg(considered, labels)
  best = dcg(considered, best_case)
  return achieved / best if achieved != 0 else 0

In [15]:
def get_metrics(recommendations, relevant_items, k=10):
    """Return (ndcg, recall, precision, diversity) at cut-off k.

    nDCG comes from `ndcg_eq` (`calculate_ndcg` is an alternative that is
    not used here). Note that the recall and precision helpers take the
    relevant items as their first argument.
    """
    return (
        ndcg_eq(recommendations, relevant_items, k),
        recall_at_k(relevant_items, recommendations, k),
        precision_at_k(relevant_items, recommendations, k),
        diversity_at_n(recommendations, k),
    )

In [16]:
def diversity_at_n(recommendations, n):
    """Return the share of top-n recommendation pairs whose genres differ.

    Parameters:
        recommendations (list): Ordered list of recommended track ids.
        n (int): Number of leading recommendations to consider.

    Returns:
        The fraction of unordered pairs among the first n recommendations
        whose `id2genre` values are not equal, or 0 when fewer than two
        recommendations are considered.
    """
    # Truncate before the genre lookup so only the first n ids are resolved.
    top_genres = [id2genre(track_id) for track_id in recommendations[:n]]

    if len(top_genres) <= 1:
        return 0

    # Local names deliberately avoid shadowing the `id` builtin and the
    # `pairwise_distances` function imported from scikit-learn.
    genre_pairs = list(combinations(top_genres, 2))
    differing = sum(1 for first, second in genre_pairs if first != second)
    return differing / len(genre_pairs)

In [17]:
def sentence_to_embedding(sentence, tokenizer=tokenizer):
    """Embed `sentence` with the module-level BERT model.

    The text is tokenised (truncated to at most 128 tokens), run through
    `bert_model` without gradient tracking, and the last-layer hidden state
    at sequence position 0 is returned as a NumPy array after `squeeze()`.
    """
    encoded = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
    # Every tokenizer output tensor has to live on the same device as the model.
    model_inputs = {}
    for field, tensor in encoded.items():
        model_inputs[field] = tensor.to(device)
    with torch.no_grad():
        hidden_states = bert_model(**model_inputs).last_hidden_state
    # Position 0 of the sequence is used as the representation of the sentence.
    return hidden_states[:, 0, :].squeeze().cpu().numpy()

In [18]:
def read_json_file(file_path):
    """Parse the JSON document stored at `file_path` and return the result.

    The file is decoded as UTF-8 explicitly so that non-ASCII text (for
    example accented names) is read the same way regardless of the
    platform's default locale encoding.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [19]:
# Mapping from a stringified track index to the track name; elsewhere in this
# file it is indexed as idx2name[str(track_id)].
idx2name_file = f'{datadir}/idx2name.json'
idx2name = read_json_file(idx2name_file)

In [20]:
# Content of the 'playlists' key of the MPD slice file. Despite the variable
# name, each entry is later indexed with ['name'], so entries are playlist
# records rather than bare names.
playlists_names = read_json_file(f'{datadir}/mpd.slice.0-999.json')['playlists']

In [21]:
# idx2uri: stringified track index -> URI; genres: URI -> genre entry.
# Both are read as module-level globals by id2genre().
idx2uri = read_json_file(f'{datadir}/idx2uri.json')
genres = read_json_file(f'{datadir}/uri2genres.json')

In [22]:
def find_similar(embedding, query_id, metric='cosine', topk=25, kept = 1):
    """Return the ids of the tracks whose embeddings are closest to a text query.

    Parameters:
        embedding: Matrix with one embedding per row; row i stands for track id i + 1.
        query_id (str): Despite the name, this is the query text; it is embedded
            with `sentence_to_embedding`.
        metric (str): Distance metric handed to `pairwise_distances`.
        topk (int): Number of ids to return.
        kept (int): Number of best matches dropped from the front of the ranking.

    Returns:
        list: Up to `topk` track ids ordered from nearest to farthest, after
        the `kept` nearest have been skipped.
    """
    query_vector = sentence_to_embedding(query_id).reshape(1, -1)
    row_distances = pairwise_distances(query_vector, embedding, metric=metric)[0]
    # Negated distances make "largest tuple" mean "closest row"; ties on
    # distance are ordered by the higher row index first.
    candidates = ((-row_distances[row], row) for row in range(embedding.shape[0]))
    closest = heapq.nlargest(topk + kept, candidates)
    ranked_ids = [row + 1 for _, row in closest]
    return ranked_ids[kept:]

In [23]:
def find_similar_str(embedding, query_id, metric='cosine', topk=25, kept = 1):
    """Return the names of the tracks that `find_similar` retrieves for a query.

    Takes the same parameters as `find_similar` and forwards them unchanged;
    each returned track id is translated to its name through `idx2name`.
    Delegating keeps the ranking logic in a single place instead of
    maintaining a second copy of it here.
    """
    rec_ids = find_similar(embedding, query_id, metric=metric, topk=topk, kept=kept)
    return [idx2name[str(rec_id)] for rec_id in rec_ids]

In [24]:
# Quick manual check: show the track names retrieved for the query 'Perfect'.
find_similar_str(tracks_embedding_matrix,'Perfect')

['Perfect',
 'Perfect',
 'Perfect',
 'Perfect',
 'Perfect',
 'Perfectly Perfect',
 "Nobody's Perfect",
 "Nobody's Perfect",
 'Perfect Day',
 'Perfect Day',
 'Perfect Day',
 'Perfect Storm',
 'Wonderful',
 'Wonderful',
 'Wonderful',
 'Perfect for Me',
 'Everything is Honey',
 'Life Is Beautiful',
 "Tonight You're Perfect",
 'Beautiful',
 'Beautiful',
 'Beautiful',
 'Beautiful',
 'Beautiful',
 'Beautiful']

In [26]:
# Evaluate every recommendation strategy on the test playlists and print one
# table row per strategy with nDCG / recall / precision / diversity at each
# cut-off.

size = 35  # width of the label column in the printed table

# Cut-offs at which the metrics are reported, and the number of
# recommendations a strategy has to supply to cover the largest one.
cutoffs = (10, 20, 25)
n_recs = max(cutoffs)

# random.sample() needs a sequence, so the set of tracks becomes a list.
tracks = list(tracks)

# Track ids ordered from most to least frequent in the training playlists.
popular_tracks = sorted(popularities, key=popularities.get, reverse=True)


def _evaluate(recommend):
    """Return, for each cut-off, the mean (ndcg, recall, precision, diversity).

    `recommend(playlist, rel)` must return the ranked recommendations for one
    test playlist, given its id and its list of relevant track ids.
    """
    per_cutoff = {k: [] for k in cutoffs}
    for playlist, rel in test_playlists_tracks.items():
        rec = recommend(playlist, rel)
        for k in cutoffs:
            per_cutoff[k].append(get_metrics(rec, rel, k))
    # Each metric is averaged on its own over all test playlists.
    return [
        [np.mean([scores[metric] for scores in per_cutoff[k]]) for metric in range(4)]
        for k in cutoffs
    ]


def _print_row(name, means):
    """Print one table row: the padded label followed by the metric groups."""
    print(name.ljust(size), end=' ')
    print((' ' * 7).join(
        f'{ndcg:.5f}  {recall:.5f}    {precision:.5f}       {diversity:.5f}'
        for ndcg, recall, precision, diversity in means
    ))


def _by_title(matrix):
    """Build a strategy that queries `matrix` with the playlist's name."""
    def recommend(playlist, rel):
        return find_similar(matrix, playlists_names[playlist]['name'])
    return recommend


def _by_seed_tracks(matrix, n_seeds):
    """Build a strategy that queries `matrix` with the first track names.

    The names of the first `n_seeds` tracks of the playlist are joined with
    spaces and used as the query. As before, the best match plus `n_seeds`
    further matches are skipped, but they are now skipped through `kept`
    instead of being sliced off a 25-item list, so a full `n_recs`
    recommendations remain and the @25 metrics (and @20 for 5 seeds) are no
    longer computed on a shortened list.
    """
    def recommend(playlist, rel):
        sentence = ' '.join(idx2name[str(track)] for track in rel[:n_seeds])
        return find_similar(matrix, sentence, topk=n_recs, kept=1 + n_seeds)
    return recommend


# (label, strategy) pairs, in the order the rows are printed.
# NOTE: no random seed is set in this cell, so the 'Random' row changes
# between runs.
experiments = [
    ('Random', lambda playlist, rel: random.sample(tracks, n_recs)),
    ('Most Popular', lambda playlist, rel: popular_tracks[:n_recs]),
    ('CB Title (Name)', _by_title(tracks_embedding_matrix)),
    ('CB 1 track (Name)', _by_seed_tracks(tracks_embedding_matrix, 1)),
    ('CB 5 tracks (Name)', _by_seed_tracks(tracks_embedding_matrix, 5)),
    ('CB (Lyrics) Title', _by_title(track_lyrics_embedding_matrix)),
    ('CB 1 track (Lyrics)', _by_seed_tracks(track_lyrics_embedding_matrix, 1)),
    ('CB 5 tracks (Lyrics)', _by_seed_tracks(track_lyrics_embedding_matrix, 5)),
    ('CB Title (Name and Lyrics)', _by_title(track_names_and_lyrics_embedding_matrix)),
    ('CB 1 track (Name and Lyrics)', _by_seed_tracks(track_names_and_lyrics_embedding_matrix, 1)),
    ('CB 5 tracks (Name and Lyrics)', _by_seed_tracks(track_names_and_lyrics_embedding_matrix, 5)),
]

print(' ' * size, 'ndcg@10  recall@10  precision@10  diversity@10  ndcg@20  recall@20  precision@20  diversity@20  ndcg@25  recall@25  precision@25  diversity@25')
print('-' * 175)

for name, recommend in experiments:
    _print_row(name, _evaluate(recommend))



                                    ndcg@10  recall@10  precision@10  diversity@10  ndcg@20  recall@20  precision@20  diversity@20  ndcg@25  recall@25  precision@25  diversity@25
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Random                              0.00860  0.00050    0.00150       0.93289       0.00860  0.00050    0.00075       0.92879       0.01075  0.00083    0.00100       0.92965
Most Popular                        0.05456  0.00400    0.01200       0.75556       0.06410  0.00818    0.01225       0.79474       0.07228  0.01055    0.01260       0.80333
CB Title (Name)                     0.02318  0.00201    0.00600       0.89956       0.02587  0.00251    0.00375       0.90505       0.02809  0.00302    0.00360       0.90648
CB 1 track (Name)                   0.08424  0.00622    0.01850       0.89067       0.08876  0.00844    0.01250       0.893