In [1]:
import os
import json
import numpy as np
import math
from itertools import combinations

In [2]:
# Mount Google Drive at /content/drive so the project files referenced below are reachable.
# This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Folder whose files are listed and evaluated as FastFM recommendation results.
datadir = '/content/drive/MyDrive/UC/Ramos/Sis. Rec./project/FastFMResults'
# Folder holding the DeepFM recommendation JSON files.
datadirDeep = '/content/drive/MyDrive/UC/Ramos/Sis. Rec./project/DeepFM'

In [4]:
# Build playlists_tracks: playlist id -> list of item ids, in file order.
# Each line of the file is expected to hold "<playlist id> <item id>" separated by one space.
with open('/content/drive/MyDrive/UC/Ramos/Sis. Rec./project/data/pid_itemid_1000p.txt', 'r') as f:
    lines = f.readlines()

playlists_tracks = {}

for line in lines:
    items = line.replace('\n', '').split(' ')
    pid = int(items[0])
    # setdefault creates the playlist's list the first time the id is seen.
    playlists_tracks.setdefault(pid, []).append(int(items[1]))

In [5]:
# Base folder of the metadata JSON files.
d = '/content/drive/MyDrive/UC/Ramos/Sis. Rec./project/data/'

# uri -> genre entry; read by id2genre.
with open(os.path.join(d, 'uri2genres.json'), 'r') as f:
    genres = json.load(f)

# item index (string key) -> uri; read by id2genre.
with open(os.path.join(d, 'idx2uri.json'), 'r') as f:
    idx2uri = json.load(f)

In [6]:
def id2genre(idx):
  """Return the genre entry of an item index, or the string 'None' if unknown.

  The index is shifted by one before it is looked up in idx2uri; the
  resulting uri is then looked up in genres.
  """
  # +1 because index 0 is used for FastFM
  # (presumably idx2uri keys start at 1 -- confirm against the data file)
  uri = idx2uri[str(idx + 1)]
  return genres.get(uri, 'None')

## Métricas

In [7]:
def dcg_at_k(r, k):
    """Discounted cumulative gain of the first k relevance scores.

    Parameters:
        r (sequence of numbers): Relevance scores in ranked order.
        k (int): Number of leading positions to consider.

    Returns:
        float: sum((2**rel - 1) / log2(position + 1)) over the first k
        positions (1-based), or 0.0 when there is nothing to score.
    """
    # np.asfarray was removed in NumPy 2.0; asarray with dtype=float does the same conversion.
    r = np.asarray(r, dtype=float)[:k]
    if r.size:
        # Position i (1-based) is discounted by log2(i + 1), hence arange(2, size + 2).
        return np.sum((np.power(2, r) - 1) / np.log2(np.arange(2, r.size + 2)))
    return 0.0

def idcg_at_k(k):
    """Ideal DCG at k: the DCG of k items that all have relevance 1."""
    all_relevant = np.ones(k)
    return dcg_at_k(all_relevant, k)

def ndcg_at_k(r, k, max_relevant):
    """Normalized DCG at k.

    Parameters:
        r (sequence of numbers): Relevance scores in ranked order.
        k (int): Cutoff position.
        max_relevant (int): Number of relevant items that exist; the ideal
            DCG is computed for min(k, max_relevant) fully relevant items.

    Returns:
        float: dcg_at_k(r, k) divided by the ideal DCG, or 0.0 when the
        ideal DCG is zero.
    """
    ideal = idcg_at_k(min(k, max_relevant))
    if ideal:
        return dcg_at_k(r, k) / ideal
    return 0.0

def calculate_ndcg(recommendations, relevant_items, k=10):
    """
    Compute ndcg@k from a ranked recommendation list and the relevant items.

    Parameters:
        recommendations (list): Recommended items, best first.
        relevant_items (list): Items considered relevant.
        k (int): Cutoff for ndcg@k (default is 10).

    Returns:
        float: The ndcg@k score.
    """
    # Binary relevance: 1 where the recommended item is relevant, 0 elsewhere.
    relevance = [int(item in relevant_items) for item in recommendations]

    n_relevant = len(relevant_items)
    return ndcg_at_k(relevance, k, n_relevant)

def recall_at_k(relevant_items, recommended_items, k):
    """
    Compute recall over the first k recommended items.

    Parameters:
        relevant_items (list): Items considered relevant (duplicates are ignored).
        recommended_items (list): Recommended items, best first.
        k (int): Number of leading recommendations to consider.

    Returns:
        float: Fraction of distinct relevant items found in the top k, or
        0.0 when there are no relevant items.
    """
    relevant = set(relevant_items)
    if not relevant:
        # Recall is undefined without ground truth; report 0.0 instead of dividing by zero.
        return 0.0
    hits = relevant.intersection(recommended_items[:k])
    return len(hits) / len(relevant)

def precision_at_k(relevant_items, recommended_items, k):
    """
    Compute precision over the first k recommended items.

    Parameters:
        relevant_items (list): Items considered relevant.
        recommended_items (list): Recommended items, best first.
        k (int): Number of leading recommendations to consider.

    Returns:
        float: Precision@k. The denominator is always k, even when fewer
        than k items were recommended.
    """
    relevant = set(relevant_items)
    hits = sum(1 for item in recommended_items[:k] if item in relevant)
    return hits / k

In [8]:
def dcg(recommended_items, rel_items_labels):
  """Discounted cumulative gain of a ranked list of relevance labels.

  recommended_items is accepted but not used; only rel_items_labels
  (one label per ranked position, best first) contributes to the score.
  Returns 0 for an empty label list.
  """
  total = 0
  # Ranks are 1-based; the item at rank r is discounted by log2(r + 1).
  for rank, rel in enumerate(rel_items_labels, start=1):
    total += (2**rel - 1) / math.log2(rank + 1)
  return total

def ndcg_eq(recommended_items, relevant_items, k=10):
  """NDCG of the top-k recommendations with binary relevance.

  The ideal ranking is built from the hits found inside the top-k list
  itself (all hits first, then the misses), not from the full set of
  relevant items. Returns 0 when the top-k contains no relevant item.
  """
  top_k = recommended_items[:k]
  hit_mask = np.isin(top_k, relevant_items)
  n_hits = np.sum(hit_mask)

  # Same length as the top-k list: n_hits ones followed by zeros.
  ideal_labels = np.zeros(len(hit_mask), dtype=int)
  ideal_labels[:n_hits] = 1

  actual = dcg(top_k, hit_mask)
  ideal = dcg(top_k, ideal_labels)
  if actual != 0:
    return actual / ideal
  return 0

In [9]:
def diversity_at_n(recommendations, n):
    """Share of item pairs in the top n whose genre entries differ.

    Parameters:
        recommendations (list): Recommended item indices, best first.
        n (int): Number of leading recommendations to consider.

    Returns:
        Number of unordered pairs with different genre entries divided by
        the total number of pairs, or 0 when fewer than two items are left.
    """
    # Slice first so only the top n items are resolved; the original resolved
    # every recommendation and then discarded everything beyond n.
    top_genres = [id2genre(item_id) for item_id in recommendations[:n]]
    count = len(top_genres)

    if count <= 1:
        return 0

    differing = sum(1 for g1, g2 in combinations(top_genres, 2) if g1 != g2)
    total_pairs = count * (count - 1) // 2
    return differing / total_pairs

In [10]:
def get_metrics(recommendations, relevant_items, k=10):
    """Return (ndcg, recall, precision, diversity) at cutoff k.

    NDCG comes from ndcg_eq; the other values come from recall_at_k,
    precision_at_k and diversity_at_n respectively.
    """
    return (
        ndcg_eq(recommendations, relevant_items, k),
        recall_at_k(relevant_items, recommendations, k),
        precision_at_k(relevant_items, recommendations, k),
        diversity_at_n(recommendations, k),
    )

In [11]:
# Cutoffs reported for every model, in column order.
KS = (10, 20, 25)

# (row label, file name inside datadirDeep) for each DeepFM variant, in print order.
DEEPFM_RUNS = (
    ('DeepFM', 'deepfm_feature_recomendations_reverted.json'),
    ('DeepFM - title', 'deepfm_title_recomendations.json'),
    ('DeepFM - title and lyrics', 'deepfm_title_and_lyrics_recomendations.json'),
    ('DeepFM - lyrics', 'deepfm_lyrics_recomendations.json'),
)


def _model_name(filename):
    """Row label of a FastFM result file: the name without 'recommendations_' and '.json'."""
    return filename.replace('recommendations_', '').replace('.json', '')


def _load_json(path):
    """Read and parse one JSON file."""
    with open(path, 'r') as f:
        return json.load(f)


def _mean_metrics(pairs, ks=KS):
    """Average get_metrics over (recommended, relevant) pairs for every cutoff.

    Returns a dict mapping k -> (ndcg, recall, precision, diversity) means.
    """
    collected = {k: [] for k in ks}
    for rec, rel in pairs:
        for k in ks:
            collected[k].append(get_metrics(rec, rel, k))

    means = {}
    for k, rows in collected.items():
        # zip(*rows) regroups the per-playlist tuples into one column per metric;
        # with no playlists every mean is nan, as np.mean of an empty list is.
        columns = list(zip(*rows)) if rows else [[]] * 4
        means[k] = tuple(np.mean(column) for column in columns)
    return means


def _print_row(name, means, width, ks=KS):
    """Print one table row: the padded name followed by the four means per cutoff."""
    cells = []
    for k in ks:
        ndcg, recall, precision, diversity = means[k]
        # Spacing matches the column headers printed above the table.
        cells.append(f'{ndcg:.5f}  {recall:.5f}    {precision:.5f}       {diversity:.5f}')
    print(name.ljust(width), '       '.join(cells))


# Sorted so the row order is reproducible (os.listdir order is arbitrary);
# entries that are not .json files are skipped instead of crashing json.load.
files = sorted(entry for entry in os.listdir(datadir) if entry.endswith('.json'))

# Width of the name column: longest FastFM label plus one.
size = max((len(_model_name(file)) for file in files), default=0) + 1

header = '  '.join(f'ndcg@{k}  recall@{k}  precision@{k}  diversity@{k}' for k in KS)
print(' ' * size, header)
print('-' * 175)

# FastFM: every file holds the recommended ('rec') and relevant ('rel') items per playlist.
for file in files:
    data = _load_json(os.path.join(datadir, file))
    recommendations = data['rec']
    relevant_items = data['rel']
    pairs = (
        (recommendations[playlist], relevant_items[playlist])
        for playlist in recommendations
    )
    _print_row(_model_name(file), _mean_metrics(pairs), size)

# DeepFM: files hold only recommendations; relevant items come from playlists_tracks.
for label, filename in DEEPFM_RUNS:
    deepfm_data = _load_json(os.path.join(datadirDeep, filename))
    pairs = (
        # Ids are shifted down by one before scoring
        # (presumably DeepFM ids are 1-based -- confirm against the model export).
        ([x - 1 for x in rec], playlists_tracks[int(playlist)])
        for playlist, rec in deepfm_data.items()
    )
    _print_row(label, _mean_metrics(pairs), size)



                                        ndcg@10  recall@10  precision@10  diversity@10  ndcg@20  recall@20  precision@20  diversity@20  ndcg@25  recall@25  precision@25  diversity@25
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
danceability                            0.01005  0.00083    0.00250       0.94867       0.02256  0.00267    0.00400       0.94047       0.02368  0.00283    0.00340       0.93738
key                                     0.00000  0.00000    0.00000       0.92189       0.00000  0.00000    0.00000       0.91368       0.00000  0.00000    0.00000       0.89562
loudness                                0.00151  0.00017    0.00050       0.88178       0.00264  0.00033    0.00050       0.89292       0.00264  0.00033    0.00040       0.90570
mode                                    0.01967  0.00150    0.00450       0.97722       0.02960  0.00283   