# Librerías

In [1]:
from collections import defaultdict
from metricas import dcg, idcg, average_precision, novelty_for_single_user, diversity_for_single_user
from sklearn.metrics import pairwise_distances
import numpy as np
import pandas as pd
import random as rd

# Datasets

In [2]:
track_features = pd.read_csv('./data/processed_track_features.csv')
sessions = pd.read_csv('./data/processed_sessions.csv')
sessions.head()

Unnamed: 0,session_id,track_id
0,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_0479f24c-27d2-46d6-a00c-7ec928f2b539
1,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_9099cd7b-c238-47b7-9381-f23f2c1d1043
2,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_fc5df5ba-5396-49a7-8b29-35d0d28249e0
3,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_23cff8d6-d874-4b20-83dc-94e450e8aa20
4,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_64f3743c-f624-46bb-a579-0f3f9a07a123


# Preparaciones

In [3]:
true_top_n_for_each_user = defaultdict(list)
for i in sessions.index:
    row = sessions.loc[i]
    true_top_n_for_each_user[row['session_id']].append(row['track_id'])

In [4]:
edited_track_features = track_features.set_index('track_id')

# Modelo

In [7]:
def alpha_beta_recommender(session_id, sessions, track_features, alpha=0, beta=0, metric='cosine', topk=10):
    
    session_tracks = sessions[sessions.session_id == session_id]
    tracks_played = session_tracks.merge(track_features, left_on="track_id", right_on="track_id")
    tracks_not_played = track_features[~track_features.track_id.isin(tracks_played)]
    initial_track = np.array(tracks_played.iloc[0].drop(labels=["session_id", "track_id"]))
    
    embedding = tracks_not_played.drop(labels="track_id", axis=1).values
    not_played_distances = pairwise_distances(initial_track.reshape(1,-1), embedding, metric=metric)
    tracks_not_played["distance"] = not_played_distances[0]
    tracks_not_played.sort_values(by=["distance"])
    
    embedding= tracks_played.drop(labels=["session_id", "track_id"], axis=1).values
    played_distances = pairwise_distances(initial_track.reshape(1,-1), embedding, metric=metric)
    tracks_played["distance"] = played_distances[0]
    tracks_played.sort_values(by=["distance"])
    
    rec = []
    for i in range(topk):
        # No escuchados
        if rd.random() < alpha:
            loc = int(beta*len(tracks_not_played)//1)
            track = tracks_not_played.iloc[loc]
            while (track.track_id in rec) and (loc < len(tracks_played)):
                loc += 1
                track = tracks_not_played.iloc[loc]
            rec.append(track.track_id)
        # Ya escuchados
        else:
            loc = int(beta*len(tracks_played)//1)
            if loc == 0:
                loc += 1
            track = tracks_played.iloc[loc]
            while (track.track_id in rec) and (loc < len(tracks_played)):
                loc += 1
                track = tracks_played.iloc[loc]
            rec.append(track.track_id)
    return rec
    

# Resultados

In [8]:
unique_session_ids = sessions['session_id'].unique()
unique_session_ids_length = len(unique_session_ids)
metrics = ('NDCG@10', 'MAP@10', 'Novelty', 'Diversity')
alphas = (0, 0.3, 0.7, 1)
betas = (0, 0.3, 0.7, 1)
results = {metric: pd.DataFrame(index=alphas, columns=betas) for metric in metrics}
for alpha in alphas:
    for beta in betas:
        ndcg = 0
        mean_average_precision = 0
        novelty = 0
        diversity = 0
        for session_id in unique_session_ids:
            recommendations = alpha_beta_recommender(session_id, sessions, track_features, alpha, beta)
            true_top_n = true_top_n_for_each_user[session_id]
            ndcg += dcg(true_top_n, recommendations, 10)
            mean_average_precision += average_precision(true_top_n, recommendations, 10)
            novelty += novelty_for_single_user(session_id, recommendations, sessions, edited_track_features)
            diversity += diversity_for_single_user(recommendations, edited_track_features)
        results['NDCG@10'].loc[alpha, beta] = ndcg / idcg(10) / len(unique_session_ids)
        results['MAP@10'].loc[alpha, beta] = mean_average_precision / len(unique_session_ids)
        results['Novelty'].loc[alpha, beta] = novelty / len(unique_session_ids)
        results['Diversity'].loc[alpha, beta] = diversity / len(unique_session_ids)
        display(results['Novelty'])

IndexError: single positional indexer is out-of-bounds