# Librerías

In [1]:
from collections import defaultdict
import pandas as pd
import ipywidgets as widgets
from sklearn.metrics import pairwise_distances
import numpy as np
import random as rd

# Importar datos preprocesados

In [2]:
track_features = pd.read_csv('./data/processed_track_features.csv')
sessions = pd.read_csv('./data/processed_sessions.csv')
display(track_features.head(3))
display(sessions.head(3))

Unnamed: 0,track_id,duration,year,us_popularity_estimate,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,t_a540e552-16d4-42f8-a185-232bd650ea7d,0.061365,0.964392,0.999754,0.45804,0.399767,0.817709,3.254327e-06,0,0.132124,0.1873,0,0.079985,0.760082,4,0.935512
1,t_67965da0-132b-4b1e-8a69-0ef99b32287c,0.104988,0.964392,0.999694,0.916272,0.491235,0.154258,8.344854e-12,0,0.163281,0.228433,0,0.083877,0.435429,3,0.359675
2,t_0614ecd3-a7d5-40a1-816e-156d5872a467,0.089967,0.964886,0.996025,0.812884,0.491625,0.358813,2.927475e-10,0,0.090115,0.175367,1,0.038777,0.480795,4,0.726769


Unnamed: 0,session_id,track_id
0,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_0479f24c-27d2-46d6-a00c-7ec928f2b539
1,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_9099cd7b-c238-47b7-9381-f23f2c1d1043
2,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_fc5df5ba-5396-49a7-8b29-35d0d28249e0


# Crear Top N verdadero por usuario

In [6]:
# true_top_n_for_each_user es un diccionario de la forma
# {usuario1: [canción1, canción2, ...], usuario2: [canción3, canción4,...], ...}
# que contiene las canciones con las que ha interactuado un usuario. Consideraremos que si el
# usuario ha interactuado con la canción, entonces la canción es relevante para el usuario.
true_top_n_for_each_user = defaultdict(list)
for i in sessions.index:
    row = sessions.loc[i]
    true_top_n_for_each_user[row['session_id']].append(row['track_id'])

# Resultados

## 1. Alpha-Beta Recommender

Basado en práctico Content Based (Texto) (https://github.com/PUC-RecSys-Class/RecSysPUC-2022/blob/master/practicos/Content_Based_texto.ipynb)

In [7]:
def find_similar_songs(sessions, track_features, alpha, beta, session_id=None, metric='cosine', topk=5):
    
    session_tracks = sessions[sessions.session_id == session_id]
    tracks_played = session_tracks.merge(track_features, left_on="track_id", right_on="track_id")
    tracks_not_played = track_features[~track_features.track_id.isin(tracks_played)]
    initial_track = np.array(tracks_played.iloc[0].drop(labels=["session_id", "track_id"]))
    
    embedding = tracks_not_played.drop(labels="track_id", axis=1).values
    not_played_distances = pairwise_distances(initial_track.reshape(1,-1), embedding, metric=metric)
    tracks_not_played["distance"] = not_played_distances[0]
    tracks_not_played.sort_values(by=["distance"])
    
    embedding= tracks_played.drop(labels=["session_id", "track_id"], axis=1).values
    played_distances = pairwise_distances(initial_track.reshape(1,-1), embedding, metric=metric)
    tracks_played["distance"] = played_distances[0]
    tracks_played.sort_values(by=["distance"])
    
    rec = []
    
    for i in range(topk):
        # No escuchados
        if rd.random() < alpha:
            loc = int(beta*len(tracks_not_played)//1)
            track = tracks_not_played.iloc[loc]
            while track.track_id in rec:
                loc += 1
                track = tracks_not_played.iloc[loc]
            rec.append(track.track_id)
        # Ya escuchados
        else:
            loc = int(beta*len(tracks_played)//1)
            if loc == 0:
                loc += 1
            track = tracks_played.iloc[loc]
            while track.track_id in rec:
                loc += 1
                track = tracks_played.iloc[loc]
            rec.append(track.track_id)
    
    return rec

In [8]:
random_session = rd.choice(sessions.session_id.unique())
random_session

'0_085131ec-8d9e-4cdd-afad-30b0086ecb27'

In [9]:
alpha = widgets.FloatSlider(min=0, max=1, step=0.01, description="alpha")
beta = widgets.FloatSlider(min=0, max=1, step=0.01, description="beta")
display(alpha)
display(beta)

FloatSlider(value=0.0, description='alpha', max=1.0, step=0.01)

FloatSlider(value=0.0, description='beta', max=1.0, step=0.01)

In [10]:
rec = find_similar_songs(sessions, track_features, alpha.value, beta.value, session_id=random_session, \
                         metric='euclidean', topk=5)
rec

['t_7a6b27e8-04d9-4ccf-aca7-6a1510c46f87',
 't_2a58f6bc-feb5-47f2-89bc-d1c28c14eff1',
 't_f31f64f8-a30a-4cbb-9dc0-32ac8928f213',
 't_f72c319b-0fad-48b9-9738-f97c608927fb',
 't_fad4877b-4b36-414e-a995-9a545534fbd1']

In [11]:
rec = find_similar_songs(sessions, track_features, alpha.value, beta.value, session_id=random_session, \
                         metric='cosine', topk=5)
rec

['t_7a6b27e8-04d9-4ccf-aca7-6a1510c46f87',
 't_2a58f6bc-feb5-47f2-89bc-d1c28c14eff1',
 't_f31f64f8-a30a-4cbb-9dc0-32ac8928f213',
 't_f72c319b-0fad-48b9-9738-f97c608927fb',
 't_fad4877b-4b36-414e-a995-9a545534fbd1']

## 2. LightFM