# Librerías

In [1]:
from collections import defaultdict
from metricas import dcg, idcg, average_precision, novelty_for_single_user, diversity_for_single_user
from sklearn.metrics import pairwise_distances
import numpy as np
import pandas as pd
import random as rd

# Datasets

In [2]:
track_features = pd.read_csv('./data/processed_track_features.csv')
sessions = pd.read_csv('./data/processed_sessions.csv')
sessions.head()

Unnamed: 0,session_id,track_id
0,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_0479f24c-27d2-46d6-a00c-7ec928f2b539
1,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_9099cd7b-c238-47b7-9381-f23f2c1d1043
2,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_fc5df5ba-5396-49a7-8b29-35d0d28249e0
3,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_23cff8d6-d874-4b20-83dc-94e450e8aa20
4,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_64f3743c-f624-46bb-a579-0f3f9a07a123


# Preparaciones

In [3]:
user_histories = defaultdict(list)
for i in sessions.index:
    row = sessions.loc[i]
    user_histories[row['session_id']].append(row['track_id'])
for user_id in user_histories.keys():
    user_histories[user_id] = np.array(user_histories[user_id])

In [4]:
edited_track_features = track_features.rename(columns={'track_id': 'item_id'}).set_index('item_id')
edited_sessions = sessions.rename(columns={'session_id': 'user_id', 'track_id': 'item_id'})

# Modelo

In [5]:
class AlphaBetaRecommender:

    def __init__(self, item_features: pd.DataFrame, interactions: pd.DataFrame) -> None:
        self.item_features = item_features
        self.user_histories = defaultdict(list)
        for i in interactions.index:
            row = interactions.loc[i]
            self.user_histories[row['user_id']].append(row['item_id'])
        for user_id in self.user_histories.keys():
            user_histories[user_id] = np.array(user_histories[user_id])
    
    def recommend(self, user_id: str, alpha: float, beta: float, number_of_recommendations=10):
        user_history_ids = self.user_histories[user_id]
        recommendations = []
        # Primero, recomendar la proporción de canciones conocidas dada por 1-alpha:
        for _ in np.arange(round((1 - alpha) * number_of_recommendations)):
            recommendations.append(rd.choice(user_history_ids))
        # Luego, rellenar con canciones con distancia dada por beta hasta alcanzar number_of_recommendations.
        user_history_features = np.array(self.item_features.loc[user_history_ids].mean()) 
        distance_ranking = pd.DataFrame()
        distance_ranking.index = self.item_features.index
        distance_ranking.loc[:, 'distances'] = pairwise_distances(
            X=user_history_features.reshape(1, -1),
            Y=self.item_features,
            metric='cosine'
        )[0]
        distance_ranking = distance_ranking.sort_values(by='distances')
        while len(recommendations) < number_of_recommendations:
            lower_index = round(beta * distance_ranking.shape[0])
            upper_index = lower_index + number_of_recommendations
            if upper_index >= distance_ranking.shape[0]:
                upper_index = distance_ranking.shape[0]
                lower_index = upper_index - number_of_recommendations
            recommendations += list(distance_ranking.iloc[lower_index:upper_index].index)
        return np.array(recommendations)

In [6]:
alpha_beta_recommender = AlphaBetaRecommender(item_features=edited_track_features, interactions=edited_sessions)
alpha_beta_recommender.recommend('0_00006f66-33e5-4de7-a324-2d18e439fc1e', 0.5, 0.5)

array(['t_fc5df5ba-5396-49a7-8b29-35d0d28249e0',
       't_87d95b75-af5c-4ef6-8dc4-cd888ae17cce',
       't_59dc3fcd-7aec-4da5-a747-b59b19bab3bb',
       't_c815228b-3212-4f9e-9d4f-9cb19b248184',
       't_358c9cce-7a1e-4dd4-81de-206dda80363f',
       't_685e1ab0-b86c-42ba-9c19-dfd1049a1eed',
       't_e4648ae2-6c81-4d9f-9e5d-19d8dfe62824',
       't_60141c22-11d4-4b3b-9cb5-eb997e4b0f26',
       't_0d0c966d-fc20-4e45-a430-ff392195c0e4',
       't_fc14e596-3ce1-43d1-87e2-038534a5ea5d',
       't_edd2295c-9843-4a1c-9b11-84f3658c5e95',
       't_1f56b84c-941d-4b57-a927-afdfe287568f',
       't_1ca2fe11-a289-4cd7-ab2a-14e53d4122bd',
       't_613f889d-2450-46d2-ae24-d6a0d668e0f6',
       't_98c00dfe-beb6-4a40-9946-86cfedffb64f'], dtype='<U38')

In [7]:
unique_session_ids = sessions['session_id'].unique()
unique_session_ids_length = len(unique_session_ids)
alphas = (0, 0.3, 0.7, 1)
betas = (0, 0.3, 0.7, 1)
for alpha in alphas:
    for beta in betas:
        ndcg = 0
        mean_average_precision = 0
        novelty = 0
        diversity = 0
        for session_id in unique_session_ids:
            recommendations = alpha_beta_recommender.recommend(session_id, alpha, beta)
            user_history = user_histories[session_id]
            ndcg += dcg(user_history, recommendations, 10)
            mean_average_precision += average_precision(user_history, recommendations, 10)
            novelty += novelty_for_single_user(user_history, recommendations, edited_track_features)
            diversity += diversity_for_single_user(recommendations, edited_track_features)
        print(f'\nalpha = {alpha}, beta = {beta}')
        print('NDCG@10:', ndcg / idcg(10) / unique_session_ids_length)
        print('MAP@10:', mean_average_precision / unique_session_ids_length)
        print('Novelty:', novelty / unique_session_ids_length)
        print('Diversity:', diversity / unique_session_ids_length)


alpha = 0, beta = 0
NDCG@10: 0.6758093255826711
MAP@10: 0.7451784499716592
Novelty: 0.1540788732940982
Diversity: 0.15421483370907788

alpha = 0, beta = 0.3
NDCG@10: 0.675988926948734
MAP@10: 0.7447972752110126
Novelty: 0.15408443648592754
Diversity: 0.15408715080450058

alpha = 0, beta = 0.7
NDCG@10: 0.6765732656672353
MAP@10: 0.7454687118134292
Novelty: 0.15422633610619146
Diversity: 0.15417863459162034

alpha = 0, beta = 1
NDCG@10: 0.6746148318558681
MAP@10: 0.7461064703640723
Novelty: 0.15453394748959734
Diversity: 0.15486807642040984
