# Librerías

In [None]:
from collections import defaultdict
from metricas import dcg, idcg, average_precision, novelty_for_single_user, diversity_for_single_user
from sklearn.metrics import pairwise_distances
import numpy as np
import pandas as pd
import random as rd

# Datasets

In [None]:
# Load the pre-processed track features and listening sessions from disk.
track_features = pd.read_csv(filepath_or_buffer='./data/processed_track_features.csv')
sessions = pd.read_csv(filepath_or_buffer='./data/processed_sessions.csv')
# Last expression of the cell: preview the first five session rows.
sessions.head(n=5)

# Preparaciones

In [None]:
# Map every session id to the array of track ids it contains, in file order.
#
# The two columns are walked in lockstep instead of fetching each record with
# ``sessions.loc[i]``: that lookup builds a whole row Series per record (very
# slow on large frames), coerces all columns of the row to a common dtype, and
# returns a DataFrame (breaking the dict key) when index labels are duplicated.
user_histories = defaultdict(list)
for session_id, track_id in zip(sessions['session_id'], sessions['track_id']):
    user_histories[session_id].append(track_id)
# Freeze each history into a NumPy array. Iterating over a snapshot of the
# keys keeps the reassignment independent of the dict being mutated.
for user_id in list(user_histories):
    user_histories[user_id] = np.array(user_histories[user_id])

In [None]:
# Re-label the dataset columns with the generic names used by the recommender
# ('user_id' / 'item_id'); the feature table is additionally indexed by item id.
edited_track_features = (
    track_features
    .rename({'track_id': 'item_id'}, axis='columns')
    .set_index('item_id')
)
edited_sessions = sessions.rename(
    {'session_id': 'user_id', 'track_id': 'item_id'},
    axis='columns',
)

# Modelo

In [None]:
class AlphaBetaRecommender:
    """Recommender that mixes items from the user's history with ranked items.

    A share ``1 - alpha`` of each list is sampled from the items the user has
    already interacted with. The rest is taken from the catalog, ranked by
    cosine distance to the mean feature vector of the user's history, starting
    at the relative position ``beta`` of that ranking.
    """

    def __init__(self, item_features: pd.DataFrame, interactions: pd.DataFrame) -> None:
        """Store the item features and group the interactions by user.

        Args:
            item_features: One row per item, indexed by item id. Every column
                is used as a component of the item's feature vector.
            interactions: Frame with ``user_id`` and ``item_id`` columns. Row
                order is preserved inside each user's history.
        """
        self.item_features = item_features
        self.user_histories = defaultdict(list)
        # Walk the two columns in lockstep; fetching rows one by one with
        # ``.loc`` is far slower and fails on duplicated index labels.
        for user_id, item_id in zip(interactions['user_id'], interactions['item_id']):
            self.user_histories[user_id].append(item_id)
        # Convert the histories stored on *this instance* to arrays (the
        # previous code converted an unrelated module-level dict instead).
        for user_id in list(self.user_histories):
            self.user_histories[user_id] = np.array(self.user_histories[user_id])

    def recommend(
        self,
        user_id: str,
        alpha: float,
        beta: float,
        number_of_recommendations: int = 10,
    ) -> np.ndarray:
        """Return item ids recommended to ``user_id``.

        Args:
            user_id: Key of the user in ``self.user_histories``.
            alpha: Share of the list that is NOT drawn from the user's own
                history. ``round((1 - alpha) * number_of_recommendations)``
                items are sampled uniformly, with replacement, from the
                history; the count is clamped to
                ``[0, number_of_recommendations]``.
            beta: Relative position in the ascending cosine-distance ranking
                at which the fill window starts (0 = closest to the user's
                mean feature vector). The window is shifted back when it would
                run past the end of the ranking.
            number_of_recommendations: Length of the returned list.

        Returns:
            Array with the sampled history items first, followed by the fill
            items. It has exactly ``number_of_recommendations`` entries unless
            the catalog is empty, in which case only the history items are
            returned.

        Raises:
            ValueError: If ``user_id`` has no recorded interactions.
        """
        # ``.get`` avoids silently inserting an empty history for unknown users.
        user_history_ids = self.user_histories.get(user_id)
        if user_history_ids is None or len(user_history_ids) == 0:
            raise ValueError(f'No interaction history for user {user_id!r}')

        # First, the share of already-known items given by 1 - alpha.
        known_count = int(round((1 - alpha) * number_of_recommendations))
        known_count = max(0, min(number_of_recommendations, known_count))
        # ``randrange`` + indexing draws exactly like ``random.choice`` but
        # also works on NumPy arrays on every Python version.
        recommendations = [
            user_history_ids[rd.randrange(len(user_history_ids))]
            for _ in range(known_count)
        ]
        if len(recommendations) >= number_of_recommendations:
            # Nothing left to fill: skip the distance computation entirely.
            return np.array(recommendations)

        # Then, fill with catalog items located at the distance given by beta.
        user_history_features = self.item_features.loc[user_history_ids].mean().to_numpy()
        distances = pairwise_distances(
            X=user_history_features.reshape(1, -1),
            Y=self.item_features,
            metric='cosine'
        )[0]
        ranked_item_ids = (
            pd.Series(distances, index=self.item_features.index).sort_values().index
        )

        catalog_size = len(ranked_item_ids)
        # Start at beta * catalog size, but never before the first item nor so
        # late that a full-width window would overrun the ranking.
        last_valid_start = max(catalog_size - number_of_recommendations, 0)
        lower_index = min(max(int(round(beta * catalog_size)), 0), last_valid_start)
        upper_index = lower_index + number_of_recommendations
        window = list(ranked_item_ids[lower_index:upper_index])
        if not window:
            # Empty catalog: there is nothing to fill with (this used to loop forever).
            return np.array(recommendations)

        # Take only as many items as are still missing so the result never
        # exceeds number_of_recommendations; a catalog smaller than the
        # request is cycled through, as before.
        while len(recommendations) < number_of_recommendations:
            remaining = number_of_recommendations - len(recommendations)
            recommendations.extend(window[:remaining])
        return np.array(recommendations)

In [None]:
# Fit the recommender on the renamed frames and request one sample list
# (half known items, fill window starting at the middle of the ranking).
alpha_beta_recommender = AlphaBetaRecommender(edited_track_features, edited_sessions)
alpha_beta_recommender.recommend(
    user_id='0_00006f66-33e5-4de7-a324-2d18e439fc1e',
    alpha=0.5,
    beta=0.5,
)

In [None]:
# Grid evaluation: for every (alpha, beta) pair, accumulate each metric over
# all sessions and print its per-session average.
unique_session_ids = pd.unique(sessions['session_id'])
unique_session_ids_length = len(unique_session_ids)
alphas = betas = (0, 0.3, 0.7, 1)
for alpha in alphas:
    for beta in betas:
        # Running totals for the current (alpha, beta) pair.
        ndcg = mean_average_precision = novelty = diversity = 0
        for session_id in unique_session_ids:
            user_history = user_histories[session_id]
            recommendations = alpha_beta_recommender.recommend(session_id, alpha, beta)
            ndcg += dcg(user_history, recommendations, 10)
            mean_average_precision += average_precision(user_history, recommendations, 10)
            novelty += novelty_for_single_user(user_history, recommendations, edited_track_features)
            diversity += diversity_for_single_user(recommendations, edited_track_features)
        print(f'\nalpha = {alpha}, beta = {beta}')
        # The DCG total is normalised by the ideal DCG before averaging.
        for metric_label, metric_total in (
            ('NDCG@10:', ndcg / idcg(10)),
            ('MAP@10:', mean_average_precision),
            ('Novelty:', novelty),
            ('Diversity:', diversity),
        ):
            print(metric_label, metric_total / unique_session_ids_length)