# Librerías

In [1]:
from collections import defaultdict
from metricas import dcg, idcg, average_precision, novelty_for_single_user, diversity_for_single_user
from sklearn.metrics import pairwise_distances
import numpy as np
import pandas as pd

# Datasets

In [2]:
# Load the pre-processed track feature table and the session/track
# interaction table (in that order), then preview the interactions.
track_features, sessions = map(
    pd.read_csv,
    ('./data/processed_track_features.csv', './data/processed_sessions.csv'),
)
sessions.head()

Unnamed: 0,session_id,track_id
0,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_0479f24c-27d2-46d6-a00c-7ec928f2b539
1,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_9099cd7b-c238-47b7-9381-f23f2c1d1043
2,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_fc5df5ba-5396-49a7-8b29-35d0d28249e0
3,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_23cff8d6-d874-4b20-83dc-94e450e8aa20
4,0_00006f66-33e5-4de7-a324-2d18e439fc1e,t_64f3743c-f624-46bb-a579-0f3f9a07a123


# Preparaciones

In [3]:
# Map every session id to the tracks it contains, in the row order of
# `sessions`. Walking the two columns in a single pass avoids one label
# lookup (and one temporary Series) per row, which is what made the previous
# `sessions.loc[i]` loop slow on large interaction tables.
user_histories = defaultdict(list)
for session_id, track_id in zip(sessions['session_id'], sessions['track_id']):
    user_histories[session_id].append(track_id)

# Freeze each history into a NumPy array; the evaluation cell reads these.
for user_id in user_histories:
    user_histories[user_id] = np.array(user_histories[user_id])

In [4]:
# Re-express both tables with the generic column names the recommender reads
# ('user_id' / 'item_id'); the feature table is indexed by item id.
edited_track_features = (
    track_features
    .set_index('track_id')
    .rename_axis('item_id')
)
edited_sessions = sessions.rename(
    columns={
        'session_id': 'user_id',
        'track_id': 'item_id',
    }
)

# Modelo

In [5]:
class AlphaBetaRecommender:
    """Content-based recommender that samples a similarity ranking at fixed offsets.

    For a user, every item is ranked by cosine distance between its feature
    row and the mean feature vector of the items in the user's history.
    Recommendations are the items found at the ranking positions stored in
    ``self.jumps``: the first position is controlled by ``alpha`` and the gap
    between consecutive positions by ``beta`` (see ``set_alpha_and_beta``).
    """

    def __init__(self, item_features: pd.DataFrame, interactions: pd.DataFrame, number_of_recommendations=10) -> None:
        """Build the per-user histories and initialise the offsets with alpha = beta = 0.

        Args:
            item_features: One row per item, indexed by item id; all columns
                are used as the feature vector.
            interactions: Table with 'user_id' and 'item_id' columns, one row
                per interaction.
            number_of_recommendations: How many items ``recommend`` returns.
        """
        self.item_features = item_features

        # Group item ids per user in a single pass over the two columns,
        # preserving the row order of `interactions`.
        self.user_histories = defaultdict(list)
        for user_id, item_id in zip(interactions['user_id'], interactions['item_id']):
            self.user_histories[user_id].append(item_id)
        # Store the arrays on the instance itself (not on a same-named
        # notebook global), so the class works on its own.
        for user_id in self.user_histories:
            self.user_histories[user_id] = np.array(self.user_histories[user_id])

        # Kept as an index array (0 .. n-1); its length is the number of
        # recommendations.
        self.number_of_recommendations = np.arange(number_of_recommendations)

        # Largest start offset (20% of the catalogue) and largest gap between
        # picks (2% of the catalogue), floored to whole positions.
        self.max_alpha_jump = 0.2 * self.item_features.shape[0] // 1
        self.max_beta_jump = 0.02 * self.item_features.shape[0] // 1
        self.set_alpha_and_beta(0, 0)

    def get_distance_ranking(self, user_id: str):
        """Rank all items by cosine distance to the user's mean feature vector.

        Args:
            user_id: Key of the user in ``self.user_histories``.

        Returns:
            DataFrame indexed like ``item_features`` with one 'distances'
            column, sorted in ascending order (closest items first).

        Raises:
            ValueError: If the user has no recorded history.
        """
        # Fail fast instead of letting the defaultdict insert an empty history
        # and produce a NaN profile further down.
        if user_id not in self.user_histories:
            raise ValueError(f'Unknown user_id: {user_id!r}')
        user_history_ids = self.user_histories[user_id]
        # User profile = column-wise mean of the features of the consumed items.
        user_feature_vector = np.array(self.item_features.loc[user_history_ids].mean())
        distances = pairwise_distances(
            X=user_feature_vector.reshape(1, -1),
            Y=self.item_features,
            metric='cosine'
        )[0]
        distance_ranking = pd.DataFrame({'distances': distances}, index=self.item_features.index)
        return distance_ranking.sort_values(by='distances')

    def recommend(self, user_id: str):
        """Return the item ids located at the configured ranking positions.

        Args:
            user_id: Key of the user in ``self.user_histories``.

        Returns:
            NumPy array of item ids, one per entry of ``self.jumps``.
        """
        distance_ranking = self.get_distance_ranking(user_id)
        recommendations = distance_ranking.iloc[self.jumps].index
        return np.array(recommendations)

    def set_alpha_and_beta(self, alpha: float, beta: float):
        """Recompute the ranking positions used by ``recommend``.

        Args:
            alpha: Scales the position of the first pick; it is multiplied by
                ``max_alpha_jump`` and floored.
            beta: Scales the gap between consecutive picks; it is multiplied
                by ``max_beta_jump`` and floored.

        The notebook only uses values in [0, 1]; other values are not
        validated here.
        """
        alpha_jump = int(alpha * self.max_alpha_jump // 1)
        beta_jump = int(beta * self.max_beta_jump // 1)
        # NOTE(review): when beta_jump is 0 every position is identical, so
        # the same item is returned in every slot - confirm this is intended.
        self.jumps = [
            alpha_jump + position * beta_jump
            for position in range(len(self.number_of_recommendations))
        ]

In [6]:
# Build the recommender on the renamed tables, pick a mid-range
# configuration and show the recommendations for one sample session.
alpha_beta_recommender = AlphaBetaRecommender(
    edited_track_features,
    edited_sessions,
    number_of_recommendations=10,
)
alpha_beta_recommender.set_alpha_and_beta(alpha=0.5, beta=0.5)
alpha_beta_recommender.recommend(user_id='0_00006f66-33e5-4de7-a324-2d18e439fc1e')

array(['t_3790a7f8-cbaf-4772-8b8a-e56bcfe4626e',
       't_6845d878-45c3-4ae6-939b-6411646a6303',
       't_1a6c16ba-3ee7-4853-bf8a-5a25b1238d37',
       't_0ecc42e8-96ae-4804-9434-42ff99b7422e',
       't_728a2528-9867-4c0b-b1f5-66f2f8337daa',
       't_22ce6dbc-baf4-4782-9a97-1f22b16deb55',
       't_94841b38-fa73-4620-a6d9-5f51d6d0aa52',
       't_e55a30c5-4ed7-4e84-9301-4f276f6a06b3',
       't_f9163204-3b05-4b5b-a681-9efead596fa2',
       't_ca1b09f4-3c1a-4104-8a25-4b36ed193c27'], dtype=object)

# Resultados

In [7]:
# Evaluate the recommender over a grid of (alpha, beta) settings. For each
# setting the four metrics are summed over every session and then averaged;
# the summed DCG is additionally divided by idcg(10) to report NDCG@10.
unique_session_ids = sessions['session_id'].unique()
unique_session_ids_length = len(unique_session_ids)
alphas = (0, 0.3, 0.7, 1)
betas = (0, 0.3, 0.7, 1)
for alpha, beta in ((a, b) for a in alphas for b in betas):
    alpha_beta_recommender.set_alpha_and_beta(alpha, beta)
    print(f'\nalpha = {alpha}, beta = {beta}')

    # Running totals over all sessions for the current setting.
    ndcg = mean_average_precision = novelty = diversity = 0
    for session_id in unique_session_ids:
        user_history = user_histories[session_id]
        recommendations = alpha_beta_recommender.recommend(session_id)
        ndcg += dcg(user_history, recommendations, 10)
        mean_average_precision += average_precision(user_history, recommendations, 10)
        novelty += novelty_for_single_user(user_history, recommendations, edited_track_features)
        diversity += diversity_for_single_user(recommendations, edited_track_features)

    print('NDCG@10:', ndcg / idcg(10) / unique_session_ids_length)
    print('MAP@10:', mean_average_precision / unique_session_ids_length)
    print('Novelty:', novelty / unique_session_ids_length)
    print('Diversity:', diversity / unique_session_ids_length)


alpha = 0, beta = 0
