# Librerías

In [1]:
from collections import defaultdict
from math import log2
from funciones_auxiliares import dcg_usuario, average_precision
import lightfm
import lightfm.data
import lightfm.cross_validation
import numpy as np
import pandas as pd



# Datasets

In [2]:
track_features = pd.read_csv('./data/processed_track_features.csv')
sessions = pd.read_csv('./data/processed_sessions.csv')

# Preparaciones

In [3]:
SEED = 0
TEST_PERCENTAGE = 0.25

In [4]:
dataset = lightfm.data.Dataset()
dataset.fit(
    users=sessions['session_id'],
    items=sessions['track_id']
)
user_id_map, _, item_id_map, _ = dataset.mapping()

In [5]:
interactions, _ = dataset.build_interactions(data=sessions.values)
train_interactions, test_interactions = lightfm.cross_validation.random_train_test_split(
    interactions=interactions,
    test_percentage=TEST_PERCENTAGE,
    random_state=np.random.RandomState(SEED)
)

In [6]:
model = lightfm.LightFM(
    no_components=20,
    loss='warp',
    learning_rate=0.25,
    random_state=np.random.RandomState(SEED)
)
model.fit(
    interactions=train_interactions,
    epochs=20
)

<lightfm.lightfm.LightFM at 0x1f26431e0d0>

# Resultados

In [7]:
true_top_n_for_each_user = defaultdict(list)
for i in sessions.index:
    row = sessions.loc[i]
    true_top_n_for_each_user[row['session_id']].append(row['track_id'])

In [8]:
idcg = 0
for i in np.arange(start=1, stop=10+1):
    idcg = 1 / log2(i + 1)

In [9]:
inverse_user_id_map = dict(map(reversed, user_id_map.items()))
inverse_item_id_map = dict(map(reversed, item_id_map.items()))
user_ids = np.array(tuple(user_id_map.values()))
item_ids = np.array(tuple(item_id_map.values()))

In [10]:
ndcg = 0
mean_average_precision = 0
for user_id in user_ids:
    predictions = model.predict(user_ids=int(user_id), item_ids=item_ids)
    predictions = np.argsort(predictions)[-10:]
    predictions = list(map(lambda item_id: inverse_item_id_map[item_id], predictions))
    user_id = inverse_user_id_map[user_id]
    ndcg += dcg_usuario(true_top_n_for_each_user[user_id], predictions, 10)
    mean_average_precision += average_precision(true_top_n_for_each_user[user_id], predictions, 10)
ndcg = ndcg / idcg / user_ids.shape[0]
mean_average_precision = mean_average_precision / user_ids.shape[0]
print(ndcg)
print(mean_average_precision)

0.009292415827556874
0.0016346428571428569
