In [1]:
from models.training import GMFTrainer
from models.base_model import GMF
from evaluation.evaluator import Evaluator
import pandas as pd
import torch

In [2]:
# Seed torch's global RNG so the DataLoader's shuffling order is repeatable
# across "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Training interactions: one JSON object per line with user_id, track_id and
# score fields (the three columns read below).
sessions_train_df = pd.read_json('data_files/train_sessions.jsonl', lines=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The full training set is moved onto `device` once, so every batch the loader
# yields is already on the target device.
dataset = torch.utils.data.TensorDataset(
    torch.LongTensor(sessions_train_df['user_id'].values).to(device),
    torch.LongTensor(sessions_train_df['track_id'].values).to(device),
    torch.FloatTensor(sessions_train_df['score'].values).to(device)
)

train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=1024,
    shuffle=True
)

# The ids are used directly as integer indices: a later cell enumerates users
# and tracks with torch.arange(count), and GMF presumably sizes its lookup
# tables from these counts (confirm against models.base_model). The counts
# must therefore be "largest id + 1", not the number of distinct ids:
# nunique() is only equal to that when the ids are exactly 0..n-1 with no
# gaps, and with a gap the largest id would be >= the count.
assert sessions_train_df['user_id'].min() >= 0, "user_id must be non-negative"
assert sessions_train_df['track_id'].min() >= 0, "track_id must be non-negative"

unique_users_count = int(sessions_train_df['user_id'].max()) + 1
unique_tracks_count = int(sessions_train_df['track_id'].max()) + 1

In [3]:
# Instantiate the GMF model from the user/track counts computed above, then
# wrap it in its trainer together with the training loader and target device.
gmf = GMF(
    unique_users_count,
    unique_tracks_count,
    64,  # presumably the latent/embedding dimension — confirm against GMF
)
gmf_trainer = GMFTrainer(
    gmf,
    train_loader,
    device,
)

In [4]:
# Run training. The first argument is the epoch count (the log below reports
# "Epoch k/10"); the second is presumably the learning rate — confirm against
# GMFTrainer.train.
n_epochs = 10
gmf_trainer.train(n_epochs, 0.005)

Epoch 1/10 - Average Loss: 0.0564
Epoch 2/10 - Average Loss: 0.0390
Epoch 3/10 - Average Loss: 0.0325
Epoch 4/10 - Average Loss: 0.0304
Epoch 5/10 - Average Loss: 0.0297
Epoch 6/10 - Average Loss: 0.0294
Epoch 7/10 - Average Loss: 0.0294
Epoch 8/10 - Average Loss: 0.0293
Epoch 9/10 - Average Loss: 0.0293
Epoch 10/10 - Average Loss: 0.0293


In [5]:
# Held-out interactions that the recommendations are evaluated against.
ground_truth_df = pd.read_json('data_files/val_sessions.jsonl', lines=True)

# Enumerate every candidate (user, track) combination on the target device.
# `all_pairs` has one row per combination: column 0 is the user index and
# column 1 is the track index.
all_users = torch.arange(unique_users_count, device=device)
all_items = torch.arange(unique_tracks_count, device=device)
all_pairs = torch.cartesian_prod(all_users, all_items)

# Column views over `all_pairs` (no copy is made).
user_input, item_input = all_pairs.T

In [6]:
# Score every (user, track) pair in fixed-size chunks so a single forward
# pass never has to hold all pairs at once.
predictions = []
batch_size = 10000

# Put the model into evaluation mode before inference: layers such as dropout
# or batch-norm behave differently while training, and nothing visible in this
# notebook switches the model out of training mode after the trainer has run.
# NOTE(review): assumes GMF is a torch.nn.Module — confirm; call gmf.train()
# again before any further training.
gmf.eval()

# One no_grad context around the whole loop: no autograd graph is recorded,
# and the context manager is not re-entered on every iteration.
with torch.no_grad():
    for i in range(0, len(all_pairs), batch_size):
        batch_pairs = all_pairs[i:i + batch_size]
        batch_user_input = batch_pairs[:, 0].to(device)
        batch_item_input = batch_pairs[:, 1].to(device)

        # Call the module itself rather than .forward() so any registered
        # hooks run as well.
        batch_predictions = gmf(batch_user_input, batch_item_input)
        predictions.append(batch_predictions)

# Concatenate the per-chunk outputs in the same order as the rows of all_pairs.
all_predictions = torch.cat(predictions)

In [7]:
# Bring the pair indices and their predicted scores back to host memory as
# NumPy arrays.
all_pairs_np, all_predictions_np = (
    tensor.cpu().numpy() for tensor in (all_pairs, all_predictions)
)

# Long-format table with one row per (user, track) pair and its model score.
recommendations_df = pd.DataFrame(
    dict(
        user_id=all_pairs_np[:, 0],
        track_id=all_pairs_np[:, 1],
        score=all_predictions_np,
    )
)

In [8]:
# List handed to Evaluator — presumably the k cut-offs for top-k metrics;
# confirm against evaluation.evaluator.
cutoffs = [5, 10, 20, 30, 50]
evaluator2 = Evaluator(cutoffs)

In [9]:
# Sanity check of the validation frame: the printed summary shows its
# user_id / track_id / score columns and the total row count.
print(ground_truth_df)

        user_id  track_id     score
0             0       508  0.750260
1             0      1884  0.750260
2             0      2140  0.289050
3             0        12  0.289050
4             0      1890  0.750260
...         ...       ...       ...
357306     2998      2392  0.524979
357307     2998      5661  0.750260
357308     2998      1622  0.750260
357309     2998       499  0.750260
357310     2998      4363  0.289050

[357311 rows x 3 columns]


In [None]:
# Compare the scored (user, track) table against the held-out interactions.
# No output is recorded for this cell in the saved notebook.
metrics = evaluator2.evaluate(
    recommendations_df,
    ground_truth_df,
)