In [1]:
from models.training import GMFTrainer
from models.base_model import GMF
from evaluation.evaluator import Evaluator
import numpy as np
import pandas as pd
import torch

In [2]:
# Load the training interactions and the track catalogue (JSON Lines files).
sessions_train_df = pd.read_json('data_files/train_sessions.jsonl', lines=True)
tracks_df = pd.read_json('data_files/tracks.jsonl', lines=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pull the three training columns out once, with explicit dtypes, so the
# tensors below do not depend on whatever dtype pandas inferred from the JSON.
train_user_ids = sessions_train_df['user_id'].to_numpy(dtype='int64')
train_track_ids = sessions_train_df['track_id'].to_numpy(dtype='int64')
train_scores = sessions_train_df['score'].to_numpy(dtype='float32')

# A single NaN/inf target is enough to turn an averaged loss into NaN, so
# refuse to build the loader from such data instead of training on it.
assert np.isfinite(train_scores).all(), \
    "train_sessions.jsonl contains non-finite values in 'score'"

# The whole dataset is placed on `device` up front (same as before), so the
# loader yields batches that already live there.
dataset = torch.utils.data.TensorDataset(
    torch.as_tensor(train_user_ids, dtype=torch.long, device=device),
    torch.as_tensor(train_track_ids, dtype=torch.long, device=device),
    torch.as_tensor(train_scores, dtype=torch.float32, device=device)
)

train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=512,
    shuffle=True
)

# These counts are passed to GMF as its first two arguments and the raw ids are
# fed to the model unchanged, so they presumably size id-indexed lookup tables
# (TODO confirm against GMF). Such a table needs max_id + 1 rows; nunique() /
# len() undercount whenever the ids have gaps. Taking the max of both keeps the
# values identical to the previous ones for contiguous 0-based ids and never
# makes them smaller.
unique_users_count = max(
    sessions_train_df['user_id'].nunique(),
    int(train_user_ids.max()) + 1
)
unique_tracks_count = max(
    len(tracks_df),
    int(train_track_ids.max()) + 1
)

In [3]:
# Third positional argument of GMF; presumably the embedding width
# (TODO confirm against GMF.__init__).
LATENT_DIM = 4096

# Build the model from the id-space sizes computed in the data cell, then wrap
# it in a trainer that is handed the training loader and the target device.
gmf = GMF(unique_users_count, unique_tracks_count, LATENT_DIM)
gmf_trainer = GMFTrainer(gmf, train_loader, device)

In [4]:
# The recorded progress lines read "Epoch k/15", so the first argument is the
# number of epochs.
N_EPOCHS = 15
# NOTE(review): the meaning of 0.2 is not visible from this notebook (possibly a
# learning rate) — confirm against GMFTrainer.train. The recorded run reported a
# NaN average loss from the very first epoch.
gmf_trainer.train(N_EPOCHS, 0.2)

Epoch 1/15 - Average Loss: nan
Epoch 2/15 - Average Loss: nan
Epoch 3/15 - Average Loss: nan
Epoch 4/15 - Average Loss: nan
Epoch 5/15 - Average Loss: nan
Epoch 6/15 - Average Loss: nan
Epoch 7/15 - Average Loss: nan
Epoch 8/15 - Average Loss: nan
Epoch 9/15 - Average Loss: nan
Epoch 10/15 - Average Loss: nan
Epoch 11/15 - Average Loss: nan
Epoch 12/15 - Average Loss: nan


KeyboardInterrupt: 

In [None]:
# Release cached, currently unused CUDA allocator memory before the large
# inference pass below. Without a CUDA device there is nothing to release.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
# Evaluation only covers users whose id is below this bound; the same bound
# defines the user axis of the candidate grid built below.
EVAL_USER_LIMIT = 1000

# Validation sessions, restricted to the evaluated users.
ground_truth_df = (
    pd.read_json('data_files/val_sessions.jsonl', lines=True)
    .loc[lambda sessions: sessions['user_id'] < EVAL_USER_LIMIT]
)

all_users = torch.arange(EVAL_USER_LIMIT)
all_items = torch.arange(unique_tracks_count)

# One row per (user, track) combination: column 0 holds the user id and
# column 1 the track id.
all_pairs = torch.cartesian_prod(all_users, all_items)
user_input, item_input = all_pairs.unbind(dim=1)

In [None]:
# Score every (user, track) pair in fixed-size batches.
predictions = []
batch_size = 10000
gmf.eval()  # switch the model to evaluation mode before scoring

# Gradients are never needed for scoring, so a single no_grad context covers
# the whole loop instead of being re-entered for every batch.
with torch.no_grad():
    # torch.split yields consecutive chunks of `batch_size` rows (the last one
    # may be shorter), i.e. the same slices as manual index arithmetic.
    for batch_pairs in torch.split(all_pairs, batch_size):
        batch_user_input = batch_pairs[:, 0].to(device)
        batch_item_input = batch_pairs[:, 1].to(device)

        # Call the module itself rather than .forward() so that any registered
        # hooks run (assumes GMF is a torch.nn.Module, as the .eval() call
        # suggests).
        batch_predictions = gmf(batch_user_input, batch_item_input)

        # Move each batch of scores to the CPU right away; otherwise the
        # scores for every pair accumulate in device memory until the final cat.
        predictions.append(batch_predictions.cpu())

# Concatenated in batch order, so row i corresponds to all_pairs[i].
all_predictions = torch.cat(predictions)

In [None]:
# Bring the pair grid and the scores back as NumPy arrays.
all_pairs_np = all_pairs.cpu().numpy()
all_predictions_np = all_predictions.cpu().numpy()

# Column 0 of the pair grid is the user id, column 1 the track id.
user_column = all_pairs_np[:, 0]
track_column = all_pairs_np[:, 1]

# Long-format table: one scored (user, track) row per candidate pair.
recommendations_df = pd.DataFrame(
    {'user_id': user_column, 'track_id': track_column, 'score': all_predictions_np}
)

In [None]:
# Evaluator configured with the values 200, 400, ..., 1000 (presumably the
# top-k cutoffs at which metrics are reported — TODO confirm against Evaluator).
evaluator2 = Evaluator(list(range(200, 1001, 200)))

In [None]:
# Compare the scored (user, track) table against the validation sessions.
# The result is consumed below via .items(), so it is expected to be a mapping
# of metric name -> numeric value (presumably fractions in [0, 1], since they
# are later printed as percentages — confirm against Evaluator.evaluate).
metrics = evaluator2.evaluate(recommendations_df, ground_truth_df)

In [None]:
# Report every metric as a percentage, one per line.
for metric_name in metrics:
    as_percent = metrics[metric_name] * 100
    print(f'{metric_name}: {as_percent}%')