In [1]:
import os
import pathlib
from argparse import ArgumentParser

import numpy as np
import pandas as pd
import torch as th
import sklearn as sk

from pytorch_lightning import seed_everything, Trainer
from ranking_metrics_torch.precision_recall import precision_at, recall_at
from ranking_metrics_torch.cumulative_gain import ndcg_at
from torch_factorization_models.implicit_mf import ImplicitMatrixFactorization
from torch_factorization_models.movielens import MovielensDataset, MovielensDataModule

In [2]:
# Fix the global random seed through pytorch_lightning's `seed_everything`.
# 42 is the seed that was used when the training run created its data splits;
# presumably reusing it here makes the splits rebuilt in this notebook match
# the ones from training — confirm against the training script.
seed_everything(42)  # same seed used to create splits in training

42

### Load dataset

In [3]:
# Directory holding the MovieLens (ml-20m) files. Set the MOVIELENS_DIR
# environment variable to point somewhere else; the fallback is the path this
# notebook was originally run with, so existing setups behave exactly as before.
MOVIELENS_DIR = os.environ.get("MOVIELENS_DIR", "/home/karl/Projects/datasets/ml-20m/")

# Build the data module and run its setup step.
# NOTE(review): `setup()` presumably populates `dataset` and the
# training/validation subsets read by later cells — confirm.
movielens_module = MovielensDataModule(MOVIELENS_DIR)
movielens_module.setup()

In [4]:
# Short handles to the underlying dataset, its preprocessor, and the
# preprocessor's named transformers for the user-id and item-id columns.
movielens = movielens_module.dataset
preprocessor = movielens.preprocessor
user_xformer, item_xformer = (
    preprocessor.named_transformers_[column] for column in ('user_id', 'item_id')
)

### Load the model

In [5]:
# Collect the default hyperparameters: let the Trainer and the model register
# their arguments on a help-less parser, then parse an empty argument list.
parser = ImplicitMatrixFactorization.add_model_specific_args(
    Trainer.add_argparse_args(ArgumentParser(add_help=False))
)
args = parser.parse_args(args=[])

# Override the values this notebook pins explicitly.
# (`use_biases` is not overridden here and keeps its parser default.)
vars(args).update(
    num_users=138287,
    num_items=20720,
    embedding_dim=32,
    eval_cutoff=th.tensor([100]),
)

args

Namespace(accumulate_grad_batches=1, amp_backend='native', amp_level='O2', auto_lr_find=False, auto_scale_batch_size=False, auto_select_gpus=False, benchmark=False, beta1=0.9, beta2=0.999, check_val_every_n_epoch=1, checkpoint_callback=True, default_root_dir=None, deterministic=False, distributed_backend=None, early_stop_callback=False, embedding_dim=32, eval_cutoff=tensor([100]), fast_dev_run=False, gpus=<function Trainer._gpus_arg_default at 0x7f65551b3040>, gradient_clip_val=0, learning_rate=0.1, limit_test_batches=1.0, limit_train_batches=1.0, limit_val_batches=1.0, log_gpu_memory=None, log_save_interval=100, logger=True, loss='logistic', max_epochs=1000, max_steps=None, min_epochs=1, min_steps=None, momentum=0.9, num_items=20720, num_nodes=1, num_processes=1, num_sanity_val_steps=2, num_users=138287, optimizer='sgd', overfit_batches=0.0, overfit_pct=None, precision=32, prepare_data_per_node=True, process_position=0, profiler=None, progress_bar_refresh_rate=1, reload_dataloaders_ev

In [7]:
model = ImplicitMatrixFactorization(args)

# Load the checkpoint onto the CPU regardless of the device it was saved from,
# so this cell also works on machines without CUDA. `load_state_dict` copies
# the values into the model's own parameters, and the model is moved to the
# GPU (when available) in a later cell.
# NOTE(review): `th.load` unpickles arbitrary Python objects — only load
# checkpoints from a trusted source.
state_dict = th.load("../models/38ov3g28-honest-lake-213.pt", map_location="cpu")

# The checkpoint carries an extra 'preprocessor' entry (presumably the
# preprocessor object saved alongside the weights); remove it before handing
# the dict to `load_state_dict`.
# preprocessor = state_dict['preprocessor']
del state_dict['preprocessor']

# Supply `global_bias_idx` by hand.
# NOTE(review): presumably a buffer the model expects but this checkpoint
# lacks — confirm against ImplicitMatrixFactorization.
state_dict['global_bias_idx'] = th.LongTensor([0])

model.load_state_dict(state_dict)

<All keys matched successfully>

In [8]:
# Run on the GPU when CUDA is available; otherwise the model is left on the
# device it was created on.
if th.cuda.is_available():
    model.cuda()

In [10]:
# Put the dataset on the same device as the model.
# NOTE(review): `to_` is defined by the dataset class; the trailing underscore
# suggests it moves the dataset's tensors in place — confirm.
movielens_module.dataset.to_(device=model.device)

In [11]:
# Validation dataloader shared by every evaluation below.
# NOTE(review): `by_user=True` presumably batches interactions per user for
# the ranking metrics — confirm against MovielensDataModule.
dataloader = movielens_module.val_dataloader(by_user=True)

### Random baseline

In [12]:
def random_predict(user_ids, num_items):
    """Score every item uniformly at random for each user (random baseline).

    Args:
        user_ids: Sized collection of users to score; only its length is used.
        num_items: Number of item columns to generate.

    Returns:
        A ``(len(user_ids), num_items)`` tensor of uniform random values,
        allocated on the device of the module-level ``model``.
    """
    batch_size = len(user_ids)
    # `model` is only read here, so no `global` declaration is needed.
    return th.rand((batch_size, num_items), device=model.device)

In [13]:
# Evaluate the random scorer on the validation dataloader.
random_metrics = model.compute_validation_metrics(dataloader, random_predict)

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)
  rel_indices = (num_relevant != 0).nonzero()


In [14]:
# Report the random scorer's metrics, one per line, to four decimals.
print(
    f"Precision: {random_metrics['precision']:.4f}",
    f"Recall: {random_metrics['recall']:.4f}",
    f"NDCG: {random_metrics['ndcg']:.4f}",
    sep="\n",
)

Precision: 0.0004
Recall: 0.0049
NDCG: 0.0018


### Popularity baseline

In [15]:
# Item ids of the interactions in the training subset, then the distinct ids
# together with how many times each one occurs.
training_indices = movielens_module.training.indices
training_subset_items = movielens.item_ids[training_indices]
unique, counts = np.unique(training_subset_items.cpu().numpy(), return_counts=True)

In [16]:
# Popularity scores: each item's number of training interactions, scaled so
# the most frequent item scores 1. Items that never occur in the training
# subset keep a score of 0.
pop_scores = th.zeros(movielens.num_items)

# Write all counts with one indexed assignment instead of a Python loop that
# issues one scalar tensor write per item. `unique` holds distinct item ids,
# so no position is written twice.
item_index = th.as_tensor(unique, dtype=th.long)
pop_scores[item_index] = th.as_tensor(counts, dtype=pop_scores.dtype)

pop_scores /= np.amax(counts)

# Keep the scores on the model's device so they can be returned directly by
# the popularity predictor.
pop_scores = pop_scores.to(device=model.device)

def pop_predict(user_ids, num_items):
    """Return the same popularity scores for every requested user.

    Args:
        user_ids: Sized collection of users to score; only its length is used.
        num_items: Unused; present so the signature matches ``random_predict``.

    Returns:
        The module-level ``pop_scores`` vector expanded to ``len(user_ids)``
        rows (one identical row per user).
    """
    num_rows = len(user_ids)
    return pop_scores.expand(num_rows, -1)

In [17]:
# Evaluate the popularity scorer on the validation dataloader.
pop_metrics = model.compute_validation_metrics(dataloader, pop_predict)

In [18]:
# Report the popularity scorer's metrics, one per line, to four decimals.
print(
    f"Precision: {pop_metrics['precision']:.4f}",
    f"Recall: {pop_metrics['recall']:.4f}",
    f"NDCG: {pop_metrics['ndcg']:.4f}",
    sep="\n",
)

Precision: 0.0187
Recall: 0.3019
NDCG: 0.1287


### Trained model

In [24]:
# Evaluate the loaded model's own predictions on the validation dataloader.
model_metrics = model.compute_validation_metrics(dataloader, model.eval_predict)

In [25]:
# Report the loaded model's metrics, one per line, to four decimals.
print(
    f"Precision: {model_metrics['precision']:.4f}",
    f"Recall: {model_metrics['recall']:.4f}",
    f"NDCG: {model_metrics['ndcg']:.4f}",
    sep="\n",
)

Precision: 0.0261
Recall: 0.5017
NDCG: 0.1875
