In [1]:
import random
from pathlib import Path

import numpy as np
import torch

from src.dataloaders import PairwiseDataset
from src.models import MatrixFactorizationBPRModel
from src.trainer import Trainer
from src.metrics import hitratio, ndcg

# Seed PyTorch, the stdlib RNG and NumPy with the same fixed value.
torch.manual_seed(42)
random.seed(42)
np.random.seed(42)

# Use the GPU when PyTorch reports one as available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Uncomment to force CPU execution:
# device = 'cpu'
print(f"{device=}")

device='cuda'


In [2]:
class config:
    """Static settings for this notebook's MF-BPR training run.

    Used purely as a namespace: attributes are read as ``config.<name>`` and
    the class is never instantiated in this notebook.
    """

    # Data: directory handed to PairwiseDataset.
    data_dir: str = "ml-100k"

    # Model: third constructor argument of MatrixFactorizationBPRModel
    # (presumably the embedding dimension — confirm in src.models).
    dim: int = 40

    # Training loop: forwarded to Trainer.
    epochs: int = 100
    batch_size: int = 2048

    # Optimizer: forwarded to torch.optim.SGD.
    lr: float = 0.1
    momentum: float = 0.6
    weight_decay: float = 0.01


# Build the dataset object from the configured data directory.
dataset = PairwiseDataset(config.data_dir)
# NOTE(review): the two calls below mutate `dataset` in place and their order
# is kept as-is; presumably gen_adjacency() builds the user-item interaction
# structure and make_train_test() derives the split from it — confirm in
# src.dataloaders.
dataset.gen_adjacency()
dataset.make_train_test()
# Report the sizes of the resulting train and test sets.
print(f"{dataset.train_size=}, {dataset.test_size=}")

# Evaluation metrics handed to the Trainer, keyed by display name.
# Each value is a (metric function, keyword arguments) pair; every metric
# family is registered at cut-offs 1, 5 and 10, HR entries first, then NDCG.
metrics = {
    f"{family}@{cutoff}": (metric_fn, {"top_n": cutoff})
    for family, metric_fn in (("HR", hitratio), ("NDCG", ndcg))
    for cutoff in (1, 5, 10)
}

dataset.train_size=100000, dataset.test_size=943


In [3]:
# Model sized from the dataset's user/item counts and the configured dim.
model = MatrixFactorizationBPRModel(
    dataset.user_count,
    dataset.item_count,
    config.dim,
)

# SGD with Nesterov momentum over all model parameters; the hyperparameters
# come from `config`.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=config.lr,
    weight_decay=config.weight_decay,
    momentum=config.momentum,
    nesterov=True,
)

# Bundle dataset, model, optimizer and metrics for the training loop.
trainer = Trainer(
    dataset, model, optimizer, metrics,
    epochs=config.epochs, batch_size=config.batch_size, device=device,
)

In [4]:
# Run training with a progress bar and without verbose output.
# NOTE(review): evaluate=True presumably runs evaluation during training and
# fills trainer.test_log, which the next cell reads — confirm in src.trainer.
trainer.train(evaluate=True, verbose=False, progressbar=True)
# Stand-alone evaluation call, currently disabled:
# trainer.test(verbose=False, pbar=False)

In [5]:
# Collect NDCG@10 from every entry of the test log, then report the index of
# the highest value (first one on ties) together with that entry's metrics.
ndcg_at_10_per_epoch = [epoch_metrics["NDCG@10"] for epoch_metrics in trainer.test_log]
best_epoch = np.argmax(ndcg_at_10_per_epoch)
print(f"{best_epoch}: {trainer.test_log[best_epoch]}")

98: {'HR@1': 0.34146341463414637, 'HR@5': 0.7348886532343585, 'HR@10': 0.8727465535524921, 'NDCG@1': 0.34146341463414637, 'NDCG@5': 0.5464361573761861, 'NDCG@10': 0.5913190592456186}


In [6]:
# Persist the trained weights. torch.save does not create missing parent
# directories, so make sure the target folder exists first; otherwise a fresh
# checkout fails here after the whole training run has finished.
model_path = Path("saved_models/mfbpr.pt")
model_path.parent.mkdir(parents=True, exist_ok=True)
# NOTE(review): this stores whatever weights trainer.model holds after
# training — presumably the final epoch, not necessarily the best-NDCG@10
# epoch printed in the previous cell; confirm in src.trainer.
torch.save(trainer.model.state_dict(), model_path)
# To restore the weights later:
# trainer.model.load_state_dict(torch.load("saved_models/mfbpr.pt"))