In [1]:
import os
import json
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from recsys.models import MF
from recsys.data.movielens import MovielensDataset
from recsys.utils.metrics import evaluate
from recsys.utils import recommender, visualizer

In [2]:
# Sparse-feature metadata: read the JSON mapping and retain only the
# "user_id" and "item_id" entries (the only features handed to the model below).
with open("movielens/processed/num_sparse_features.json", "r") as f:
    num_sparse_features = {
        name: value
        for name, value in json.load(f).items()
        if name in ("user_id", "item_id")
    }

# Experiment-group mapping loaded from disk; an empty entry is registered
# under the MF model's name.
with open("movielens/processed/experiment_group.json", "r") as f:
    experiment_group = json.load(f)
experiment_group[MF.model_name] = []

# Pre-processed train / test splits.
train = pd.read_parquet("movielens/processed/train.parquet")
test = pd.read_parquet("movielens/processed/test.parquet")

In [3]:
# Training hyperparameters
seed = 42
batch_size = 64
latent_dim = 16
lr = 1e-3
epochs = 10

# Seed the RNGs *before* the loader and model are created: the training
# DataLoader shuffles and the model parameters are randomly initialised, both
# of which draw from torch's global generator. Without this, a fresh
# "Restart & Run All" yields different metrics on every run.
torch.manual_seed(seed)
# NOTE(review): numpy is seeded as well in case the recsys helpers draw from
# numpy's global RNG — confirm whether this is actually needed.
np.random.seed(seed)

# Names of the sparse features ("user_id", "item_id") passed to both datasets.
sparse_feature_names = list(num_sparse_features.keys())

# Training data: mini-batches of `batch_size`, reshuffled every epoch.
train_ds = MovielensDataset(train, sparse_feature_names)
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# Test data: order is preserved and each batch holds as many rows as there are
# distinct item ids in `test`.
# NOTE(review): presumably one batch corresponds to one user's full candidate
# list — confirm against how `test` is laid out and what recommender.inference expects.
test_ds = MovielensDataset(test, sparse_feature_names)
test_loader = torch.utils.data.DataLoader(test_ds, batch_size=test["item_id"].nunique(), shuffle=False)


# Model and optimizer (CPU only).
device = torch.device("cpu")
model = MF(num_sparse_features, latent_dim)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
print(model)

MatrixFactorization(
  (sparse_arch): ModuleDict(
    (user_id): Embedding(943, 16)
    (item_id): Embedding(1682, 16)
  )
)


In [None]:
# Train the model on `train_loader` with the configured optimizer, device and
# number of epochs; the value returned by recommender.train is kept as `history`.
# NOTE(review): presumably `history` holds per-epoch metrics (the progress bar
# reports auc and logloss) — confirm against recommender.train.
history = recommender.train(model, train_loader, optimizer, device, epochs)
# Hand the training history to the project's plotting helper.
visualizer.plot_history(history)

Epoch 1/10: 100%|█| 5747/5747 [00:08<00:00, 687.48it/s, auc=0.0000, logloss=0.54
Epoch 2/10: 100%|█| 5747/5747 [00:08<00:00, 667.67it/s, auc=0.5001, logloss=0.85
Epoch 3/10: 100%|█| 5747/5747 [00:08<00:00, 680.07it/s, auc=0.5201, logloss=0.71
Epoch 4/10: 100%|█| 5747/5747 [00:08<00:00, 669.67it/s, auc=0.5740, logloss=0.41
Epoch 5/10: 100%|█| 5747/5747 [00:08<00:00, 662.53it/s, auc=0.7116, logloss=0.28
Epoch 6/10: 100%|█| 5747/5747 [00:09<00:00, 635.50it/s, auc=0.8163, logloss=0.26
Epoch 7/10: 100%|█| 5747/5747 [00:09<00:00, 630.77it/s, auc=0.8521, logloss=0.34
Epoch 8/10: 100%|█| 5747/5747 [00:09<00:00, 620.52it/s, auc=0.8746, logloss=0.29


In [None]:
# Run the trained model over the test loader to obtain its recommendations.
user_recommendations = recommender.inference(model, device, test_loader)

# Cut-off values handed to the evaluation.
ks = [5, 10, 15, 20]

# Restrict the evaluation target to test rows whose rating equals 1.
_test = test.loc[test["rating"] == 1]

# Baselines come from the stored experiment groups; the model's own output is
# added last under its model name (same insertion order: popular, random, model).
recommendations = {
    baseline: np.array(experiment_group[baseline])
    for baseline in ("popular", "random")
}
recommendations[model.model_name] = user_recommendations

# Compute the metrics for every recommender at each cut-off and plot them.
results = evaluate(_test, recommendations, ks)
visualizer.plot_metrics(results)