In [7]:
import init_path

init_path.init()

import tqdm
import torch
import pandas as pd
import pickle as pkl
from datasets.pointwise import PointwiseDataset
from torch.utils.tensorboard import SummaryWriter
from models.biased_svd import BiasedSVD
from torch.utils.data import DataLoader
from torch.optim import Adam
from sklearn.metrics import roc_auc_score, accuracy_score

# TensorBoard event writer for this run; events are written under ../runs/biased_svd.
writer = SummaryWriter(log_dir="../runs/biased_svd")

# Backend preference order: CUDA first, then Apple MPS, then plain CPU.
# The MPS probe is only evaluated when CUDA is unavailable.
device = torch.device(
    "cuda"
    if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available() else "cpu"
)

print(f"Using device: {device}")

Using device: mps


In [8]:
# Load the pre-split train / validation / test tables.
train_df = pd.read_csv("../data/processed/train.csv")
val_df = pd.read_csv("../data/processed/val.csv")
test_df = pd.read_csv("../data/processed/test.csv")

# Load the fitted id encoders. The context managers guarantee the file handles
# are closed as soon as unpickling finishes (a bare `open(...)` passed to
# `pkl.load` is never explicitly closed).
# NOTE(security): unpickling executes arbitrary code embedded in the file, so
# only load encoder files produced by a trusted preprocessing step.
with open("../data/processed/item_encoder.pkl", "rb") as encoder_file:
    item_encoder = pkl.load(encoder_file)
with open("../data/processed/user_encoder.pkl", "rb") as encoder_file:
    user_encoder = pkl.load(encoder_file)

# Wrap each split in the project's pointwise dataset for use with DataLoader.
train_dataset = PointwiseDataset(train_df)
val_dataset = PointwiseDataset(val_df)
test_dataset = PointwiseDataset(test_df)

In [9]:
# Size the model from the number of classes known to each fitted encoder.
num_users = len(user_encoder.classes_)
num_items = len(item_encoder.classes_)

# The third argument (64) appears as the embedding width in the module repr
# this cell displays.
model = BiasedSVD(num_users, num_items, 64)
model.to(device)

BiasedSVD(
  (user_embedding): Embedding(57530, 64)
  (item_embedding): Embedding(35123, 64)
  (user_bias): Embedding(57530, 1)
  (item_bias): Embedding(35123, 1)
  (criterion): BCEWithLogitsLoss()
)

In [10]:
# One loader per split, all with batch size 256; only the training split is
# reshuffled, validation and test keep their stored order.
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=256, shuffle=reshuffle)
    for split, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

In [11]:
# Adam over every model parameter, with weight decay applied by the optimizer.
optimizer = Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-5)

In [12]:
# Train for a fixed number of epochs, logging the per-sample average training
# and validation loss to TensorBoard after each one.
epochs = 10

for epoch in range(1, epochs + 1):  # 1-based so labels and log steps start at 1
    # ---- Training pass ----
    model.train()
    train_loss = 0.0
    for user_ids, item_ids, labels in tqdm.tqdm(
        train_loader, desc=f"Epoch {epoch}/{epochs} - Training"
    ):
        user_ids = user_ids.to(device)
        item_ids = item_ids.to(device)
        labels = labels.float().to(device)

        optimizer.zero_grad()
        predictions = model(user_ids, item_ids)
        loss = model.loss(predictions, labels)
        loss.backward()
        optimizer.step()

        # Weight by batch size so the final division by the dataset size gives
        # a per-sample average even when the last batch is smaller.
        # NOTE(review): assumes model.loss returns a batch mean — confirm.
        train_loss += loss.item() * user_ids.size(0)

    train_loss /= len(train_loader.dataset)
    writer.add_scalar("Loss/Train", train_loss, epoch)

    # ---- Validation pass (no gradient tracking, no parameter updates) ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        # `epoch` is already 1-based; the label must not add another 1, or the
        # validation bar reads one epoch ahead of the training bar.
        for user_ids, item_ids, labels in tqdm.tqdm(
            val_loader, desc=f"Epoch {epoch}/{epochs} - Validation"
        ):
            user_ids = user_ids.to(device)
            item_ids = item_ids.to(device)
            labels = labels.float().to(device)

            predictions = model(user_ids, item_ids)
            loss = model.loss(predictions, labels)

            val_loss += loss.item() * user_ids.size(0)

    val_loss /= len(val_loader.dataset)
    writer.add_scalar("Loss/Validation", val_loss, epoch)

Epoch 1/10 - Training: 100%|██████████| 1415/1415 [00:23<00:00, 59.55it/s]
Epoch 2/10 - Validation: 100%|██████████| 225/225 [00:01<00:00, 178.52it/s]
Epoch 2/10 - Training: 100%|██████████| 1415/1415 [00:23<00:00, 60.31it/s]
Epoch 3/10 - Validation: 100%|██████████| 225/225 [00:01<00:00, 194.64it/s]
Epoch 3/10 - Training: 100%|██████████| 1415/1415 [00:24<00:00, 58.32it/s]
Epoch 4/10 - Validation: 100%|██████████| 225/225 [00:01<00:00, 194.56it/s]
Epoch 4/10 - Training: 100%|██████████| 1415/1415 [00:23<00:00, 60.16it/s]
Epoch 5/10 - Validation: 100%|██████████| 225/225 [00:01<00:00, 197.88it/s]
Epoch 5/10 - Training: 100%|██████████| 1415/1415 [00:23<00:00, 60.49it/s]
Epoch 6/10 - Validation: 100%|██████████| 225/225 [00:01<00:00, 191.68it/s]
Epoch 6/10 - Training: 100%|██████████| 1415/1415 [00:24<00:00, 57.07it/s]
Epoch 7/10 - Validation: 100%|██████████| 225/225 [00:01<00:00, 176.49it/s]
Epoch 7/10 - Training: 100%|██████████| 1415/1415 [00:23<00:00, 61.36it/s]
Epoch 8/10 - Valida

In [13]:
# Held-out evaluation: average loss, ROC AUC and accuracy on the test split.
model.eval()

pred_chunks = []
target_chunks = []
test_loss = 0

with torch.no_grad():
    for batch_users, batch_items, batch_targets in test_loader:
        batch_users = batch_users.to(device)
        batch_items = batch_items.to(device)
        batch_targets = batch_targets.float().to(device)

        logits = model(batch_users, batch_items)
        # Accumulate a batch-size-weighted sum; normalised after the loop.
        test_loss += model.loss(logits, batch_targets).item() * batch_users.size(0)

        # Keep sigmoid-squashed scores and targets on the CPU for the metrics.
        pred_chunks.append(torch.sigmoid(logits).cpu())
        target_chunks.append(batch_targets.cpu())

all_preds = torch.cat(pred_chunks)
all_targets = torch.cat(target_chunks)

test_loss /= len(test_loader.dataset)
test_auc = roc_auc_score(all_targets, all_preds)
# Scores of 0.5 or more count as the positive class for accuracy.
hard_labels = (all_preds >= 0.5).int()
test_acc = accuracy_score(all_targets, hard_labels)

summary = f"Test Loss: {test_loss:.4f}, Test AUC: {test_auc:.4f}, Test Accuracy: {test_acc:.4f}"
print(summary)

# Log each final metric once, without an explicit step.
for tag, value in (
    ("Loss/test", test_loss),
    ("AUC/test", test_auc),
    ("Accuracy/test", test_acc),
):
    writer.add_scalar(tag, value)

Test Loss: 0.5925, Test AUC: 0.5881, Test Accuracy: 0.7313


In [14]:
# Close the TensorBoard writer now that all metrics have been logged.
writer.close()