In [1]:
import random
import sys

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader, Subset

In [2]:
# Make the parent directory importable (presumably where the `carca` package lives).
# Guarded so that re-running this cell does not keep appending duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

In [None]:
# NEXT STEPS:
# 1. Create naive Nearest Neighbour model
# 2. Use pytorch attention
# 3. Add causality constraint
# 4. Experiment with additive (+) vs. multiplicative (*) residual connections
# 5. Add positional encoding

In [3]:
from carca.data import CARCADataset, load_attrs, load_ctx, load_profiles, set_datapath
from carca.model.carca import CARCA
from carca.model.embedding import AttrCtxEmbedding, AllEmbedding, AttrEmbedding, IdEmbedding
from carca.train import train

In [4]:
# Root directory of the experiment data; the load_* calls in the next cell
# are given bare file names only.
data_dir = "../../data/embedding_experiment/video_games/"
set_datapath(data_dir)

In [5]:
# Input files, passed as-is to the carca.data loaders
# (presumably resolved against the directory given to set_datapath).
attrs_file = "video_games_sbert_5core.dat"
ctx_file = "video_games_ctx_5core.dat"
profiles_file = "video_games_sorted_5core.txt"

attrs = load_attrs(attrs_file)
ctx = load_ctx(ctx_file)
user_ids, item_ids, profiles = load_profiles(profiles_file)

In [6]:
# Experiment tag; passed to train() as the `datadir` name further down.
exp_name = "id"

# Sizes read off the loaded data.
n_items, n_attrs = attrs.shape[0], attrs.shape[1]
# Only the first context entry is inspected -- assumes all entries share its length.
first_ctx = next(iter(ctx.values()))
n_ctx = first_ctx.shape[0]

In [7]:
# Hyper-parameters

# -- optimisation (fed to Adam and the training loop) --
learning_rate = 1e-4
l2_reg = 0.0  # used as Adam's weight_decay
beta1, beta2 = 0.9, 0.98
epochs = 800
batch_size = 128

# -- model / input sizes --
seq_len = 50  # used as profile_seq_len when building the datasets
n_blocks = 3
n_heads = 3
d_dim = 90
g_dim = 450
dropout_rate = 0.5

# -- residual connection switches (passed to CARCA as res_sa / res_ca) --
residual_sa = True
residual_ca = True

In [8]:
# All three splits are built from the same inputs; only `mode` differs.
dataset_kwargs = dict(
    user_ids=user_ids,
    item_ids=item_ids,
    profiles=profiles,
    attrs=attrs,
    ctx=ctx,
    profile_seq_len=seq_len,
    target_seq_len=100,
)

train_data = CARCADataset(mode="train", **dataset_kwargs)
val_data = CARCADataset(mode="val", **dataset_kwargs)
test_data = CARCADataset(mode="test", **dataset_kwargs)

def _sample_eval_indices(n_examples, rng, max_size=10_000):
    """Pick the example indices used for evaluation.

    Args:
        n_examples: Number of examples in the dataset.
        rng: A ``random.Random`` instance used for sampling.
        max_size: Upper bound on the number of indices returned.

    Returns:
        ``max_size`` distinct indices sampled without replacement when the
        dataset is larger than ``max_size``; otherwise ``range(n_examples)``.
    """
    if n_examples > max_size:
        return rng.sample(range(n_examples), max_size)
    return range(n_examples)


# Dedicated, seeded RNG: the evaluation subsets (and therefore the reported
# metrics) are the same on every Restart & Run All, and the global `random`
# state is left untouched.
EVAL_SEED = 0
_eval_rng = random.Random(EVAL_SEED)

val_idx = _sample_eval_indices(len(val_data), _eval_rng)
val_sub = Subset(val_data, val_idx)
test_idx = _sample_eval_indices(len(test_data), _eval_rng)
test_sub = Subset(test_data, test_idx)

# Only the training loader shuffles; the evaluation loaders keep a fixed order.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(dataset=eval_subset, batch_size=batch_size, shuffle=False)
    for eval_subset in (val_sub, test_sub)
)

In [9]:
# Embedding used for this run: item-ID embedding.
# Alternative kept for reference:
# emb = AttrCtxEmbedding(d=d_dim, g=g_dim, n_ctx=n_ctx, n_attrs=n_attrs)
emb = IdEmbedding(d=d_dim, n_items=n_items)

model = CARCA(
    emb=emb,
    d=d_dim,
    H=n_heads,
    B=n_blocks,
    p=dropout_rate,
    res_sa=residual_sa,
    res_ca=residual_ca,
)

In [10]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA instead of failing in `.to()`.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
model = model.to(device)

Using cuda device


In [11]:
# Adam over all model parameters, configured from the hyper-parameter cell.
optim = Adam(
    params=model.parameters(),
    lr=learning_rate,
    betas=(beta1, beta2),
    weight_decay=l2_reg,
)

In [12]:
# Run training; `model` is rebound to whatever train() returns.
# early_stop=50 is presumably the early-stopping patience -- confirm in carca.train.
run_dir = f"./{exp_name}"
model = train(
    model=model,
    optim=optim,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    device=device,
    epochs=epochs,
    early_stop=50,
    datadir=run_dir,
    # scheduler=scheduler
)

16:13:17 - Epoch 001: Train Loss = 0.6572
16:13:23 - Epoch 001: Val Loss = 0.5827 HR = 0.5409, NDCG = 0.3260
16:13:49 - Epoch 002: Train Loss = 0.5135
16:13:54 - Epoch 002: Val Loss = 0.5437 HR = 0.5466, NDCG = 0.3221
16:14:20 - Epoch 003: Train Loss = 0.4888
16:14:25 - Epoch 003: Val Loss = 0.5352 HR = 0.5442, NDCG = 0.3215
16:14:51 - Epoch 004: Train Loss = 0.4746
16:14:56 - Epoch 004: Val Loss = 0.5371 HR = 0.5358, NDCG = 0.3148
16:15:22 - Epoch 005: Train Loss = 0.4679
16:15:28 - Epoch 005: Val Loss = 0.5409 HR = 0.5372, NDCG = 0.3182
16:15:57 - Epoch 006: Train Loss = 0.4652
16:16:02 - Epoch 006: Val Loss = 0.5382 HR = 0.5395, NDCG = 0.3145
16:16:31 - Epoch 007: Train Loss = 0.4618
16:16:37 - Epoch 007: Val Loss = 0.5472 HR = 0.5425, NDCG = 0.3199
16:17:07 - Epoch 008: Train Loss = 0.4566
16:17:12 - Epoch 008: Val Loss = 0.5513 HR = 0.5401, NDCG = 0.3166
16:17:39 - Epoch 009: Train Loss = 0.4521
16:17:45 - Epoch 009: Val Loss = 0.5588 HR = 0.5325, NDCG = 0.3143
16:18:11 - Epoch 01