In [1]:
import random
import sys

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Subset

In [2]:
# Make the parent directory importable so the `src` package resolves.
# Guarded so that re-running this cell does not keep appending duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

In [3]:
# NEXT STEPS:
# 1. Create a nearest-neighbour (KNN) model - done
# 2. Use PyTorch attention
# 3. Add a causality constraint
# 4. Experiment with additive (+) vs. multiplicative (*) residual connections
# 5. Add positional encoding

In [4]:
from src.data import CARCADataset, load_attrs, load_ctx, load_profiles, set_datapath
from src.data import CARCADataset, load_attrs_t, load_ctx_t
from src.carca import AttrCtxEmbedding, AllEmbedding, AttrEmbedding, IdEmbedding
from src.carca import IdentityEncoding, LearnableEncoding, PositionalEncoding
from src.carca import CARCA, SelfAttentionBlock, CrossAttentionBlock, DotProduct
from src.knn import KNN
from src.train import train, evaluate

In [5]:
# Select the video-games experiment directory via the project helper `set_datapath`.
# NOTE(review): presumably the bare file names passed to the load_* helpers are
# resolved against this directory — confirm in src.data.
set_datapath("../../data/embedding_experiment/video_games/")

In [6]:
# attrs = load_attrs("video_games_sbert_5core.dat")
# ctx = load_ctx("video_games_ctx_5core.dat")
# user_ids, item_ids, profiles = load_profiles("video_games_sorted_5core.txt")

In [7]:
# Load the three inputs of the experiment.
# `attrs` is read further down through `attrs.shape[0]` and `attrs.shape[1]`,
# so it has at least two dimensions.
attrs = load_attrs("video_games_sbert_5core.dat")
# `ctx` is used further down as a mapping whose values expose `.shape`.
ctx = load_ctx("video_games_ctx_5core.dat")
# Unpacked as a 3-tuple; all three parts are handed to CARCADataset.
user_ids, item_ids, profiles = load_profiles("video_games_sorted_5core.txt")

In [8]:
# Run label; the training cell derives its `datadir` from it.
exp_name = "id_resadd_causal_positions"

# Sizes read off the loaded data: the first two axes of `attrs`, and the
# leading axis of an arbitrary (first) entry of the `ctx` mapping.
n_items, n_attrs = attrs.shape[0], attrs.shape[1]
n_ctx = next(iter(ctx.values())).shape[0]

In [9]:
# Hyper-parameters, grouped by the component that consumes them.

# Optimiser (Adam): step size, moment coefficients, weight decay.
learning_rate = 1e-3
beta1, beta2 = 0.9, 0.98
l2_reg = 0.0

# Model dimensions and architecture switches.
d_dim = 50
g_dim = 450  # only referenced by the commented-out AttrCtxEmbedding
seq_len = 50  # profile length for the datasets and max_len of the positional encoding
n_blocks = 2  # number of SelfAttentionBlock instances in the encoder
n_heads = 1
dropout_rate = 0.5
residual_sa = True
residual_ca = True  # only referenced by the commented-out CrossAttentionBlock

# Training loop.
epochs = 800
batch_size = 256

In [10]:
# Build the train / val / test datasets, sub-sample the evaluation splits and
# wrap everything in DataLoaders.

EVAL_SUBSET_SEED = 42      # fixes which val/test examples end up in the subsets
TARGET_SEQ_LEN = 100       # passed to CARCADataset as `target_seq_len` for every split
MAX_EVAL_SAMPLES = 10_000  # upper bound on the number of val/test examples evaluated


def _make_dataset(mode):
    """Return a CARCADataset for `mode`; every other argument is shared by all splits."""
    return CARCADataset(
        user_ids=user_ids,
        item_ids=item_ids,
        profiles=profiles,
        attrs=attrs,
        ctx=ctx,
        profile_seq_len=seq_len,
        target_seq_len=TARGET_SEQ_LEN,
        mode=mode,
    )


def _sample_indices(n_total, max_samples, rng):
    """Return `max_samples` distinct indices drawn with `rng`, or all indices if `n_total` does not exceed it."""
    if n_total > max_samples:
        return rng.sample(range(n_total), max_samples)
    return range(n_total)


train_data = _make_dataset("train")
val_data = _make_dataset("val")
test_data = _make_dataset("test")

# A private, seeded generator makes the evaluation subsets identical on every
# re-run of this cell, without touching the state of the global `random` module.
subset_rng = random.Random(EVAL_SUBSET_SEED)
val_idx = _sample_indices(len(val_data), MAX_EVAL_SAMPLES, subset_rng)
val_sub = Subset(val_data, val_idx)
test_idx = _sample_indices(len(test_data), MAX_EVAL_SAMPLES, subset_rng)
test_sub = Subset(test_data, test_idx)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = DataLoader(val_sub, batch_size=batch_size, shuffle=False, num_workers=0)
test_loader = DataLoader(test_sub, batch_size=batch_size, shuffle=False, num_workers=0)

In [11]:
# Assemble the CARCA model from its embedding, encoder and decoder parts.

# Positional encoding, handed to the item-ID embedding.
pos = LearnableEncoding(d=d_dim, max_len=seq_len)
emb = IdEmbedding(n_items=n_items, d=d_dim, pos=pos)

# Encoder: `n_blocks` self-attention blocks built with identical settings.
enc = nn.ModuleList(
    SelfAttentionBlock(d_dim, n_heads, dropout_rate, residual_sa)
    for _ in range(n_blocks)
)

dec = DotProduct()

# Commented-out alternative components, kept for reference:
# emb = AttrCtxEmbedding(d=d_dim, g=g_dim, n_ctx=n_ctx, n_attrs=n_attrs)
# enc = LearnableEncoding(seq_len=seq_len, d=d_dim)
# dec = CrossAttentionBlock(d_dim, n_heads, dropout_rate, residual_ca)

model = CARCA(d=d_dim, p=dropout_rate, emb=emb, enc=enc, dec=dec)

In [12]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing in `.to("cuda")`.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

In [13]:
# Adam over all model parameters, configured from the hyper-parameter cell.
optim = Adam(
    model.parameters(),
    lr=learning_rate,
    betas=(beta1, beta2),
    weight_decay=l2_reg,
)

In [14]:
# Run training; whatever `train` returns is bound back to `model`.
# Outputs are directed to a directory named after the experiment.
model = train(
    model=model,
    optim=optim,
    device=device,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    epochs=epochs,
    early_stop=50,
    datadir=f"./{exp_name}",
    # scheduler=scheduler
)

KeyboardInterrupt: 

In [10]:
# NOTE(review): this rebinds `model`, so on a top-to-bottom run the CARCA model
# built and trained above is replaced and the evaluation below scores the KNN
# instance instead. The execution count of this cell (In [10]) is also out of
# sequence with its neighbours — confirm which model the recorded metrics
# belong to, and consider a distinct name for the KNN model.
model = KNN()

In [15]:
# Evaluate on the sub-sampled test loader, using the same `device` that the
# model was moved to and that `train` received (was a hard-coded "cuda").
# NOTE(review): the trailing 10 is interpreted by src.train.evaluate —
# presumably a top-k cutoff; confirm.
evaluate(model, test_loader, device, 10)

(0.3941, 0.24710776491165162, nan)