In [1]:
import os
import random
import sys

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Subset

In [2]:
# Add the parent of the current working directory to the import path so the
# `src` package imported in a later cell can be resolved.
sys.path.append("..")

In [3]:
# NEXT STEPS:
# 1. Create Nearest Neighbour model - done
# 2. Use pytorch attention - ??
# 3. Add causality constraint - done
# 4. Experiment with + | * residual connections - done
# 5. Add positional encoding - done

In [3]:
from src.data import CARCADataset, load_attrs, load_ctx, load_profiles, set_datapath
from src.carca import AttrCtxEmbedding, AllEmbedding, AttrEmbedding, IdEmbedding
from src.carca import IdentityEncoding, LearnableEncoding, PositionalEncoding
from src.carca import CARCA, SelfAttentionBlock, CrossAttentionBlock, DotProduct
from src.knn import KNN
from src.train import train, evaluate

In [5]:
# Select the video-games data directory for src.data.
# NOTE(review): presumably the load_* helpers resolve the bare file names used
# below relative to this path — confirm in src.data.
set_datapath("../../data/embedding_experiment/video_games/")

In [6]:
# attrs = load_attrs("video_games_sbert_5core.dat")
# ctx = load_ctx("video_games_ctx_5core.dat")
# user_ids, item_ids, profiles = load_profiles("video_games_sorted_5core.txt")

In [7]:
# Load the three inputs of the experiment by file name.
# `attrs` is later indexed as a 2-D array (attrs.shape[0], attrs.shape[1]) and
# `ctx` as a mapping whose values expose `.shape` (see the next cell).
attrs = load_attrs("video_games_sbert_5core.dat")
ctx = load_ctx("video_games_ctx_5core.dat")
# NOTE(review): exact structure of `profiles` is defined in src.data — not visible here.
user_ids, item_ids, profiles = load_profiles("video_games_sorted_5core.txt")

In [8]:
# Tag of this run; the training cell builds its `datadir` from it.
exp_name = "id_resadd_causal_positions"

# Sizes read off the loaded data.
# NOTE(review): assumes `attrs` holds one row per item and one column per attribute — confirm.
n_items, n_attrs = attrs.shape[0], attrs.shape[1]

# All context entries are assumed to share one length, so any entry will do — TODO confirm.
any_ctx_entry = next(iter(ctx.values()))
n_ctx = any_ctx_entry.shape[0]

In [9]:
# Hyper-parameters

# -- Optimiser (passed to Adam as lr / betas / weight_decay) --
learning_rate = 1e-3
beta1, beta2 = 0.9, 0.98
l2_reg = 0.0

# -- Model --
d_dim = 50           # `d` of the embedding, attention blocks and CARCA
g_dim = 250          # only referenced by the commented-out AttrCtxEmbedding
n_blocks = 2         # number of SelfAttentionBlock instances in the encoder
n_heads = 1
dropout_rate = 0.5
residual_sa = True   # forwarded to SelfAttentionBlock
residual_ca = True   # only referenced by the commented-out CrossAttentionBlock

# -- Data / training loop --
seq_len = 30         # profile_seq_len of the datasets, max_len of the encoding
batch_size = 256
epochs = 500

In [10]:
# Arguments shared by the three splits; only `mode` differs between them.
dataset_kwargs = dict(
    user_ids=user_ids,
    item_ids=item_ids,
    profiles=profiles,
    attrs=attrs,
    ctx=ctx,
    profile_seq_len=seq_len,
    target_seq_len=100,
)
train_data = CARCADataset(mode="train", **dataset_kwargs)
val_data = CARCADataset(mode="val", **dataset_kwargs)
test_data = CARCADataset(mode="test", **dataset_kwargs)

# Validation and test are evaluated on at most this many examples each.
max_eval_samples = 10_000
# Fixed seed: the evaluated subset (and therefore the reported metrics) must
# not change between runs. A dedicated Random instance is used so the global
# `random` state is left untouched.
eval_subset_seed = 0

n_val = len(val_data)
val_idx = (
    random.Random(eval_subset_seed).sample(range(n_val), max_eval_samples)
    if n_val > max_eval_samples
    else range(n_val)
)
val_sub = Subset(val_data, val_idx)

n_test = len(test_data)
test_idx = (
    random.Random(eval_subset_seed).sample(range(n_test), max_eval_samples)
    if n_test > max_eval_samples
    else range(n_test)
)
test_sub = Subset(test_data, test_idx)

# Only the training data is shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = DataLoader(val_sub, batch_size=batch_size, shuffle=False, num_workers=0)
test_loader = DataLoader(test_sub, batch_size=batch_size, shuffle=False, num_workers=0)

In [11]:
# Alternatives that are currently switched off (kept for reference):
# emb = AttrCtxEmbedding(d=d_dim, g=g_dim, n_ctx=n_ctx, n_attrs=n_attrs)
# enc = LearnableEncoding(seq_len=seq_len, d=d_dim)
# dec = CrossAttentionBlock(d_dim, n_heads, dropout_rate, residual_ca)

# Embedding: item IDs combined with a LearnableEncoding sized by `seq_len`.
pos = LearnableEncoding(d=d_dim, max_len=seq_len)
emb = IdEmbedding(n_items=n_items, d=d_dim, pos=pos)

# Encoder: `n_blocks` identically configured self-attention blocks.
attention_blocks = [
    SelfAttentionBlock(d_dim, n_heads, dropout_rate, residual_sa)
    for _ in range(n_blocks)
]
enc = nn.ModuleList(attention_blocks)

# Decoder: DotProduct from src.carca.
dec = DotProduct()

model = CARCA(d=d_dim, p=dropout_rate, emb=emb, enc=enc, dec=dec)

In [12]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA instead of failing here.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

In [13]:
# Adam over all model parameters; `l2_reg` is applied through Adam's
# weight_decay and (beta1, beta2) replace Adam's default betas.
optim = Adam(model.parameters(), lr=learning_rate, weight_decay=l2_reg, betas=(beta1, beta2))

In [None]:
# Directory handed to train() as `datadir`, named after the experiment.
# NOTE(review): presumably where train() writes its outputs — confirm in src.train.
run_dir = f"./{exp_name}"

# `model` is rebound to whatever train() returns.
# NOTE(review): early_stop=50 is presumably the early-stopping patience in
# epochs — confirm in src.train.
model = train(
    model=model,
    optim=optim,
    device=device,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    epochs=epochs,
    early_stop=50,
    datadir=run_dir,
    # scheduler=scheduler
)

In [10]:
# Rebind `model` to the KNN model from src.knn. After this cell the name no
# longer refers to the CARCA model built/trained above.
model = KNN()

In [15]:
# Evaluate the current `model` on the test loader, on the same device that was
# selected for training rather than a separately hard-coded literal.
# NOTE(review): the trailing 10 is presumably the cut-off k of the ranking
# metrics — confirm in src.train.evaluate.
evaluate(model, test_loader, device, 10)

(0.3941, 0.24710776491165162, nan)

### Training loop across different clients

In [4]:
# Hyper-parameters

# -- Optimiser (passed to Adam as lr / betas / weight_decay) --
learning_rate = 1e-3
beta1, beta2 = 0.9, 0.98
l2_reg = 0.0

# -- Model --
d_dim = 50           # `d` of the embedding, attention blocks and CARCA
g_dim = 250          # not used by the per-client training loop below
n_blocks = 2         # number of SelfAttentionBlock instances in the encoder
n_heads = 1
dropout_rate = 0.5
residual_sa = True   # forwarded to SelfAttentionBlock
residual_ca = True   # not used by the per-client training loop below

# -- Data / training loop --
seq_len = 30         # profile_seq_len of the datasets
batch_size = 256
epochs = 500
early_stop = 20      # forwarded to train(early_stop=...)

In [5]:
# Client identifiers; each one names a sub-directory of ../../data and a
# sub-directory of ../results in the training loop below.
clients = [
    "andreashop.sk",
    "mojeparty.cz",
    "laskakit.cz",
    "vitalcountry.cz",
]

In [6]:
def _file_number(filename):
    """Return the integer found between the last ``_`` and the first ``.`` of ``filename``."""
    return int(filename[filename.rfind("_") + 1 : filename.find(".")])


def _numbered_files(directory, prefix):
    """Return the entries of ``directory`` starting with ``prefix``, sorted by file number.

    ``os.listdir`` returns names in arbitrary order, so every list is sorted by
    the same numeric key before the lists are zipped together; otherwise
    matching attrs/ctx/profiles files could end up in different positions.
    """
    matching = [f for f in os.listdir(directory) if f.startswith(prefix)]
    return sorted(matching, key=_file_number)


# Validation runs on at most this many examples per dataset.
max_eval_samples = 10_000
# Fixed seed so the validation subset (and the logged metrics) are the same on
# every run; a dedicated Random instance leaves the global `random` state alone.
eval_subset_seed = 0

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

for client in clients:
    client_dir = f"../../data/{client}"
    set_datapath(client_dir)

    attr_files = _numbered_files(client_dir, "dummy_attrs_")
    ctx_files = _numbered_files(client_dir, "ctx_")
    profile_files = _numbered_files(client_dir, "profiles_")

    for attr_f, ctx_f, profile_f in zip(attr_files, ctx_files, profile_files):
        a_num = _file_number(attr_f)
        c_num = _file_number(ctx_f)
        p_num = _file_number(profile_f)

        # The three files of one run must carry the same number.
        assert a_num == c_num == p_num, (
            f"mismatched files for {client}: {attr_f}, {ctx_f}, {profile_f}"
        )

        attrs = load_attrs(attr_f)
        ctx = load_ctx(ctx_f)
        user_ids, item_ids, profiles = load_profiles(profile_f)

        n_items = attrs.shape[0]
        n_ctx = next(iter(ctx.values())).shape[0]
        n_attrs = attrs.shape[1]

        # Arguments shared by both splits; only `mode` differs.
        dataset_kwargs = dict(
            user_ids=user_ids,
            item_ids=item_ids,
            profiles=profiles,
            attrs=attrs,
            ctx=ctx,
            profile_seq_len=seq_len,
            target_seq_len=100,
            test=False,
        )
        train_data = CARCADataset(mode="train", **dataset_kwargs)
        val_data = CARCADataset(mode="val", **dataset_kwargs)

        n_val = len(val_data)
        val_idx = (
            random.Random(eval_subset_seed).sample(range(n_val), max_eval_samples)
            if n_val > max_eval_samples
            else range(n_val)
        )
        val_sub = Subset(val_data, val_idx)
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=2)
        val_loader = DataLoader(val_sub, batch_size=batch_size, shuffle=False, num_workers=0)

        # A fresh model and optimiser for every (client, file number) pair.
        pos = IdentityEncoding()
        emb = IdEmbedding(n_items=n_items, d=d_dim, pos=pos)
        enc = nn.ModuleList([SelfAttentionBlock(d_dim, n_heads, dropout_rate, residual_sa) for _ in range(n_blocks)])
        dec = DotProduct()

        model = CARCA(d=d_dim, p=dropout_rate, emb=emb, enc=enc, dec=dec)
        model = model.to(device)
        optim = Adam(model.parameters(), lr=learning_rate, weight_decay=l2_reg, betas=(beta1, beta2))

        # No test loader is passed here; `datadir` is keyed by client and file number.
        model = train(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            test_loader=None,
            device=device,
            optim=optim,
            epochs=epochs,
            early_stop=early_stop,
            datadir=f"../results/{client}/{a_num}"
            # scheduler=scheduler
        )

14:33:36 - Epoch 001: Train Loss = 0.7139
14:33:37 - Epoch 001: Val Loss = 0.7114 HR = 0.2720, NDCG = 0.1774
14:33:41 - Epoch 002: Train Loss = 0.6332
14:33:42 - Epoch 002: Val Loss = 0.6794 HR = 0.3566, NDCG = 0.2157
14:33:46 - Epoch 003: Train Loss = 0.5590
14:33:46 - Epoch 003: Val Loss = 0.6679 HR = 0.3587, NDCG = 0.2139
14:33:50 - Epoch 004: Train Loss = 0.5070
14:33:51 - Epoch 004: Val Loss = 0.6504 HR = 0.3675, NDCG = 0.2193
14:33:55 - Epoch 005: Train Loss = 0.4811
14:33:56 - Epoch 005: Val Loss = 0.6134 HR = 0.3820, NDCG = 0.2309
14:34:00 - Epoch 006: Train Loss = 0.4483
14:34:01 - Epoch 006: Val Loss = 0.5923 HR = 0.3973, NDCG = 0.2423
14:34:05 - Epoch 007: Train Loss = 0.4193
14:34:06 - Epoch 007: Val Loss = 0.5725 HR = 0.4131, NDCG = 0.2563
14:34:10 - Epoch 008: Train Loss = 0.3939
14:34:11 - Epoch 008: Val Loss = 0.5467 HR = 0.4273, NDCG = 0.2745
14:34:15 - Epoch 009: Train Loss = 0.3630
14:34:16 - Epoch 009: Val Loss = 0.5283 HR = 0.4364, NDCG = 0.2839
14:34:19 - Epoch 01