In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up before each cell runs, without restarting the kernel.
%load_ext autoreload
%autoreload 2

import IPython
from pathlib import Path
import os
# Switch the working directory to the parent of the folder that contains this
# notebook, so relative paths used by later cells resolve from there.
#
# `extract_module_locals()` returns a (module, namespace) pair. Only the namespace
# is needed, and it is stored under a private name rather than `locals`: binding
# `locals` at notebook level would shadow the builtin for every later cell.
_kernel_namespace = IPython.extract_module_locals()[1]  # type: ignore
# NOTE(review): `__vsc_ipynb_file__` looks like a variable injected by the VS Code
# Jupyter extension; under another front end this lookup would raise KeyError.
notebook_name = _kernel_namespace["__vsc_ipynb_file__"]
os.chdir(Path(notebook_name).parent.parent)

In [2]:
import polars as pl
from torch.utils.data import DataLoader
from retail_recommender_system.models.deepfm import DeepFMDataset, collate_fn, DeepFM, DeepFMModelConfig, DeepFMEvalDataset
from retail_recommender_system.data.loader import load_dataset, DataConfig

In [3]:
# Load the "hm" dataset using the "frac_0_01" prefix.
dataset = load_dataset(
    DataConfig(
        dataset="hm",
        prefix="frac_0_01",
    )
)

In [4]:
# Build the training dataset from the three loaded tables; each table is passed
# under the keyword argument that matches its key in `dataset.data`.
train_dataset = DeepFMDataset(
    **{table: dataset.data[table] for table in ("relations", "users", "items")},
    neg_sampl=1,
)

In [5]:
# Show the first training sample. In the recorded output it is a dict with keys
# u_id, i_id, u_attr, i_attr and target, holding two rows: one with target 1 and
# one with target 0.
train_dataset[0]

{'u_id': tensor([2895, 2895], dtype=torch.int32),
 'i_id': tensor([12224, 25544], dtype=torch.int32),
 'u_attr': tensor([[[ 0.,  0., 52.,  0.,  0.,  1.,  0.,  1.,  0.]],
 
         [[ 0.,  0., 52.,  0.,  0.,  1.,  0.,  1.,  0.]]]),
 'i_attr': tensor([[[0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.]]]),
 'target': tensor([1., 0.])}

In [6]:
# Wrap the training dataset for evaluation with a user batch size of 100.
eval_dataset = DeepFMEvalDataset(base_dataset=train_dataset, user_batch_size=100)

In [7]:
# Pull only the first batch from the evaluation dataset for inspection.
b = next(iter(eval_dataset))

In [8]:
# Display the first evaluation batch; the recorded output shows u_id values
# running from 0 to 99 and i_id values running from 0 to 51804.
b

{'u_id': tensor([ 0,  0,  0,  ..., 99, 99, 99], dtype=torch.int32),
 'i_id': tensor([    0,     1,     2,  ..., 51802, 51803, 51804], dtype=torch.int32),
 'u_attr': tensor([[ 0.,  0., 25.,  ...,  0.,  1.,  0.],
         [ 0.,  0., 25.,  ...,  0.,  1.,  0.],
         [ 0.,  0., 25.,  ...,  0.,  1.,  0.],
         ...,
         [ 0.,  0., 28.,  ...,  0.,  1.,  0.],
         [ 0.,  0., 28.,  ...,  0.,  1.,  0.],
         [ 0.,  0., 28.,  ...,  0.,  1.,  0.]]),
 'i_attr': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 'target': tensor([0., 0., 0.,  ..., 0., 0., 0.])}

In [28]:
# Iterate the training dataset in shuffled batches of five samples, merging each
# batch with the project's `collate_fn`.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=5,
    shuffle=True,
    collate_fn=collate_fn,
)

In [None]:
# Fetch and display one collated training batch as a quick sanity check.
next(iter(train_loader))

In [38]:
# Instantiate DeepFM: user/item counts come from the loaded dataset, attribute
# sizes come from the training dataset, and emb_size is fixed at 32.
model = DeepFM(
    DeepFMModelConfig(
        n_users=dataset.n_users,
        n_items=dataset.n_items,
        user_attr_size=train_dataset.user_attr_size,
        item_attr_size=train_dataset.item_attr_size,
        emb_size=32,
    )
)

In [None]:
# Display the model's repr to inspect the constructed architecture.
model