This notebook is heavily inspired by the examples in NVIDIA's Transformers4Rec GitHub repository, which can be found [here](https://github.com/NVIDIA-Merlin/Transformers4Rec/blob/main/examples/).

## Check CUDA is working

In [1]:
import torch

# Use the GPU when PyTorch reports a usable CUDA device, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f'The main device is {device}')


The main device is cuda


In [2]:
import os
import glob

import numpy as np
import pandas as pd

import nvtabular as nvt
from nvtabular.ops import *
from merlin.schema.tags import Tags

In [3]:
# Work from the project root: if the kernel was started inside the "notebooks"
# folder, step up one level. os.path.basename is used instead of splitting on
# "/" so the check also works with platform-specific path separators.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")
root_dir = os.getcwd()

print(f'Root dir is {root_dir}')

Root dir is /home/noone/Projects/recsys


## Define data path

In [4]:
# external data
# The INPUT_DATA_DIR environment variable takes precedence; when it is not set,
# fall back to the "data/external" folder under the project root.
INPUT_DATA_DIR = os.getenv(
    "INPUT_DATA_DIR",
    default=os.path.join(root_dir, "data/external"),
)
print(f'Input data dir is {INPUT_DATA_DIR}')

Input data dir is /home/noone/Projects/recsys/data/external


## Create new random synthetic data

In [5]:
# Number of synthetic rows to generate. Values read from the environment are
# strings, so convert once here; NUM_ROWS is then an int whether it comes from
# the NUM_ROWS environment variable or from the default.
NUM_ROWS = int(os.environ.get("NUM_ROWS", 100000))

In [6]:
%%timeit
long_tailed_item_distribution = np.clip(np.random.lognormal(3., 1., int(NUM_ROWS)).astype(np.int32), 1, 50000)
# generate random item interaction features
df = pd.DataFrame(np.random.randint(70000, 90000, int(NUM_ROWS)), columns=['session_id'])
df['item_id'] = long_tailed_item_distribution

# generate category mapping for each item-id
df['category'] = pd.cut(df['item_id'], bins=334, labels=np.arange(1, 335)).astype(np.int32)
df['age_days'] = np.random.uniform(0, 1, int(NUM_ROWS)).astype(np.float32)
df['weekday_sin']= np.random.uniform(0, 1, int(NUM_ROWS)).astype(np.float32)

# generate day mapping for each session
map_day = dict(zip(df.session_id.unique(), np.random.randint(1, 10, size=(df.session_id.nunique()))))
df['day'] =  df.session_id.map(map_day)

27.3 ms ± 505 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [10]:
%%timeit
device="cuda"
SEED = 42
generator = torch.Generator(device=device)
generator.manual_seed(SEED)
std = torch.tensor([[-0.8166, -1.3802, -0.3560]],device=device)
data = torch.normal(mean=0., std=1., size=(1,NUM_ROWS), generator=generator, device=device, out=None)

50.6 µs ± 1.29 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [8]:
%%timeit
device="cpu"
SEED = 42
generator = torch.Generator(device='cpu')
generator.manual_seed(SEED)
std = torch.tensor([[-0.8166, -1.3802, -0.3560]],device="cpu")
data = torch.normal(mean=0., std=1., size=(1,NUM_ROWS), generator=generator, device="cpu", out=None)

492 µs ± 2.25 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [11]:
# Draw a (1, NUM_ROWS) tensor of standard-normal samples on the CPU with a
# fixed seed.
device = "cpu"
SEED = 42
generator = torch.Generator(device=device)
generator.manual_seed(SEED)
# NOTE(review): `std` is not used by the sampling call below; it is kept
# because cells outside this section may still reference it - confirm and remove.
std = torch.tensor([[-0.8166, -1.3802, -0.3560]], device=device)
# int(...) mirrors the dataframe cell: NUM_ROWS may be a string when it comes
# from the environment, and `size` must contain integers.
data = torch.normal(mean=0., std=1., size=(1, int(NUM_ROWS)), generator=generator, device=device)

In [12]:
# Display the device on which the `data` tensor is stored.
data.device

device(type='cpu')

In [14]:
# Draw a (1, NUM_ROWS) tensor of standard-normal samples on the GPU with a
# fixed seed. Falls back to the CPU when CUDA is unavailable, using the same
# check as the device-detection cell, so the cell still runs on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"
SEED = 42
generator = torch.Generator(device=device)
generator.manual_seed(SEED)
# NOTE(review): `std` is not used by the sampling call below; it is kept
# because cells outside this section may still reference it - confirm and remove.
std = torch.tensor([[-0.8166, -1.3802, -0.3560]], device=device)
# int(...) mirrors the dataframe cell: NUM_ROWS may be a string when it comes
# from the environment, and `size` must contain integers.
data = torch.normal(mean=0., std=1., size=(1, int(NUM_ROWS)), generator=generator, device=device)
data.shape

torch.Size([1, 100000])

In [15]:
# Display the device on which the `data` tensor is stored.
data.device

device(type='cuda', index=0)