In [1]:
import torch




# Toy click histories: clicked_articles[user][article] is a list of token ids.
# Users have different numbers of articles, and articles have different
# numbers of tokens, so everything is padded to a dense (B, N, L) batch below.
clicked_articles = [
    [[5, 12, 34], [7, 8]],                  # user 0: 2 articles
    [[9, 21, 33, 45], [2], [11, 13, 17]],   # user 1: 3 articles
    [[14, 15, 16, 17, 18]],                 # user 2: 1 article
]

PAD_ID = 0

B = len(clicked_articles)            # number of users
N = max(map(len, clicked_articles))  # largest number of articles for any user
L = max(                             # largest number of tokens in any article
    len(tokens)
    for history in clicked_articles
    for tokens in history
)

print(f"B = {B}, N = {N}, L = {L}")

# Token ids start out as all padding; real tokens are copied in below.
clicked_token_ids = torch.full((B, N, L), PAD_ID, dtype=torch.long)

# Number of real tokens in each (user, article) slot; slots that a user does
# not have keep a count of 0.
token_counts = torch.zeros((B, N), dtype=torch.long)

for user_idx, history in enumerate(clicked_articles):
    for slot_idx, tokens in enumerate(history):
        n_tokens = len(tokens)
        token_counts[user_idx, slot_idx] = n_tokens
        clicked_token_ids[user_idx, slot_idx, :n_tokens] = torch.tensor(
            tokens, dtype=torch.long
        )

# A position is padding (True) when its index is at or beyond the slot's real
# token count. Broadcasting (L,) against (B, N, 1) yields a (B, N, L) bool mask.
clicked_token_mask = torch.arange(L) >= token_counts.unsqueeze(-1)

print("clicked_token_ids:")
print(clicked_token_ids)
print("\nclicked_token_mask (True = PAD):")
print(clicked_token_mask)


# A slot is fully padded when every token position in it is padding.
clicked_slot_mask = clicked_token_mask.all(dim=-1)  # shape (B, N)

print("\nclicked_slot_mask (True = this slot is entirely padding):")
print(clicked_slot_mask)

B = 3, N = 3, L = 5
clicked_token_ids:
tensor([[[ 5, 12, 34,  0,  0],
         [ 7,  8,  0,  0,  0],
         [ 0,  0,  0,  0,  0]],

        [[ 9, 21, 33, 45,  0],
         [ 2,  0,  0,  0,  0],
         [11, 13, 17,  0,  0]],

        [[14, 15, 16, 17, 18],
         [ 0,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0]]])

clicked_token_mask (True = PAD):
tensor([[[False, False, False,  True,  True],
         [False, False,  True,  True,  True],
         [ True,  True,  True,  True,  True]],

        [[False, False, False, False,  True],
         [False,  True,  True,  True,  True],
         [False, False, False,  True,  True]],

        [[False, False, False, False, False],
         [ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True]]])

clicked_slot_mask (True = this slot is entirely padding):
tensor([[False, False,  True],
        [False, False, False],
        [False,  True,  True]])
