In [1]:
import math
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from collections import defaultdict
import os
import time
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils as nn_utils
from torch.utils.data import Dataset, DataLoader
import torch.cuda.amp as amp
from torch.cuda.amp import autocast, GradScaler

import itertools
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

# from box import Box

import warnings

import FastAttention2 as FastAttention
from FastAttention2 import kernel

In [2]:
config = {
    'data_path' : '/gpfs/u/home/TMSR/TMSRvldn/scratch/recsys/ratings.dat',
    'max_len' : 200, # maximum length of input sequences
    'hidden_units' : 256, # Embedding size
    'num_heads' : 2, # Multi-head layer
    'num_layers': 2, # block (encoder layer)
    'dropout_rate' : 0.1, # dropout rate for regularization
    'lr' : 0.001,
    'batch_size' : 128,
    'num_epochs' : 25,
    'num_workers' : 2,
    'mask_prob' : 0.15, # for cloze task
    'weight_decay' : 0.001,
    'grad_clip' : 1.0,
}

seed = 42  
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


# Dataset

In [3]:
class MakeSequenceDataSet():
    """
    SequenceData
    """
    def __init__(self, config):
        self.df = pd.read_csv(config['data_path'], sep="::", names=["UserID", "MovieID", "Ratings", "Timestamp"], engine='python')
        # create encoders and decoders for both movieID and userID
        self.item_encoder, self.item_decoder = self.generate_encoder_decoder('MovieID')
        self.user_encoder, self.user_decoder = self.generate_encoder_decoder('UserID')
        self.num_item, self.num_user = len(self.item_encoder), len(self.user_encoder)

        # converting user and item IDs into numerical indices
        self.df['item_idx'] = self.df['MovieID'].apply(lambda x : self.item_encoder[x] + 1) # takes each movieID, looks up its corr. index in the self.item_encoder dictionary, then increments the label by 1. (WHY?)
        self.df['user_idx'] = self.df['UserID'].apply(lambda x : self.user_encoder[x]) # converts each userID into a numerical index using the self.user_encoder dictionary. The result is stored in the user_idx column
        # sorting by user_idx and timestamp ensures that the data fed into the model respects the order of events which is critical for any sequence prediction task
        self.df = self.df.sort_values(['user_idx', 'Timestamp'])
        self.user_train, self.user_valid = self.generate_sequence_data() # diff

    # facilitates the transformation of categorical data into a numerical format that can be processed by neural networks
    def generate_encoder_decoder(self, col : str) -> dict:
        """
        encoder, decoder

        Args:
            col (str):  columns
        Returns:
            dict: user encoder, decoder
        """
        # takes a column name and generates dictionaries to map unique values to indices (encoder) and indicies back to values (decoder)
        # encoders and decoders are useful for converting categorical variables into indices which are more manageable for models and for interpreting the outputs of the model.
        encoder = {}
        decoder = {}
        ids = self.df[col].unique()

        for idx, _id in enumerate(ids):
            encoder[_id] = idx
            decoder[idx] = _id

        return encoder, decoder

    # creates sequences of interactions for each user. This is done by iterating through sorted data to maintain the temporal order of interactions
    # the result is a dictionary where each key is a user_id and the value is a list of items interacted with by the user
    def generate_sequence_data(self) -> dict:
        """
        sequence_data

        Returns:
            dict: train user sequence / valid user sequence
        """
        users = defaultdict(list)
        user_train = {}
        user_valid = {}
        group_df = self.df.groupby('user_idx')
        for user, item in group_df:
            users[user].extend(item['item_idx'].tolist())

        for user in users:
            user_train[user] = users[user][:-1]
            user_valid[user] = [users[user][-1]]

        return user_train, user_valid

    def get_train_valid_data(self):
        return self.user_train, self.user_valid

In [4]:
s = MakeSequenceDataSet(config)

In [5]:
# handles data preprocessing for a BERT-based model
class BERTRecDataSet(Dataset):
    def __init__(self, user_train, max_len, num_user, num_item, mask_prob):
        self.user_train = user_train
        self.max_len = max_len
        self.num_user = num_user
        self.num_item = num_item
        self.mask_prob = mask_prob
        self._all_items = set([i for i in range(1, self.num_item + 1)])

    def __len__(self):
        #  user sequence
        return self.num_user

    def __getitem__(self, user):

        user_seq = self.user_train[user]
        tokens = []
        labels = []
        for s in user_seq[-self.max_len:]:
            prob = np.random.random()
            if prob < self.mask_prob:
                prob /= self.mask_prob
                if prob < 0.8:
                    # masking
                    tokens.append(self.num_item + 1)  # mask_index: num_item + 1, 0: pad, 1~num_item: item index
                elif prob < 0.9:
                    # noise
                    tokens.extend(self.random_neg_sampling(rated_item = user_seq, num_item_sample = 1))  # item random sampling
                else:
                    tokens.append(s)
                labels.append(s)
            else:
                tokens.append(s)
                labels.append(0)

        mask_len = self.max_len - len(tokens)
        tokens = [0] * mask_len + tokens
        labels = [0] * mask_len + labels

        return torch.LongTensor(tokens), torch.LongTensor(labels)

    def random_neg_sampling(self, rated_item : list, num_item_sample : int):
        nge_samples = random.sample(list(self._all_items - set(rated_item)), num_item_sample)
        return nge_samples


# Model Architecture

In [6]:
class PositionalEmbedding(nn.Module):
    def __init__(self, max_len, d_model): # d_model is the size of the embeddings (the size of each token's embedding vector)
        super().__init__()

        # Compute the positional encodings once in log space.
        self.pe = nn.Embedding(max_len, d_model)

    def forward(self, x):
        batch_size, seq_len = x.shape  # Get actual sequence length
        position_ids = torch.arange(seq_len, dtype=torch.long, device=x.device).unsqueeze(0).expand(batch_size, seq_len)
        return self.pe(position_ids)  # Correctly index positional embeddings

class TokenEmbedding(nn.Embedding):
    def __init__(self, vocab_size, embed_size=512):
        super().__init__(vocab_size, embed_size, padding_idx=0)

In [7]:
class BERTEmbedding(nn.Module):
    """
    BERT Embedding which is consisted with under features
        1. TokenEmbedding : normal embedding matrix
        2. PositionalEmbedding : adding positional information using sin, cos
        2. SegmentEmbedding : adding sentence segment info, (sent_A:1, sent_B:2)

        sum of all these features are output of BERTEmbedding
    """

    def __init__(self, vocab_size, embed_size, max_len, dropout=0.1):
        """
        :param vocab_size: total vocab size
        :param embed_size: embedding size of token embedding
        :param dropout: dropout rate
        """
        super().__init__()
        self.token = TokenEmbedding(vocab_size=vocab_size, embed_size=embed_size)
        self.position = PositionalEmbedding(max_len=max_len, d_model=embed_size)
        # self.segment = SegmentEmbedding(embed_size=self.token.embedding_dim)
        self.dropout = nn.Dropout(p=dropout)
        #self.embed_size = embed_size

    def forward(self, sequence):
        x = self.token(sequence) + self.position(sequence)
        return self.dropout(x)

In [8]:
b = BERTEmbedding(26744, 768, 150, dropout=0.1)

In [9]:
b.position.pe

Embedding(150, 768)

In [10]:
import torch
import FastAttention2 as FastAttention
from FastAttention2 import kernel

class CustomAttention(torch.autograd.Function):
    @staticmethod
    def forward(ctx, Q, K, V):
        # Make sure Q,K,V are contiguous
        Q = Q.contiguous()
        K = K.contiguous()
        V = V.contiguous()
        
        # If Q is not float32 or float16, cast to float32
        if Q.dtype not in (torch.float32, torch.float16):
            Q = Q.float()
            K = K.float()
            V = V.float()

        ctx.save_for_backward(Q, K, V)

        # Use 0 to indicate "no causal mask" (bidirectional).
        inplace = False
        if Q.dtype == torch.float32:
            return FastAttention.kernel.float32(Q, K, V, 0, inplace, False)
        elif Q.dtype == torch.float16:
            return FastAttention.kernel.float16(Q, K, V, 0, inplace, False)
        else:
            raise ValueError("Only float32/float16 are supported")

    @staticmethod
    def backward(ctx, grad_output):
        Q, K, V = ctx.saved_tensors
        Q = Q.contiguous()
        K = K.contiguous()
        V = V.contiguous()
        grad_output = grad_output.contiguous()
        inplace = False

        if Q.dtype == torch.float32:
            grad_Q = FastAttention.kernel.float32(grad_output, V, K, 0, inplace, False)
            grad_K = FastAttention.kernel.float32(V, grad_output, Q, 0, inplace, True)
            grad_V = FastAttention.kernel.float32(K, Q, grad_output, 0, inplace, True)
        elif Q.dtype == torch.float16:
            grad_Q = FastAttention.kernel.float16(grad_output, V, K, 0, inplace, False)
            grad_K = FastAttention.kernel.float16(V, grad_output, Q, 0, inplace, True)
            grad_V = FastAttention.kernel.float16(K, Q, grad_output, 0, inplace, True)
        else:
            raise ValueError("Only float32/float16 are supported")

        return grad_Q, grad_K, grad_V


In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from types import SimpleNamespace

class BertCosAttention(nn.Module):
    def __init__(self, h, d_model, dropout=0.1):
        super().__init__()
        # Build a mini config object
        config = SimpleNamespace(
            hidden_size=d_model,
            num_attention_heads=h,
            attention_probs_dropout_prob=dropout,
            is_decoder=False
        )
        
        # Now do the same logic as before:
        if config.hidden_size % config.num_attention_heads != 0:
            raise ValueError("hidden_size must be multiple of num_attention_heads")

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = config.hidden_size // config.num_attention_heads
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        self.key   = nn.Linear(config.hidden_size, self.all_head_size)
        self.value = nn.Linear(config.hidden_size, self.all_head_size)

        # Learnable constant for scaling
        self.norm_const = nn.Parameter(0.5 * torch.ones(1, self.num_attention_heads, 1, 1))
        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
        self.is_decoder = config.is_decoder

    def transpose_for_scores(self, x):
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(self, query, key=None, value=None, mask=None):
        """
        A minimal forward signature matching your old MultiHeadedAttention.
        If you want to pass just 'query' and do everything in one tensor, that's fine.
        Or you can separate Q,K,V. 
        """
        if key is None:  # If you're only passing 'query'
            key = query
            value = query

        # Project Q,K,V
        Q = self.query(query)
        K = self.key(key)
        V = self.value(value)

        # [batch, seq, hidden] => [batch, heads, seq, head_dim]
        Q = self.transpose_for_scores(Q)
        K = self.transpose_for_scores(K)
        V = self.transpose_for_scores(V)

        # If there's a mask, zero out Q,K,V where mask=0
        if mask is not None:
            mask = mask.unsqueeze(1).unsqueeze(-1)  # shape [batch, heads, seq, 1]
            Q = Q * mask
            K = K * mask
            V = V * mask

        # Normalize Q,K for cosine
        Q = F.normalize(Q, dim=-1)
        K = F.normalize(K, dim=-1)

        # Scale V by seq^( -sigmoid(m) )
        if mask is not None:
            # sum over seq dimension
            seq_counts = mask.squeeze(-1).sum(-1, keepdim=True).unsqueeze(-1) 
            # shape [batch, heads, 1, 1]
        else:
            seq_counts = torch.full(
                (V.size(0), V.size(1), 1, 1),
                float(V.size(2)), device=V.device
            )
        scale = (seq_counts ** self.norm_const.sigmoid()).clamp(min=1)
        V = V / scale

        context_layer = CustomAttention.apply(Q, K, V)  # shape [batch, heads, seq, head_dim]

        # Reshape back
        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(new_shape)

        # Optional: final linear or dropout
        context_layer = self.dropout(context_layer)
        return context_layer


In [12]:
class GELU(nn.Module):
    """
    Paper Section 3.4, last paragraph notice that BERT used the GELU instead of RELU
    """

    def forward(self, x):
        return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))

In [13]:
class PositionwiseFeedForward(nn.Module):
    "Implements FFN equation."

    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = GELU()

    def forward(self, x):
        return self.w_2(self.dropout(self.activation(self.w_1(x))))

In [14]:
class LayerNorm(nn.Module):
    "Construct a layernorm module (See citation for details)."

    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2

In [15]:
class SublayerConnection(nn.Module):
    """
    A residual connection followed by a layer norm.
    Note for code simplicity the norm is first as opposed to last.
    """

    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        "Apply residual connection to any sublayer with the same size."
        return x + self.dropout(sublayer(self.norm(x)))

In [16]:
class TransformerBlock(nn.Module):
    """
    Bidirectional Encoder = Transformer (self-attention)
    Transformer = MultiHead_Attention + Feed_Forward with sublayer connection
    """

    def __init__(self, hidden, attn_heads, feed_forward_hidden, dropout):
        """
        :param hidden: hidden size of transformer
        :param attn_heads: head sizes of multi-head attention
        :param feed_forward_hidden: feed_forward_hidden, usually 4*hidden_size
        :param dropout: dropout rate
        """

        super().__init__()
        self.attention = BertCosAttention(h=attn_heads, d_model=hidden, dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(d_model=hidden, d_ff=feed_forward_hidden, dropout=dropout)
        self.input_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.output_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, mask):
        x = self.input_sublayer(x, lambda _x: self.attention(_x, _x, _x, mask))
        x = self.output_sublayer(x, self.feed_forward)
        return self.dropout(x)

In [17]:
class BERT(nn.Module):
    def __init__(self, bert_max_len, num_items, bert_num_blocks, bert_num_heads,
                 bert_hidden_units, bert_dropout):
        super().__init__()

        # fix_random_seed_as(args.model_init_seed)
        # self.init_weights()

        max_len = bert_max_len
        num_items = num_items
        n_layers = bert_num_blocks
        heads = bert_num_heads
        vocab_size = num_items + 2
        hidden = bert_hidden_units
        self.hidden = hidden
        dropout = bert_dropout

        # embedding for BERT, sum of positional, segment, token embeddings
        self.embedding = BERTEmbedding(vocab_size=vocab_size, embed_size=self.hidden, max_len=max_len, dropout=dropout)

        # multi-layers transformer blocks, deep network
        self.transformer_blocks = nn.ModuleList(
            [TransformerBlock(hidden, heads, hidden * 4, dropout) for _ in range(n_layers)])
        self.out = nn.Linear(hidden, num_items + 1)

    def forward(self, x):
        mask = (x > 0).int()

        # embedding the indexed sequence to sequence of vectors
        x = self.embedding(x)

        # running over multiple transformer blocks
        for transformer in self.transformer_blocks:
            x = transformer(x, mask)

        x = self.out(x)
        return x

    def init_weights(self):
        pass

net = BERT(10, 10, 6, 8, 8, 0.4)

In [18]:
def train(model, criterion, optimizer, data_loader, scheduler=None, config=None, device='cuda'):
    """
    Training loop with AMP, warmup, weight decay, and gradient clipping.
    """
    model.train()
    scaler = GradScaler()
    loss_val = 0.0

    # We'll track the total steps to call scheduler.step() if needed
    current_step = 0

    for seq, labels in tqdm(data_loader):
        seq = seq.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # AMP autocast
        with autocast():
            logits = model(seq)
            logits = logits.view(-1, logits.size(-1))
            labels = labels.view(-1)
            loss = criterion(logits, labels)

        # Backprop
        scaler.scale(loss).backward()

        # Unscale the gradients for gradient clipping
        scaler.unscale_(optimizer)

        # Now clip, e.g. 1.0
        nn_utils.clip_grad_norm_(model.parameters(), config['grad_clip'])

        # Take an optimizer step
        scaler.step(optimizer)
        scaler.update()

        # If we have a scheduler, step it
        if scheduler is not None:
            scheduler.step()

        loss_val += loss.item()
        current_step += 1

    loss_val /= len(data_loader)
    return loss_val

    
def evaluate(model, user_train, user_valid, max_len, data_loader, bert4rec_dataset, make_sequence_dataset):
    model.eval()

    NDCG = 0.0 # NDCG@10
    HIT = 0.0 # HIT@10

    num_item_sample = 100

    users = [user for user in range(make_sequence_dataset.num_user)]

    for user in tqdm(users):
        seq = (user_train[user] + [make_sequence_dataset.num_item + 1])[-max_len:] # mask last token
        padding_len = max_len - len(seq)
        seq = [0] * padding_len + seq
        rated = user_train[user] + user_valid[user]
        items = user_valid[user] + bert4rec_dataset.random_neg_sampling(rated_item = rated, num_item_sample = num_item_sample)

        with torch.no_grad():
            seq = torch.LongTensor([seq]).to(device)
            predictions = -model(seq)
            predictions = predictions[0][-1][items] # sampling
            rank = predictions.argsort().argsort()[0].item() # label

        if rank < 10: #Top10
            NDCG += 1 / np.log2(rank + 2)
            HIT += 1

    NDCG /= len(users)
    HIT /= len(users)

    return NDCG, HIT


In [19]:
make_sequence_dataset = MakeSequenceDataSet(config)

In [20]:
user_train, user_valid = make_sequence_dataset.get_train_valid_data()

In [21]:
bert4rec_dataset = BERTRecDataSet(
    user_train = user_train,
    max_len = config['max_len'],
    num_user = make_sequence_dataset.num_user,
    num_item = make_sequence_dataset.num_item,
    mask_prob = config['mask_prob'],
    )

In [22]:
data_loader = DataLoader(
    bert4rec_dataset,
    batch_size = config['batch_size'],
    shuffle = True,
    pin_memory = True,
    num_workers = config['num_workers'],
    )

In [23]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = BERT(
    num_items = 26744,
    bert_hidden_units = config['hidden_units'],
    bert_num_heads = config['num_heads'],
    bert_num_blocks = config['num_layers'],
    bert_max_len = config['max_len'],
    bert_dropout = config['dropout_rate'],
    ).to(device)

criterion = nn.CrossEntropyLoss(ignore_index=0) # label (padding)
optimizer = torch.optim.AdamW(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])

In [24]:
def get_scheduler(optimizer, warmup_steps, total_steps):
    """
    Simple linear warmup + linear decay:
    - LR ramps up linearly from 0 to base_lr over `warmup_steps`
    - Then decays linearly back to 0 over the remaining steps.
    """
    def lr_lambda(current_step):
        if current_step < warmup_steps:
            return float(current_step) / float(max(1, warmup_steps))
        else:
            return max(
                0.0,
                1.0 - float(current_step - warmup_steps) / float(max(1, total_steps - warmup_steps))
            )

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

total_steps = len(data_loader) * config['num_epochs']  # e.g. total updates
warmup_steps = 0.1 * total_steps
scheduler = get_scheduler(optimizer, warmup_steps, total_steps)

In [25]:
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
start_time = time.time()
loss_list = []
ndcg_list = []
hit_list = []
memory_list = []

# Remove early stopping variables
# best_ndcg = 0.0
# epochs_no_improve = 0
# patience = 5

num_epochs = config['num_epochs']

for epoch in tqdm(range(1, num_epochs + 1)):
    # Reset peak memory stats at the beginning of each epoch
    torch.cuda.reset_peak_memory_stats(device)

    # Log memory before training (optional)
    mem_before = torch.cuda.memory_allocated(device) / 1024**2
    print(f"Epoch {epoch} - GPU Memory Allocated before training: {mem_before:.2f} MB")

    # 1) Training step
    train_loss = train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        data_loader=data_loader,
        scheduler=scheduler,
        config=config,
        device=device
    )
    loss_list.append(train_loss)
    print(f'Epoch: {epoch:3d} | Train loss: {train_loss:.5f}')

    # Log peak memory usage during training
    peak_mem = torch.cuda.max_memory_allocated(device) / 1024**2  # in MB
    memory_list.append(peak_mem)
    print(f"Epoch {epoch} - Peak GPU Memory during training: {peak_mem:.2f} MB")

    # 2) Validation step (NDCG, HIT)
    ndcg, hit = evaluate(
        model=model,
        user_train=user_train,
        user_valid=user_valid,
        max_len=config['max_len'],
        data_loader=None,
        make_sequence_dataset=make_sequence_dataset,
        bert4rec_dataset=bert4rec_dataset
    )
    ndcg_list.append(ndcg)
    hit_list.append(hit)
    print(f'NDCG@10: {ndcg:.4f} | HIT@10: {hit:.4f}')

    # (Early stopping logic removed; run for all epochs)

end_time = time.time()
total_time = end_time - start_time
print(f"Total training time for {num_epochs} epochs: {total_time:.2f} seconds")

# After training, compute the best metrics
best_ndcg = max(ndcg_list)
best_hit = max(hit_list)
peak_memory = max(memory_list)
print(f"Best NDCG@10 achieved: {best_ndcg:.4f}")
print(f"Best HIT@10 achieved: {best_hit:.4f}")
print(f"Peak GPU Memory: {peak_memory:.4f}")

# Save the final model and optionally the best model (if desired)
torch.save(model.state_dict(), "final_checkpoint.pth")

  0%|                                                                                                                                                                 | 0/25 [00:00<?, ?it/s]

Epoch 1 - GPU Memory Allocated before training: 58.06 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:36,  1.30it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:19,  2.41it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:01<00:13,  3.34it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:01<00:10,  4.07it/s]
[A%|███████████████▉                            

Epoch:   1 | Train loss: 9.84188
Epoch 1 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▋                                                                                                                                                    | 28/6040 [00:00<00:21, 278.12it/s]
[A%|█▋                                                                                                                                                   | 68/6040 [00:00<00:17, 345.51it/s]
[A%|██▌                                                                                                                                                 | 107/6040 [00:00<00:16, 365.14it/s]
[A%|███▌                                                                                                                                                | 146/6040 [00:00<00:15, 373.97it/s]
[A%|████▌                                       

NDCG@10: 0.1942 | HIT@10: 0.3702
Epoch 2 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.49it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.68it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.25it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.58it/s]
[A%|███████████████▉                            

Epoch:   2 | Train loss: 7.83747
Epoch 2 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|█                                                                                                                                                    | 41/6040 [00:00<00:14, 404.02it/s]
[A%|██                                                                                                                                                   | 82/6040 [00:00<00:14, 403.45it/s]
[A%|███                                                                                                                                                 | 123/6040 [00:00<00:14, 402.83it/s]
[A%|████                                                                                                                                                | 164/6040 [00:00<00:14, 401.57it/s]
[A%|█████                                       

NDCG@10: 0.2221 | HIT@10: 0.4157
Epoch 3 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.55it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.72it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.29it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.60it/s]
[A%|███████████████▉                            

Epoch:   3 | Train loss: 7.58664
Epoch 3 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|█                                                                                                                                                    | 41/6040 [00:00<00:14, 400.43it/s]
[A%|██                                                                                                                                                   | 82/6040 [00:00<00:15, 396.29it/s]
[A%|██▉                                                                                                                                                 | 122/6040 [00:00<00:15, 393.66it/s]
[A%|███▉                                                                                                                                                | 162/6040 [00:00<00:14, 393.57it/s]
[A%|████▉                                       

NDCG@10: 0.2174 | HIT@10: 0.4036
Epoch 4 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.51it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.69it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.26it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.58it/s]
[A%|███████████████▉                            

Epoch:   4 | Train loss: 7.42559
Epoch 4 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 39/6040 [00:00<00:15, 383.16it/s]
[A%|█▉                                                                                                                                                   | 78/6040 [00:00<00:15, 386.97it/s]
[A%|██▊                                                                                                                                                 | 117/6040 [00:00<00:15, 384.50it/s]
[A%|███▊                                                                                                                                                | 156/6040 [00:00<00:15, 380.38it/s]
[A%|████▊                                       

NDCG@10: 0.2394 | HIT@10: 0.4425
Epoch 5 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.53it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.71it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.28it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.59it/s]
[A%|███████████████▉                            

Epoch:   5 | Train loss: 7.28492
Epoch 5 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 40/6040 [00:00<00:15, 398.19it/s]
[A%|█▉                                                                                                                                                   | 80/6040 [00:00<00:15, 396.84it/s]
[A%|██▉                                                                                                                                                 | 120/6040 [00:00<00:14, 397.11it/s]
[A%|███▉                                                                                                                                                | 160/6040 [00:00<00:14, 396.53it/s]
[A%|████▉                                       

NDCG@10: 0.2427 | HIT@10: 0.4429
Epoch 6 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.36it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:10,  4.58it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.19it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.54it/s]
[A%|███████████████▉                            

Epoch:   6 | Train loss: 7.18444
Epoch 6 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 39/6040 [00:00<00:15, 389.68it/s]
[A%|█▉                                                                                                                                                   | 78/6040 [00:00<00:15, 388.74it/s]
[A%|██▉                                                                                                                                                 | 118/6040 [00:00<00:15, 389.62it/s]
[A%|███▊                                                                                                                                                | 157/6040 [00:00<00:15, 388.04it/s]
[A%|████▊                                       

NDCG@10: 0.2456 | HIT@10: 0.4465
Epoch 7 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.42it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.61it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.20it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.54it/s]
[A%|███████████████▉                            

Epoch:   7 | Train loss: 7.07594
Epoch 7 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 39/6040 [00:00<00:15, 387.52it/s]
[A%|█▉                                                                                                                                                   | 78/6040 [00:00<00:15, 384.91it/s]
[A%|██▉                                                                                                                                                 | 118/6040 [00:00<00:15, 388.89it/s]
[A%|███▊                                                                                                                                                | 157/6040 [00:00<00:15, 385.84it/s]
[A%|████▊                                       

NDCG@10: 0.2749 | HIT@10: 0.4950
Epoch 8 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:14,  3.28it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:10,  4.51it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.14it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:08,  5.50it/s]
[A%|███████████████▉                            

Epoch:   8 | Train loss: 6.98294
Epoch 8 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 39/6040 [00:00<00:15, 387.38it/s]
[A%|█▉                                                                                                                                                   | 78/6040 [00:00<00:15, 387.36it/s]
[A%|██▉                                                                                                                                                 | 118/6040 [00:00<00:15, 389.81it/s]
[A%|███▊                                                                                                                                                | 158/6040 [00:00<00:15, 390.92it/s]
[A%|████▊                                       

NDCG@10: 0.2878 | HIT@10: 0.5124
Epoch 9 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.58it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.74it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.30it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.61it/s]
[A%|███████████████▉                            

Epoch:   9 | Train loss: 6.86418
Epoch 9 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 39/6040 [00:00<00:15, 380.80it/s]
[A%|█▉                                                                                                                                                   | 78/6040 [00:00<00:15, 381.01it/s]
[A%|██▊                                                                                                                                                 | 117/6040 [00:00<00:15, 380.46it/s]
[A%|███▊                                                                                                                                                | 156/6040 [00:00<00:15, 380.72it/s]
[A%|████▊                                       

NDCG@10: 0.3073 | HIT@10: 0.5500
Epoch 10 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.59it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.74it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.30it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.60it/s]
[A%|███████████████▉                            

Epoch:  10 | Train loss: 6.76235
Epoch 10 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 39/6040 [00:00<00:15, 386.86it/s]
[A%|█▉                                                                                                                                                   | 78/6040 [00:00<00:15, 388.55it/s]
[A%|██▊                                                                                                                                                 | 117/6040 [00:00<00:15, 388.30it/s]
[A%|███▊                                                                                                                                                | 156/6040 [00:00<00:15, 387.81it/s]
[A%|████▊                                       

NDCG@10: 0.3083 | HIT@10: 0.5469
Epoch 11 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.45it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.64it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.22it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.56it/s]
[A%|███████████████▉                            

Epoch:  11 | Train loss: 6.65613
Epoch 11 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 38/6040 [00:00<00:15, 375.15it/s]
[A%|█▊                                                                                                                                                   | 76/6040 [00:00<00:15, 375.36it/s]
[A%|██▊                                                                                                                                                 | 114/6040 [00:00<00:15, 375.00it/s]
[A%|███▋                                                                                                                                                | 152/6040 [00:00<00:15, 374.35it/s]
[A%|████▋                                       

NDCG@10: 0.3362 | HIT@10: 0.5778
Epoch 12 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:14,  3.34it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:10,  4.55it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.17it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.52it/s]
[A%|███████████████▉                            

Epoch:  12 | Train loss: 6.58547
Epoch 12 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 38/6040 [00:00<00:15, 379.18it/s]
[A%|█▉                                                                                                                                                   | 77/6040 [00:00<00:15, 384.10it/s]
[A%|██▊                                                                                                                                                 | 116/6040 [00:00<00:15, 384.80it/s]
[A%|███▊                                                                                                                                                | 155/6040 [00:00<00:15, 385.36it/s]
[A%|████▊                                       

NDCG@10: 0.3204 | HIT@10: 0.5563
Epoch 13 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.41it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.61it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.21it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.55it/s]
[A%|███████████████▉                            

Epoch:  13 | Train loss: 6.52609
Epoch 13 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 40/6040 [00:00<00:15, 392.49it/s]
[A%|█▉                                                                                                                                                   | 80/6040 [00:00<00:15, 389.63it/s]
[A%|██▉                                                                                                                                                 | 119/6040 [00:00<00:15, 389.05it/s]
[A%|███▊                                                                                                                                                | 158/6040 [00:00<00:15, 388.56it/s]
[A%|████▊                                       

NDCG@10: 0.3432 | HIT@10: 0.5876
Epoch 14 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.39it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:10,  4.58it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.20it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.55it/s]
[A%|███████████████▉                            

Epoch:  14 | Train loss: 6.49506
Epoch 14 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 40/6040 [00:00<00:15, 391.07it/s]
[A%|█▉                                                                                                                                                   | 80/6040 [00:00<00:15, 388.80it/s]
[A%|██▉                                                                                                                                                 | 119/6040 [00:00<00:15, 386.73it/s]
[A%|███▊                                                                                                                                                | 158/6040 [00:00<00:15, 386.27it/s]
[A%|████▊                                       

NDCG@10: 0.3493 | HIT@10: 0.5939
Epoch 15 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.36it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:10,  4.57it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.18it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.53it/s]
[A%|███████████████▉                            

Epoch:  15 | Train loss: 6.44797
Epoch 15 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 40/6040 [00:00<00:15, 390.85it/s]
[A%|█▉                                                                                                                                                   | 80/6040 [00:00<00:15, 388.88it/s]
[A%|██▉                                                                                                                                                 | 119/6040 [00:00<00:15, 385.87it/s]
[A%|███▊                                                                                                                                                | 158/6040 [00:00<00:15, 385.23it/s]
[A%|████▊                                       

NDCG@10: 0.3527 | HIT@10: 0.5972
Epoch 16 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:14,  3.34it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:10,  4.56it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.16it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.51it/s]
[A%|███████████████▉                            

Epoch:  16 | Train loss: 6.41218
Epoch 16 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 38/6040 [00:00<00:15, 378.86it/s]
[A%|█▉                                                                                                                                                   | 77/6040 [00:00<00:15, 379.71it/s]
[A%|██▊                                                                                                                                                 | 116/6040 [00:00<00:15, 382.03it/s]
[A%|███▊                                                                                                                                                | 156/6040 [00:00<00:15, 386.13it/s]
[A%|████▊                                       

NDCG@10: 0.3654 | HIT@10: 0.6081
Epoch 17 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.47it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.65it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.24it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.57it/s]
[A%|███████████████▉                            

Epoch:  17 | Train loss: 6.36699
Epoch 17 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 40/6040 [00:00<00:15, 397.17it/s]
[A%|█▉                                                                                                                                                   | 80/6040 [00:00<00:15, 394.73it/s]
[A%|██▉                                                                                                                                                 | 120/6040 [00:00<00:15, 393.47it/s]
[A%|███▉                                                                                                                                                | 160/6040 [00:00<00:14, 392.67it/s]
[A%|████▉                                       

NDCG@10: 0.3658 | HIT@10: 0.6081
Epoch 18 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.57it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.73it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.30it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.61it/s]
[A%|███████████████▉                            

Epoch:  18 | Train loss: 6.34605
Epoch 18 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|█                                                                                                                                                    | 41/6040 [00:00<00:14, 401.23it/s]
[A%|██                                                                                                                                                   | 82/6040 [00:00<00:14, 401.15it/s]
[A%|███                                                                                                                                                 | 123/6040 [00:00<00:14, 400.86it/s]
[A%|████                                                                                                                                                | 164/6040 [00:00<00:14, 400.78it/s]
[A%|█████                                       

NDCG@10: 0.3812 | HIT@10: 0.6280
Epoch 19 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.51it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.68it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.26it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.59it/s]
[A%|███████████████▉                            

Epoch:  19 | Train loss: 6.31531
Epoch 19 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|█                                                                                                                                                    | 41/6040 [00:00<00:14, 401.35it/s]
[A%|██                                                                                                                                                   | 82/6040 [00:00<00:14, 401.87it/s]
[A%|███                                                                                                                                                 | 123/6040 [00:00<00:14, 401.71it/s]
[A%|████                                                                                                                                                | 164/6040 [00:00<00:14, 401.07it/s]
[A%|█████                                       

NDCG@10: 0.3792 | HIT@10: 0.6253
Epoch 20 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:15,  3.09it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:10,  4.35it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.03it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:08,  5.42it/s]
[A%|███████████████▉                            

Epoch:  20 | Train loss: 6.28909
Epoch 20 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 39/6040 [00:00<00:15, 384.91it/s]
[A%|█▉                                                                                                                                                   | 78/6040 [00:00<00:15, 378.92it/s]
[A%|██▉                                                                                                                                                 | 118/6040 [00:00<00:15, 384.87it/s]
[A%|███▊                                                                                                                                                | 157/6040 [00:00<00:15, 386.76it/s]
[A%|████▊                                       

NDCG@10: 0.3740 | HIT@10: 0.6217
Epoch 21 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.54it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.71it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.27it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.58it/s]
[A%|███████████████▉                            

Epoch:  21 | Train loss: 6.26089
Epoch 21 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 39/6040 [00:00<00:15, 385.66it/s]
[A%|█▉                                                                                                                                                   | 78/6040 [00:00<00:15, 384.32it/s]
[A%|██▊                                                                                                                                                 | 117/6040 [00:00<00:15, 382.84it/s]
[A%|███▊                                                                                                                                                | 156/6040 [00:00<00:15, 384.69it/s]
[A%|████▊                                       

NDCG@10: 0.3852 | HIT@10: 0.6276
Epoch 22 - GPU Memory Allocated before training: 248.49 MB



[A%|                                                                                                                                                                 | 0/48 [00:00<?, ?it/s]
[A%|███▏                                                                                                                                                     | 1/48 [00:00<00:13,  3.58it/s]
[A%|██████▍                                                                                                                                                  | 2/48 [00:00<00:09,  4.74it/s]
[A%|█████████▌                                                                                                                                               | 3/48 [00:00<00:08,  5.29it/s]
[A%|████████████▊                                                                                                                                            | 4/48 [00:00<00:07,  5.60it/s]
[A%|███████████████▉                            

Epoch:  22 | Train loss: 6.26604
Epoch 22 - Peak GPU Memory during training: 9605.83 MB



[A%|                                                                                                                                                               | 0/6040 [00:00<?, ?it/s]
[A%|▉                                                                                                                                                    | 40/6040 [00:00<00:15, 390.95it/s]
[A%|█▉                                                                                                                                                   | 80/6040 [00:00<00:15, 388.33it/s]
[A%|██▉                                                                                                                                                 | 119/6040 [00:00<00:15, 384.61it/s]
[A%|███▊                                                                                                                                                | 158/6040 [00:00<00:15, 382.53it/s]
[A%|████▊                                       

In [None]:
'''
fig, ax = plt.subplots(1, 4, figsize=(20, 5))
epochs_range = list(range(1, len(loss_list) + 1))

ax[0].plot(epochs_range, loss_list, marker='o')
ax[0].set_title('Loss')
ax[0].set_xlabel('Epoch')
ax[0].set_ylabel('Loss')

ax[1].plot(epochs_range, ndcg_list, marker='o')
ax[1].set_title('NDCG')
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('NDCG@10')

ax[2].plot(epochs_range, hit_list, marker='o')
ax[2].set_title('HIT')
ax[2].set_xlabel('Epoch')
ax[2].set_ylabel('HIT@10')

ax[3].plot(epochs_range, memory_list, marker='o')
ax[3].set_title('Peak GPU Memory Usage')
ax[3].set_xlabel('Epoch')
ax[3].set_ylabel('Memory (MB)')

plt.tight_layout()
plt.show()
'''