# Run Distribution Learning Benchmark

In [1]:
# Move the working directory up one level (presumably to the repository root so
# the `hybrid_transformer` / `scripts` imports resolve -- TODO confirm).
# NOTE(review): the path is relative, so re-running this cell moves up again.
%cd ..

/home/adam/Projects/hybrid-transformer


In [2]:
import torch
import wandb

from hybrid_transformer.configs.task import TaskConfig
from hybrid_transformer.configs.model import ModelConfig
from hybrid_transformer.configs.trainer import TrainerConfig
from hybrid_transformer.configs.logger import LoggerConfig

from hybrid_transformer.utils.datasets.auto import AutoDataset
from hybrid_transformer.utils.tokenizers.auto import AutoTokenizer
from hybrid_transformer.models.auto import AutoModel
from hybrid_transformer.utils.loggers.wandb import WandbLogger

from hybrid_transformer.trainers.trainer import Trainer

from scripts.train import DEFAULT_CONFIG_FILES

# Enable IPython's autoreload extension; mode 2 reloads modules before each
# cell runs, so edits to project source are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm
2023-12-26 18:00:50.670684: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-26 18:00:50.672554: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-26 18:00:50.697962: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-26 18:00:50.697985: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-26 18:00:50.698750: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515

In [3]:
# Load each configuration object from its default file, in a fixed order
# (task, model, trainer, logger).
task_config, model_config, trainer_config, logger_config = [
    config_cls.from_pretrained(DEFAULT_CONFIG_FILES[section])
    for config_cls, section in (
        (TaskConfig, 'task'),
        (ModelConfig, 'model'),
        (TrainerConfig, 'trainer'),
        (LoggerConfig, 'logger'),
    )
]

# Switch off the task config's `validate` flag for this notebook run.
task_config.validate = False

You are using a model of type GPT to instantiate a model of type . This is not supported for all configurations of models and can yield errors.


In [8]:
# Build the dataset, tokenizer and model from their configs.
dataset = AutoDataset.from_config(task_config)
tokenizer = AutoTokenizer.from_config(task_config)
model = AutoModel.from_config(model_config)

# The logger is given the other three configs alongside its own.
logger = WandbLogger(logger_config, [task_config, model_config, trainer_config])

# NOTE: the same dataset object is passed as both the train and the eval split.
trainer = Trainer(
    config=trainer_config,
    model=model,
    train_dataset=dataset,
    eval_dataset=dataset,
    tokenizer=tokenizer,
    logger=logger,
)

# Load the saved checkpoint, then run the trainer's own training initialisation.
trainer.load_checkpoint()
trainer._train_init()

number of parameters: 38.06M
tokens per iteration will be: 61,440
Using cuda device
Successfully resumed from ./results/lm/pre-trained/...
num decayed parameter tensors: 63, with 38,115,840 parameters
num non-decayed parameter tensors: 25, with 12,800 parameters
using fused AdamW: True
compiling the model... (takes a ~minute)


In [9]:
# Batch fields that are forwarded to the model as keyword arguments.
forward_keys = ('input_ids', 'attention_mask', 'labels', 'target', 'eos_mask')

# Draw one training batch for each objective.
lm_inputs = trainer.get_batch('train', 'lm')
mlm_inputs = trainer.get_batch('train', 'mlm')

# The outputs are only inspected, never back-propagated, so run the forward
# passes without autograd. This avoids keeping activations for a backward pass,
# which matters on a small GPU where this cell previously ran out of memory.
with torch.no_grad():
    lm_outputs = trainer.model(**{key: lm_inputs[key] for key in forward_keys})
    mlm_outputs = trainer.model(**{key: mlm_inputs[key] for key in forward_keys})

OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacty of 1.83 GiB of which 2.94 MiB is free. Including non-PyTorch memory, this process has 1.82 GiB memory in use. Of the allocated memory 1.69 GiB is allocated by PyTorch, and 56.93 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [22]:
# NOTE(review): `trainer.tra` looks like a truncated attribute access; restore
# the intended expression before re-running this cell.
trainer.tra

tensor(4.4458, device='cuda:0')

In [19]:
# Display `shift_labels`.
# NOTE(review): this name is not assigned in any cell visible here -- confirm
# where it is defined so the notebook survives Restart & Run All.
shift_labels

tensor([[13, 13, 13,  ...,  0,  0,  0],
        [13, 13, 17,  ...,  0,  0,  0],
        [16, 19, 13,  ...,  0,  0,  0],
        ...,
        [13, 16, 12,  ...,  0,  0,  0],
        [13, 13, 20,  ...,  0,  0,  0],
        [13, 12, 17,  ...,  0,  0,  0]], device='cuda:0')

In [20]:
# Display `shift_logits`.
# NOTE(review): this name is not assigned in any cell visible here -- confirm
# where it is defined so the notebook survives Restart & Run All.
shift_logits

tensor([[[ 1.8679e-01,  3.6928e+00,  1.1933e-01,  ...,  2.7387e-01,
          -2.2925e-03,  2.4751e-01],
         [-3.0744e-01,  2.4300e-01,  2.8138e-02,  ..., -1.0149e+00,
          -3.9690e-01, -1.3168e+00],
         [-2.9528e-01,  4.6905e-01, -1.7181e-01,  ..., -1.0297e+00,
          -2.7817e-01, -1.1676e+00],
         ...,
         [ 4.8069e+00,  3.3868e-01,  1.2406e-01,  ..., -1.4735e-01,
           4.3360e-01, -8.3324e-01],
         [ 4.2229e+00,  2.2291e-01, -2.8840e-02,  ..., -6.9874e-02,
           9.2272e-01, -4.1192e-01],
         [ 4.2008e+00,  2.5254e-01,  4.7244e-01,  ..., -4.4597e-01,
           1.9795e-02, -3.2272e-02]],

        [[ 2.1462e-01,  4.4047e+00,  3.4294e-01,  ..., -5.2393e-02,
          -1.6237e-01,  2.1099e-01],
         [-2.6895e-01,  3.7325e-01, -2.9334e-01,  ..., -1.0901e+00,
          -4.1790e-01, -1.2638e+00],
         [-2.8949e-01,  5.7877e-01, -1.7921e-01,  ..., -1.0775e+00,
          -3.5852e-01, -1.1731e+00],
         ...,
         [ 4.7025e+00,  3

In [18]:



# Flatten the tokens: logits become (num_tokens, vocab_size), labels (num_tokens,).
# `CrossEntropyLoss` is reached through the already-imported `torch` so this cell
# does not depend on an import that lives in a later cell.
loss_fct = torch.nn.CrossEntropyLoss()
flat_logits = shift_logits.reshape(-1, shift_logits.size(-1))
flat_labels = shift_labels.reshape(-1)

# Validate the class indices up front: an out-of-range label otherwise surfaces
# on CUDA as an opaque "device-side assert triggered" error. Labels equal to
# `ignore_index` are skipped by the loss and therefore exempt from the check.
scored_labels = flat_labels[flat_labels != loss_fct.ignore_index]
if scored_labels.numel() > 0:
    lowest_label = int(scored_labels.min())
    highest_label = int(scored_labels.max())
    if lowest_label < 0 or highest_label >= flat_logits.size(-1):
        raise ValueError(
            f"Label ids must lie in [0, {flat_logits.size(-1)}), "
            f"got values in [{lowest_label}, {highest_label}]."
        )

loss = loss_fct(flat_logits, flat_labels)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [17]:
# Display the `inputs` batch.
# NOTE(review): `inputs` is not assigned in any cell visible here (the batches
# above are named `lm_inputs` / `mlm_inputs`) -- confirm where it comes from.
inputs

{'input_ids': tensor([[ 1, 13, 13,  ...,  0,  0,  0],
         [ 1, 13, 13,  ...,  0,  0,  0],
         [ 1, 16, 19,  ...,  0,  0,  0],
         ...,
         [ 1, 13, 16,  ...,  0,  0,  0],
         [ 1, 13, 13,  ...,  0,  0,  0],
         [ 1, 13, 12,  ...,  0,  0,  0]], device='cuda:0'),
 'attention_mask': None,
 'labels': tensor([[ 1, 13, 13,  ...,  0,  0,  0],
         [ 1, 13, 13,  ...,  0,  0,  0],
         [ 1, 16, 19,  ...,  0,  0,  0],
         ...,
         [ 1, 13, 16,  ...,  0,  0,  0],
         [ 1, 13, 13,  ...,  0,  0,  0],
         [ 1, 13, 12,  ...,  0,  0,  0]], device='cuda:0'),
 'target': None,
 'eos_mask': tensor([[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False

In [17]:
# Copy `shift_labels` to host memory and display it.
# NOTE(review): `shift_labels` is not assigned in any cell visible here.
shift_labels.cpu()

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [45]:
# Number of sequences to sample in parallel.
num_sequences = 8

# Start every sequence with the tokenizer's `generate_token_id`. `torch.full`
# creates the integer tensor directly, instead of building a float tensor with
# `torch.ones(...) * token_id` and casting it to long afterwards.
idx = torch.full(
    (num_sequences, 1),
    int(tokenizer.generate_token_id),
    dtype=torch.long,
    device=trainer.device,
)

In [70]:

# Run the model on the current token sequences to obtain its outputs.
# Autograd is disabled for this forward pass; `idx` is cast to long before
# being passed as `input_ids`.
with torch.no_grad():
    outputs = model(input_ids=idx.long())

In [71]:
# Sampling temperature applied to the logits (1.0 leaves them unchanged).
temperature = 1.0

# Keep only the logits of the last position and scale them.
logits = outputs['lm_logits'][:, -1, :] / temperature

# Turn the logits into a distribution and draw one token per sequence.
probs = torch.softmax(logits, dim=-1)
idx_next = torch.multinomial(probs, num_samples=1)

# Append the sampled token to each running sequence.
# NOTE: this grows `idx` by one column every time the cell is executed.
idx = torch.cat([idx, idx_next], dim=1)

In [72]:
# Display the token-id sequences accumulated so far (one row per sequence).
idx

tensor([[  1., 258., 229., 229.,  15.],
        [  1., 307., 225., 225., 384.],
        [  1., 146., 146.,  94.,  94.],
        [  1., 564., 459., 226., 143.],
        [  1., 429.,  83., 418., 419.],
        [  1., 490., 120.,  41.,  41.],
        [  1.,   1.,   1., 319., 316.],
        [  1., 309., 573., 488., 509.]], device='cuda:0')

In [61]:
# Decode the most recently sampled token id of each sequence.
tokenizer.decode(idx_next)

['[Zr+]', '=', '[K+]', '[Te]', '[Rb]', 'b', 'b', '[Dy]']

In [1]:
# Display the `inputs` batch.
# NOTE(review): `inputs` is not assigned in any cell visible here, so this
# raises NameError on a fresh kernel.
inputs


NameError: name 'inputs' is not defined

In [13]:
from torch.nn import CrossEntropyLoss