In [None]:
import torch
# Stop right away when the runtime exposes no CUDA device; the training
# cell further down reports running on a GPU.
cuda_available = torch.cuda.is_available()
if not cuda_available:
    raise ValueError('change runtime to GPU')

In [None]:
# Uncomment the lines below to install the dependencies when running this demo in an external Colab
# !pip install -q torchdata==0.3.0 torchtext==0.12 spacy==3.2 altair GPUtil
# !python -m spacy download de_core_news_sm
# !python -m spacy download en_core_web_sm
# !pip install -q git+https://github.com/nikitakapitan/transformers.git

In [6]:
from os.path import exists
import warnings
warnings.filterwarnings('ignore')

import torch
from transformers.data.token import load_tokenizers
from transformers.data.vocab import load_vocab

from transformers.training.train import train_model
from transformers.output import run_model_example

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Tokenizers come first because the vocabularies are built from them.
# (presumably the German / English spaCy pipelines named in the install cell — confirm)
spacy_de, spacy_en = load_tokenizers()
vocab_src, vocab_tgt = load_vocab(
    spacy_en=spacy_en,
    spacy_de=spacy_de,
)

Finished.
Vocabulary sizes:
len: SRC=8315 TGT=6384


In [20]:
# Settings handed to train_model as `config`.
# NOTE(review): the recorded run of this cell ended with "CUDA out of memory"
# on a ~2 GiB GPU; a smaller 'batch_size' may be needed on such hardware —
# confirm against train_model before changing it.
train_config = {
    'batch_size': 32,
    'distributed': False,
    'num_epochs': 8,
    'accum_iter': 10,
    'base_lr': 1.0,
    'max_padding': 72,
    'warmup': 3000,
    'file_prefix': 'multi30k_model_',
}

# Model dimensions handed to train_model as `architecture`.
architecture = {
    'src_vocab_len': len(vocab_src),
    'tgt_vocab_len': len(vocab_tgt),
    'N': 6,          # loop
    'd_model': 512,  # emb
    'd_ff': 2048,
    'h': 8,
    'p_dropout': 0.1,
}

# Build the checkpoint name from the configured prefix instead of repeating
# the literal, so the existence check below cannot drift from 'file_prefix'.
# (assumes train_model writes its final weights to "<file_prefix>final.pt" —
# the same assumption the original hard-coded name made; confirm)
model_path = f"{train_config['file_prefix']}final.pt"

# Train only when no checkpoint is present, so re-running the notebook
# does not repeat the training.
if not exists(model_path):
    train_model(
        vocab_src=vocab_src,
        vocab_tgt=vocab_tgt,
        spacy_de=spacy_de,
        spacy_en=spacy_en,
        config=train_config,
        architecture=architecture,
    )

Train worker process using GPU n.0


RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 1.96 GiB total capacity; 1.05 GiB already allocated; 1.44 MiB free; 1.08 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Deliberately switched off: the one-call version of the walk-through that
# the following cells reproduce step by step. Flip the condition to run it.
if False:
    run_model_example(
        vocab_src,
        vocab_tgt,
        spacy_de,
        spacy_en,
    )

# Break-Down : run_model_example
## Step 1/3 : create validation dataloader

In [None]:
from transformers.data.load import create_dataloaders

# Only the second loader returned by create_dataloaders is kept; the first
# one is discarded. Batches hold a single example and are placed on the CPU.
_, valid_dataloader = create_dataloaders(
    vocab_src=vocab_src,
    vocab_tgt=vocab_tgt,
    spacy_de=spacy_de,
    spacy_en=spacy_en,
    device=torch.device("cpu"),
    batch_size=1,
    is_distributed=False,
)

## Step 2/3 : create model and load its model state

In [None]:
# output.run_model_example step 2/3 : create and load model state

from transformers.main import make_model


# Build a fresh network with the same sizes used above, then fill it with the
# trained weights. The checkpoint is mapped to the CPU, so this cell does not
# need a GPU.
model = make_model(len(vocab_src), len(vocab_tgt), N=6) # d_model=512, d_ff=2048, h=8
# Switch to evaluation mode before inference: otherwise any dropout layers stay
# active and the decoded output changes from run to run. Called before
# load_state_dict so the cell still ends with the key-matching report.
model.eval()
model.load_state_dict(
    torch.load("multi30k_model_final.pt", map_location=torch.device("cpu"))
)

## Step 3/3 Break-Down : check_outputs

In [None]:
# output.run_model_example step 3/3 : check_outputs

from transformers.data.Batch import Batch

# Parameters of the check_outputs walk-through.
n_examples = 5
pad_idx = 2
eos_string = "</s>"

# One placeholder slot per example.
results = [()] * n_examples

idx = 0 # example 0 in range(n_examples)
b = next(iter(valid_dataloader))
# Use pad_idx rather than a second hard-coded 2, so the padding id is
# defined in exactly one place.
rb = Batch(src=b[0], tgt=b[1], pad=pad_idx)

# Fetch each index-to-token table once instead of once per token.
src_itos = vocab_src.get_itos()
tgt_itos = vocab_tgt.get_itos()

# Map the first (only) sequence of the batch back to tokens, skipping padding.
src_tokens = [src_itos[x] for x in rb.src[0] if x != pad_idx]
tgt_tokens = [tgt_itos[x] for x in rb.tgt[0] if x != pad_idx]

print(f"Source text (Input) {src_tokens}")
print(f"Target Text (Ground Truth) {tgt_tokens}")

## 3-Substep 1/3 Greedy Decode

In [None]:
from transformers.helper import following_mask

# Inputs of the greedy-decode walk-through (a stray bare `b` expression that
# had no effect was removed from the top of this cell).
src = rb.src
src_mask = rb.src_mask
max_len = 72
start_symbol = 0

# Inference only: run without autograd so no gradient graph is recorded
# for the encoder/decoder activations.
with torch.no_grad():
    # Encode the source sentence once; the result is reused by the decoder call.
    memory = model.encode(src, src_mask)
    print('memory shape=', memory.shape)

    # The target starts as a 1x1 tensor holding only the start symbol,
    # with the same dtype/device as src.
    tgt = torch.zeros(1, 1).fill_(start_symbol).type_as(src.data)

    i = 0 # in range(72 - 1)
    # First decoding step: the mask is sized to the current target length.
    out = model.decode(memory, src_mask, tgt, following_mask(tgt.size(1)).type_as(src.data))
    print('out shape=', out.shape)