In [None]:
import torch
# Probe for a CUDA device first and stop the notebook right away if none is visible.
cuda_ready = torch.cuda.is_available()
if not cuda_ready:
    raise ValueError('change runtime to GPU')

In [None]:
# Uncomment the lines below to run this demo in an external Colab.
# !pip install -q torchdata==0.3.0 torchtext==0.12 spacy==3.2 altair GPUtil
# !python -m spacy download de_core_news_sm
# !python -m spacy download en_core_web_sm
# !pip install -q git+https://github.com/nikitakapitan/transformers.git

In [1]:
# Setup cell: imports, warning suppression, Google Drive mount and autoreload.
from os.path import exists
import warnings
# Suppress every Python warning raised from here on.
warnings.filterwarnings('ignore')

import torch
# NOTE(review): `transformers` here looks like the package from
# github.com/nikitakapitan/transformers (see the install cell), not the
# Hugging Face library of the same name — confirm.
from transformers.data.token import load_tokenizers
from transformers.data.vocab import load_vocab

from transformers.training.train import train_model
from transformers.output import run_model_example

# Colab-only: mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Reload imported modules automatically before each cell executes, so edits
# to their source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Load the two spaCy tokenizers (presumably German and English, going by the
# names), then obtain the source/target vocabularies from them.
spacy_de, spacy_en = load_tokenizers()
vocab_src, vocab_tgt = load_vocab(
    spacy_de=spacy_de,
    spacy_en=spacy_en,
)

Finished.
Vocabulary sizes:
len: SRC=8315 TGT=6384


In [4]:
# Training settings, passed to `train_model` as `config`.
train_config = dict(
    batch_size=32,
    distributed=False,
    num_epochs=8,
    accum_iter=10,  # number of gradient accumulation steps
    base_lr=1.0,
    max_padding=72,  # pad with blanks so sequences total 72 tokens
    warmup=3000,
    file_prefix='multi30k_model_',
)
# Model hyper-parameters, passed to `train_model` as `architecture`.
# The vocabulary sizes are taken from the vocabularies loaded earlier.
architecture = dict(
    src_vocab_len=len(vocab_src),
    tgt_vocab_len=len(vocab_tgt),
    N=6,  # loop count
    d_model=512,  # embedding size
    d_ff=2048,
    h=8,
    p_dropout=0.1,
)

# Checkpoint file whose presence on disk means training is skipped.
# NOTE(review): presumably `train_model` writes this file (file_prefix + 'final.pt') — confirm.
model_path = 'multi30k_model_final.pt'

# Train only when the checkpoint is not already present in the working directory.
if not exists(model_path):
    train_model(
        config=train_config,
        architecture=architecture,
        vocab_src=vocab_src,
        vocab_tgt=vocab_tgt,
        spacy_de=spacy_de,
        spacy_en=spacy_en,
    )

In [None]:
# Copy the model checkpoint and vocab.pt from the working directory into the
# mounted Google Drive folder so they outlive the Colab session.
# NOTE(review): vocab.pt is presumably written by load_vocab — confirm.
!cp multi30k_model_final.pt /content/drive/MyDrive
!cp vocab.pt /content/drive/MyDrive