In [5]:
import os
import torch
import wandb
import numpy as np
from tqdm import tqdm
from secret import WANDB_API_KEY
from src.model import Transformer
from src.utils import subsequent_mask
from src.training import (
    Batch, NoamOptimizer,
    LabelSmoothing, train_step
)
from sanity_tests import (
    test_subsequent_mask, test_positional_encoding, test_noam_lr_policy,
    test_label_smoothing_target_distribution, test_label_smoothing_regularization
)

ModuleNotFoundError: No module named 'wandb'

In [4]:
!pip install wandb

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
def data_gen(V, batch, nbatches):
    for i in range(nbatches):
        data = torch.from_numpy(
            np.random.randint(
                1, V, size=(batch, 10)
            )
        )
        data[:, 0] = 1
        src = torch.autograd.Variable(data, requires_grad=False)
        tgt = torch.autograd.Variable(data, requires_grad=False)
        yield Batch(src, tgt, 0)

In [3]:
class SimpleLossCompute:
    
    def __init__(self, generator, criterion, opt=None):
        self.generator = generator
        self.criterion = criterion
        self.opt = opt
        
    def __call__(self, x, y, norm):
        x = self.generator(x)
        loss = self.criterion(
            x.contiguous().view(-1, x.size(-1)),
            y.contiguous().view(-1)
        ) / norm
        loss.backward()
        if self.opt is not None:
            self.opt.step()
            self.opt.optimizer.zero_grad()
        return loss.data.item() * norm

In [4]:
V = 11
model = Transformer(V, V, n=2)
criterion = LabelSmoothing(
    size=V, padding_index=0, smoothing=0.0
)
model_opt = NoamOptimizer(
    model.source_embedding[0].d_model, 1, 400,
    torch.optim.Adam(
        model.parameters(), lr=0,
        betas=(0.9, 0.98), eps=1e-9
    )
)

  torch.nn.init.xavier_uniform(p)


In [5]:
for epoch in range(10):
    model.train()
    train_step(
        data_gen(V, 30, 20), model, 
        SimpleLossCompute(
            model.generator,
            criterion, model_opt
        )
    )
    model.eval()
    print(
        train_step(
            data_gen(V, 30, 5), model, 
            SimpleLossCompute(
                model.generator,
                criterion, None
            )
        )
    )

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


Epoch Step: 1 Loss: 3.222836 Tokens per Sec: 356.105133
Epoch Step: 1 Loss: 1.898097 Tokens per Sec: 514.855530
tensor(1.8864)
Epoch Step: 1 Loss: 1.961129 Tokens per Sec: 477.678619
Epoch Step: 1 Loss: 1.615044 Tokens per Sec: 545.999451
tensor(1.6368)
Epoch Step: 1 Loss: 1.881133 Tokens per Sec: 312.640778
Epoch Step: 1 Loss: 1.423026 Tokens per Sec: 550.668030
tensor(1.4335)
Epoch Step: 1 Loss: 1.788222 Tokens per Sec: 462.523651
Epoch Step: 1 Loss: 1.237994 Tokens per Sec: 498.938354
tensor(1.2300)
Epoch Step: 1 Loss: 1.650148 Tokens per Sec: 359.106323
Epoch Step: 1 Loss: 0.960283 Tokens per Sec: 471.688324
tensor(0.9734)
Epoch Step: 1 Loss: 1.151660 Tokens per Sec: 358.550964
Epoch Step: 1 Loss: 0.588864 Tokens per Sec: 339.864563
tensor(0.6103)
Epoch Step: 1 Loss: 0.920910 Tokens per Sec: 410.786011
Epoch Step: 1 Loss: 0.376575 Tokens per Sec: 511.768738
tensor(0.4024)
Epoch Step: 1 Loss: 0.554213 Tokens per Sec: 342.275574
Epoch Step: 1 Loss: 0.228428 Tokens per Sec: 427.981476

In [6]:
def greedy_decode(model, source, source_mask, max_length, start_symbol):
    memory = model.encode(source, source_mask)
    ys = torch.ones(1, 1).fill_(start_symbol).type_as(source.data)
    for i in range(max_length - 1):
        out = model.decode(
            memory, source_mask,
            torch.autograd.Variable(ys), 
            torch.autograd.Variable(
                subsequent_mask(ys.size(1)).type_as(source.data)
            )
        )
        prob = model.generator(out[:, -1])
        _, next_word = torch.max(prob, dim = 1)
        next_word = next_word.data[0]
        ys = torch.cat(
            [ys, torch.ones(1, 1).type_as(source.data).fill_(next_word)], dim=1
        )
    return ys

In [7]:
model.eval()
source = torch.autograd.Variable(
    torch.LongTensor(
        [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]
    )
)
source_mask = torch.autograd.Variable(
    torch.ones(1, 1, 10)
)
print(
    greedy_decode(
        model, source, source_mask,
        max_length=10, start_symbol=1
    )
)

tensor([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]])
