In [1]:
import math
import torch
from torch import nn, Tensor

from positional_encoder import models, utils

# Seed PyTorch's global RNG up front so that the randomly initialised
# embedding weights and the dropout masks used further down are the same on
# every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device is {device}')

device is cpu


In [2]:
# Batch sizes handed to the data loader for the training split and for the
# validation/test splits respectively.
train_batch_size, eval_batch_size = 20, 10

# The helper returns the three data splits followed by the vocabulary.
wikitext2 = utils.get_wikitext2_data(train_batch_size, eval_batch_size, device)
train_data, val_data, test_data, vocab = wikitext2

# Report the size of every split as a quick sanity check.
print(f'Training Data: {train_data.size()}')
print(f'Validation Data: {val_data.size()}')
print(f'Test Data: {test_data.size()}')

Training Data: torch.Size([102499, 20])
Validation Data: torch.Size([21441, 10])
Test Data: torch.Size([24185, 10])


In [3]:
# Look at a short window of rows (35..49) from the first column of the
# training data and map each token id back to its word.
batch = 0
idxs = [*range(35, 50)]
tokens = train_data[idxs, batch]
words = list(map(vocab.lookup_token, tokens))

print(f'tokens:\n{tokens}')
print(f'words:\n{words}')

tokens:
tensor([    8,  5790,   299,    12,   575,   232,    67,   452,    19, 13722,
            5,   757,     3,  2500,    17])
words:
['a', 'tactical', 'role', '@-@', 'playing', 'video', 'game', 'developed', 'by', 'sega', 'and', 'media', '.', 'vision', 'for']


In [4]:
class SinCosTextEncoder(nn.Module):
    """Token-embedding layer whose output is scaled by ``sqrt(d_model)``.

    Arguments:
        n_tokens: number of rows in the embedding table (vocabulary size).
        d_model: width of each embedding vector.
        init_range: embedding weights are drawn uniformly from
            ``[-init_range, init_range]``.
    """

    def __init__(self, n_tokens: int, d_model: int, init_range: float):
        super().__init__()
        self.encoder = nn.Embedding(n_tokens, d_model)
        # Initialise through nn.init rather than mutating ``weight.data``:
        # it performs the same in-place uniform fill under ``no_grad`` without
        # going behind autograd's back.
        nn.init.uniform_(self.encoder.weight, -init_range, init_range)
        self.d_model = d_model

    def forward(self, src: Tensor) -> Tensor:
        """
        Arguments:
            src: Tensor, shape ``[seq_len, batch_size]``

        Returns:
            The looked-up embeddings multiplied by ``sqrt(d_model)``; the
            result has one extra trailing dimension of size ``d_model``.
        """
        return self.encoder(src) * math.sqrt(self.d_model)


class SinCosPosEncoder(nn.Module):
    """Adds fixed sine/cosine positional encodings to a sequence, then applies dropout.

    For position ``pos`` and frequency index ``k`` the table holds
    ``sin(pos * w_k)`` at feature ``2k`` and ``cos(pos * w_k)`` at feature
    ``2k + 1``, with ``w_k = 10000 ** (-2k / d_model)``. The table is built
    once for ``max_seq_len`` positions and registered as a buffer, so it is
    not a trainable parameter.

    Arguments:
        d_model: size of the feature dimension; may be even or odd.
        dropout: dropout probability applied after adding the encoding.
        max_seq_len: number of positions to precompute.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_seq_len: int = 5000):
        super().__init__()

        ############# YOUR CODE HERE #############
        position = torch.arange(max_seq_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        # One column of angles per frequency: [max_seq_len, ceil(d_model / 2)].
        angles = position * div_term
        positional_encoding = torch.zeros(max_seq_len, 1, d_model)
        positional_encoding[:, 0, 0::2] = torch.sin(angles)
        # The odd feature slots number floor(d_model / 2); when d_model is odd
        # that is one fewer than the sine slots, so drop the last frequency
        # to keep the shapes aligned. For even d_model the slice is a no-op.
        positional_encoding[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        ##########################################

        self.register_buffer('positional_encoding', positional_encoding)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``

        Returns:
            Tensor of the same shape: ``x`` plus the encoding of its first
            ``seq_len`` positions (broadcast over the batch dimension),
            passed through dropout.
        """
        x = x + self.positional_encoding[:x.size(0)]
        return self.dropout(x)

## Training Model

In [None]:
# Training configuration.
n_tokens = len(vocab)
n_epochs = 3
criterion = nn.CrossEntropyLoss()

# The encoder *classes* (not instances) are handed to the model constructor.
text_encoder, pos_encoder = SinCosTextEncoder, SinCosPosEncoder

model = models.TransformerModel(text_encoder, pos_encoder, n_tokens=n_tokens)
model = model.to(device)

utils.train(model, train_data, val_data, test_data, n_tokens, n_epochs, criterion, device)

| epoch   1 |   200/ 2928 batches | lr 0.50 | ms/batch 88.09 | loss  7.55 | ppl  1893.47
