Imports and Setup

In [10]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
from encoder import TransformerEncoder

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Choose the compute device in order of preference: CUDA first, then the MPS
# backend, and finally plain CPU when neither accelerator is available.
if torch.cuda.is_available():
    device_name = 'cuda'
elif torch.backends.mps.is_available():
    device_name = 'mps'
else:
    device_name = 'cpu'
device = torch.device(device_name)
print("You are using device: %s" % device)

You are using device: mps


Embedding Layer Tutorial Example

In [12]:
# The lookup table stored by the embedding layer: one row per token id
# (8 rows), each row being that token's 9-dimensional embedding vector.
embedding_weights = torch.tensor([
    [ 0.12, -0.76,  1.34,  0.58, -1.21,  0.44, -0.09,  0.72, -0.33],
    [-0.45,  1.05,  0.39, -0.97,  0.18, -1.56,  0.87,  0.23, -0.12],
    [ 1.23, -0.64,  0.07,  0.92, -0.31,  0.51, -1.22,  0.84, -0.77],
    [-0.19,  0.34,  0.88, -1.03,  1.15, -0.42,  0.65, -0.91,  0.00],
    [ 0.48, -1.14,  0.26,  0.71, -0.53,  1.38, -0.66,  0.97, -0.28],
    [ 0.16,  0.62, -0.85,  1.04, -0.11, -0.76,  0.89, -1.34,  0.43],
    [-0.58,  1.11,  0.30, -0.69,  0.93, -0.37, -1.05,  0.57,  0.22],
    [ 0.35, -0.99,  0.79,  0.18, -0.82,  1.20, -0.48,  0.61, -0.13]
], dtype=torch.float)

# Build the layer straight from the matrix so that num_embeddings and
# embedding_dim are derived from its shape instead of being repeated by hand.
# freeze=False keeps the weights trainable, as in a default nn.Embedding, so
# the output below still carries a grad_fn.
embedding_layer = nn.Embedding.from_pretrained(embedding_weights, freeze=False)

# Example sequence of token ids fed to the embedding layer; the layer simply
# returns rows 0, 6 and 4 of the matrix above.
# Named `token_ids` rather than `input` so the Python builtin is not shadowed.
token_ids = torch.tensor([0, 6, 4])
output = embedding_layer(token_ids)

# Print with two decimals so the rows are easy to compare with the matrix.
# Note that this print option is global and stays in effect for later cells.
torch.set_printoptions(precision=2)
print(output)

tensor([[ 0.12, -0.76,  1.34,  0.58, -1.21,  0.44, -0.09,  0.72, -0.33],
        [-0.58,  1.11,  0.30, -0.69,  0.93, -0.37, -1.05,  0.57,  0.22],
        [ 0.48, -1.14,  0.26,  0.71, -0.53,  1.38, -0.66,  0.97, -0.28]],
       grad_fn=<EmbeddingBackward0>)


Example Encoder Usage

In [13]:
# In this cell, we use a simulated batch of sentences (random token ids),
# just to demonstrate usage.

N = 32  # batch size
T = 43  # sentence length (tokens per sentence)
vocab_size = 1000
sentences = torch.randint(0, vocab_size, (N, T)).to(device)
print("Shape of input:", sentences.shape)

# Model parameters. In this example we use the default values, which are set
# to those used in the base model of the paper.
# context_size sizes the positional-encoding table, so it is derived from T
# rather than repeating the literal 43; presumably it must cover the longest
# sequence passed to the model -- verify against encoder.py.
context_size = T

# Sample usage. Note the shapes of the input and output.
model = TransformerEncoder(vocab_size, context_size).to(device)

output = model(sentences)
print("Shape of output:", output.shape)

Shape of input: torch.Size([32, 43])
Shape of output: torch.Size([32, 43, 512])


In [14]:
# Bare expression as the last line of the cell: displays the model's repr,
# i.e. the nested structure of its submodules.
model

TransformerEncoder(
  (embeddings): Embedding(1000, 512)
  (positional_encodings): Embedding(43, 512)
  (encoder_stack): ModuleList(
    (0-5): 6 x EncoderLayer(
      (multihead_attention_sublayer): AttentionSubLayer(
        (multihead_attention): MultiHeadAttention()
        (dropout): Dropout(p=0.1, inplace=False)
        (layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      )
      (feedforward_sublayer): FeedForwardSubLayer(
        (feedforward): Sequential(
          (0): Linear(in_features=512, out_features=2048, bias=True)
          (1): ReLU()
          (2): Linear(in_features=2048, out_features=512, bias=True)
        )
        (dropout): Dropout(p=0.1, inplace=False)
        (layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      )
    )
  )
)