In [6]:
import sys
sys.path.append('../')
import torch
from transformer.layers import MultiHeadAttention, FeedForward, LayerNorm
from transformer.encoder import EncoderLayer, Encoder

# Define the dimensions, dropout rate, and number of layers
d_model = 8       # feature dimension
h = 8             # number of attention heads
dropout = 0.1     # dropout ratio
d_ff = 2048       # the dimension of the feed forward network
batch = 1         # batch size
seq_len = 4       # sequence length
num_layers = 6    # number of encoder layers
seed = 68         # seed for the global torch RNG

# Seed BEFORE any module is constructed. Previously the seed was only set
# after the encoder had been built, so any random parameter initialisation
# depended on whatever RNG state the kernel happened to be in, and re-running
# this cell could print a different output each time.
torch.manual_seed(seed)

# Build the two sub-layers that are handed to the encoder layer
self_attention_engine = MultiHeadAttention(d_model, h, dropout)
feed_forward = FeedForward(d_model, d_ff, dropout)

# Assemble a single encoder layer from the sub-layers
encoder_layer = EncoderLayer(d_model, self_attention_engine, feed_forward, dropout)

# Assemble the full encoder from the encoder layer and the layer count
encoder = Encoder(d_model, encoder_layer, num_layers)

# Re-seed right before sampling so that `x` is the first draw after seeding,
# exactly as in the original cell, and stays the same tensor regardless of
# how many random numbers module construction consumed.
torch.manual_seed(seed)
x = torch.rand(batch, seq_len, d_model)  # random batch of sequences

# Pass the tensor through the encoder.
# NOTE(review): if these classes are torch.nn.Module subclasses they are in
# training mode by default, so dropout is presumably active here - call
# encoder.eval() first if an inference-style output is wanted.
output = encoder(x)

# The encoder is expected to preserve the input shape (batch, seq_len, d_model)
assert output.shape == (batch, seq_len, d_model), output.shape

print("Initial input tensor: \n", x)
print("Output's shape: \n", output.shape)
print("Output: \n", output)

Initial input tensor: 
 tensor([[[0.3991, 0.5521, 0.1004, 0.2844, 0.9998, 0.7077, 0.8031, 0.2066],
         [0.3589, 0.8509, 0.8753, 0.4669, 0.6566, 0.6026, 0.2785, 0.1350],
         [0.2257, 0.9548, 0.8214, 0.1386, 0.6055, 0.2300, 0.7895, 0.4098],
         [0.0428, 0.4400, 0.2381, 0.4967, 0.8516, 0.4378, 0.4367, 0.0916]]])
Output's shape: 
 torch.Size([1, 4, 8])
Output: 
 tensor([[[-0.1298, -1.0799, -1.3006, -0.1036,  0.6890,  0.2786,  1.8737,
          -0.2274],
         [-0.0067, -1.4082, -0.9107, -0.2387,  0.8635,  0.2880,  1.7797,
          -0.3669],
         [-0.3878, -1.0586, -0.8377, -0.2626,  1.0271, -0.0191,  1.9354,
          -0.3967],
         [-0.2650, -0.8458, -1.2890, -0.1296,  0.7813,  0.4001,  1.8692,
          -0.5212]]], grad_fn=<AddBackward0>)
