In [6]:
import sys
sys.path.append('../')
import torch
from transformer.layer import MultiHeadAttention, FeedForward, LayerNorm
from transformer.decoder import DecoderLayer, Decoder

# Configuration for the decoder demo below.

# Input tensor shape: (batch_size, seq_len, d_model)
batch_size = 1  # number of sequences in the batch
seq_len = 4  # number of positions in each sequence
d_model = 6  # size of the feature (embedding) dimension

# Model hyperparameters
h = 6  # number of attention heads (here equal to d_model)
d_ff = 2048  # dimension passed to the feed-forward network
dropout = 0.1  # dropout ratio
num_layers = 3  # number of decoder layers (passed to Decoder)

# Create the sub-modules handed to the decoder layer: two MultiHeadAttention
# instances (one passed as the self-attention argument, one as the
# encoder-decoder attention argument) and one FeedForward instance.
# NOTE(review): no seed is set in this cell before these constructors run, so
# any random parameter initialisation they perform is presumably not covered
# by the torch.manual_seed calls further down -- confirm if the printed
# decoder output is expected to be reproducible across fresh kernels.
self_attention = MultiHeadAttention(d_model, h, dropout)
encoder_decoder_attention = MultiHeadAttention(d_model, h, dropout)
feed_forward = FeedForward(d_model, d_ff, dropout) 

# Create an instance of the DecoderLayer class from the three sub-modules
# above plus the feature dimension and dropout ratio.
decoder_layer = DecoderLayer(d_model, 
                             self_attention, 
                             encoder_decoder_attention, 
                             feed_forward, 
                             dropout)

# Create an instance of the Decoder class from the single decoder_layer and
# the requested number of layers.
# NOTE(review): whether Decoder copies decoder_layer for each layer or shares
# one instance is not visible here -- check transformer.decoder.
decoder = Decoder(d_model, decoder_layer, num_layers)

# Create two random tensors of shape (batch_size, seq_len, d_model): x is the
# decoder input and encoder_output is a random stand-in for an encoder's
# output. The global RNG is re-seeded immediately before each torch.rand call
# so each tensor is reproducible on its own.
torch.manual_seed(68)  # fixed seed for x
x = torch.rand(batch_size, seq_len, d_model) 
torch.manual_seed(101)  # fixed seed for encoder_output
encoder_output = torch.rand(batch_size, seq_len, d_model) 

# Pass both tensors through the decoder.
# NOTE(review): eval() is never called on decoder; if it is an nn.Module in
# its default training mode, dropout is presumably active here -- confirm
# whether that is intended for this demonstration.
output = decoder(x, encoder_output)

# Show the inputs, the decoder output, and the output's shape for inspection.
print("Initial input tensor: \n", x)
print("Encoder output tensor: \n", encoder_output)
print("Decoder Output: \n", output) 
print("Decoder Output's shape: \n", output.shape)  

Initial input tensor: 
 tensor([[[0.3991, 0.5521, 0.1004, 0.2844, 0.9998, 0.7077],
         [0.8031, 0.2066, 0.3589, 0.8509, 0.8753, 0.4669],
         [0.6566, 0.6026, 0.2785, 0.1350, 0.2257, 0.9548],
         [0.8214, 0.1386, 0.6055, 0.2300, 0.7895, 0.4098]]])
Encoder output tensor: 
 tensor([[[0.1980, 0.4503, 0.0909, 0.8872, 0.2894, 0.0186],
         [0.9095, 0.3406, 0.4309, 0.7324, 0.4776, 0.0716],
         [0.5834, 0.7521, 0.7649, 0.1443, 0.7152, 0.3953],
         [0.6244, 0.3684, 0.8823, 0.3746, 0.1458, 0.3671]]])
Decoder Output: 
 tensor([[[-0.9032, -1.1390,  0.0430,  0.6153,  1.5720, -0.1880],
         [-0.7782, -0.7700, -0.0235,  1.9315, -0.2320, -0.1277],
         [-0.7886, -0.5498, -0.8934, -0.0418,  0.5489,  1.7247],
         [-0.9157, -1.5902,  0.8169,  0.6326,  0.5875,  0.4689]]],
       grad_fn=<AddBackward0>)
Decoder Output's shape: 
 torch.Size([1, 4, 6])
