# Demo
This notebook demonstrates, with a small worked example, how each module of the transformer works.

In [1]:
import torch

In [12]:
from modules.input_embedding import InputEmbeddings
from modules.positional_encoding import PositionalEncoding
from modules.feed_forward import FeedForwardBlock

## 1. InputEmbeddings

In [3]:
# Demo configuration: a toy sentence, hand-picked token ids and model sizes.
# Seed the RNG first so that everything random further down (weight
# initialisation, dropout masks) is reproducible on Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

tokens = ["My", "name", "is", "Suraj", "."]
# Illustrative ids, one per token; they do not come from a real tokenizer.
token_with_ids = torch.tensor([11, 34, 56, 345, 342])
seq_len = len(tokens)
d_model = 512      # width of each token embedding
vocab_size = 1000  # assumed vocabulary size for this demo

# Cheap sanity checks: one id per token, and every id is a valid vocabulary index.
assert token_with_ids.numel() == seq_len
assert int(token_with_ids.max()) < vocab_size

In [4]:
# Prepend a batch axis to the 1-D id tensor and show the resulting shape.
token_with_ids[None, :].shape

torch.Size([1, 5])

In [5]:
# Build the embedding layer, then embed the ids with a leading batch axis
# prepended, turning the 1-D id tensor into a batch of one sequence.
in_emb = InputEmbeddings(d_model, vocab_size)
out = in_emb(token_with_ids[None, :])

In [6]:
# Print the embedded tokens (one row per token); the tensor repr elides the
# middle columns with `...`.
print(out)

tensor([[[ 35.8187, -27.8338,  27.5632,  ...,  48.8609,  27.4723, -26.5341],
         [  2.5645,  12.6105,  22.4665,  ...,   8.3939,   0.7163,   4.3543],
         [  0.1775,  13.2811,   7.6797,  ...,   0.2338,  39.8202,   1.0296],
         [  4.5432, -34.4606,   0.7824,  ...,   8.7938,  57.0339,  28.3806],
         [  0.8979,  45.1155,   8.7601,  ...,  16.7152,   1.7760,   2.5427]]],
       grad_fn=<MulBackward0>)


In [7]:
# Shape check: expect (1, seq_len, d_model) — a batch of one sequence.
print(out.shape)

torch.Size([1, 5, 512])


## 2. Positional Encoding

In [8]:
# Add positional information to the token embeddings.
# NOTE(review): the third argument (0.4) is presumably the dropout probability —
# confirm against modules/positional_encoding. No .eval() call appears in this
# notebook, so dropout looks active: the printed result below contains entries
# zeroed out and the remaining values scaled by 1 / (1 - 0.4).
po_emb = PositionalEncoding(d_model, seq_len, 0.4)
out_p = po_emb(out)

In [9]:
# Print the positionally encoded tensor; the 0.0000 entries are presumably
# elements dropped by dropout rather than genuine zeros.
print(out_p)

tensor([[[ 59.6979, -44.7229,  45.9387,  ...,   0.0000,   0.0000, -42.5568],
         [  5.6766,   0.0000,  38.8138,  ...,  15.6565,   1.1940,   0.0000],
         [  0.0000,   0.0000,  14.3601,  ...,   0.0000,  66.3673,   3.3827],
         [  7.8071, -59.0844,   1.7125,  ...,  16.3229,   0.0000,  48.9676],
         [  0.0000,  74.1031,  13.5049,  ...,  29.5253,   0.0000,   5.9045]]],
       grad_fn=<MulBackward0>)


In [10]:
# Shape check: positional encoding keeps the shape at (1, seq_len, d_model).
print(out_p.shape)

torch.Size([1, 5, 512])


## 3. Feed Forward

In [11]:
# Hidden width of the feed-forward block (passed as its second constructor argument).
# NOTE(review): `d_diff` looks like a typo of the conventional name `d_ff`;
# renaming would also require updating the cell that builds the block.
d_diff = 1024

In [13]:
# Build the position-wise feed-forward block and print its layers:
# Linear(d_model -> d_diff), Dropout(p=0.4), Linear(d_diff -> d_model).
ff = FeedForwardBlock(d_model, d_diff, 0.4)
print(ff)

FeedForwardBlock(
  (linear_1): Linear(in_features=512, out_features=1024, bias=True)
  (dropout): Dropout(p=0.4, inplace=False)
  (linear_2): Linear(in_features=1024, out_features=512, bias=True)
)


In [14]:
# Run the positionally encoded batch through the feed-forward block.
# NOTE(review): no .eval() call appears in this notebook, so the block's dropout
# is presumably active and the values differ between runs unless the RNG is seeded.
ff_out = ff(out_p)

In [15]:
# Print the feed-forward output (the tensor repr elides the middle columns).
print(ff_out)

tensor([[[ -2.3896,  13.5402,   3.5408,  ...,   1.9613,   1.6081,  -7.8594],
         [ -9.2129,  -3.0078,  -4.7456,  ..., -10.6353,   2.4719, -10.5194],
         [  3.2546,   0.5504, -13.0712,  ..., -12.2775,  -8.0100,  16.2558],
         [  1.8906, -11.6131, -17.4832,  ...,   6.3097,   5.8341,  -9.1153],
         [  9.0015,   5.9390,  -5.2111,  ..., -10.1641,  16.9615,  -9.2007]]],
       grad_fn=<ViewBackward0>)


In [16]:
# Shape check: the second linear layer maps back to d_model, so expect
# (1, seq_len, d_model) again.
print(ff_out.shape)

torch.Size([1, 5, 512])
