In [1]:
import torch
import torch.nn as nn
from attention import *


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\theju\anaconda3\envs\mlp\Lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "c:\Users\theju\anaconda3\envs\mlp\Lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
    app.start()
  File "c:\Users\theju\anaconda3\envs\mlp\Lib\site-packages\ipykernel\kernelapp.py", line 736, in start
    self.io_loop.start()
  File "c:\Users\the

## Encoder

In [2]:
# Configuration for the encoder built below.
model_size = 512              # feature width; also the last entry of each shape tuple
head_count = 8
ffn_hidden_dimension = 4096
# Shape tuples (not tensors) forwarded to MultiHeadAttention; all three coincide here.
query_shape = key_shape = value_shape = (3, 64, model_size)

In [3]:
class Encoder(nn.Module):
    """One encoder layer: self-attention, then a feed-forward network.

    Each sub-layer output is added to its own input (residual connection)
    and the sum is passed through an ``nn.LayerNorm``.

    Args:
        query_shape: Forwarded unchanged to ``MultiHeadAttention``.
        key_shape: Forwarded unchanged to ``MultiHeadAttention``.
        value_shape: Forwarded unchanged to ``MultiHeadAttention``.
        head_count: Forwarded unchanged to ``MultiHeadAttention``.
        model_size: Normalised feature size of both ``nn.LayerNorm`` layers;
            also passed to ``MultiHeadAttention`` and ``FeedForwardNetwork``.
        ffn_hidden_dimension: Third argument of ``FeedForwardNetwork``
            (presumably its hidden width -- confirm in the attention module).
    """

    def __init__(self, query_shape, key_shape, value_shape, head_count,
                 model_size, ffn_hidden_dimension):
        super().__init__()
        # Keep the raw configuration on the instance for later inspection.
        self.query_shape = query_shape
        self.key_shape = key_shape
        self.value_shape = value_shape
        self.head_count = head_count
        self.model_size = model_size
        self.ffn_hidden_dimension = ffn_hidden_dimension

        self.multi_head = MultiHeadAttention(
            self.query_shape,
            self.key_shape,
            self.value_shape,
            self.head_count,
            self.model_size,
        )
        self.normal_layer_1 = nn.LayerNorm(self.model_size)
        self.normal_layer_2 = nn.LayerNorm(self.model_size)
        self.ffn = FeedForwardNetwork(
            self.model_size,
            self.model_size,
            self.ffn_hidden_dimension,
        )

    def forward(self, x):
        """Run the encoder layer on ``x``.

        Args:
            x: Input tensor whose last dimension equals ``model_size``.

        Returns:
            The output of the second layer normalisation.
        """
        # Use ``x`` as-is. The previous ``x.detach().clone()`` cut the autograd
        # graph, so nothing upstream of this layer could receive gradients.
        # This method performs no in-place operation on ``x``.
        attention_output = self.multi_head(x, x, x)
        layer_norm_1_output = self.normal_layer_1(x + attention_output)

        feed_forward_output = self.ffn(layer_norm_1_output)
        encoder_output = self.normal_layer_2(layer_norm_1_output + feed_forward_output)
        return encoder_output


In [4]:
# Build a single encoder layer from the configuration values defined earlier.
encoder = Encoder(
    query_shape=query_shape,
    key_shape=key_shape,
    value_shape=value_shape,
    head_count=head_count,
    model_size=model_size,
    ffn_hidden_dimension=ffn_hidden_dimension,
)

In [5]:
# Random stand-in input; its dimensions match the configured query_shape.
x = torch.randn((3, 64, 512))
encoder_output = encoder(x)

In [6]:
# Sanity check: the encoder output should have the same shape as its input x.
encoder_output.size()

torch.Size([3, 64, 512])

In [7]:
# Configuration used to build the decoder; the values equal those used for the encoder.
head_count, model_size, ffn_hidden_dimension = 8, 512, 4096
# Shape tuples (not tensors) forwarded to MultiHeadAttention.
query_shape = (3, 64, model_size)
key_shape = (3, 64, model_size)
value_shape = (3, 64, model_size)

## Decoder

In [None]:
class Decoder(nn.Module):
    """One decoder layer: masked self-attention, encoder attention, feed-forward.

    Each sub-layer output is added to its own input (residual connection)
    and the sum is passed through an ``nn.LayerNorm``.

    Args:
        query_shape: Forwarded unchanged to both ``MultiHeadAttention`` modules.
        key_shape: Forwarded unchanged to both ``MultiHeadAttention`` modules.
        value_shape: Forwarded unchanged to both ``MultiHeadAttention`` modules.
        head_count: Forwarded unchanged to both ``MultiHeadAttention`` modules.
        model_size: Normalised feature size of the three ``nn.LayerNorm``
            layers; also passed to the attention and feed-forward sub-layers.
        ffn_hidden_dimension: Third argument of ``FeedForwardNetwork``
            (presumably its hidden width -- confirm in the attention module).
    """

    def __init__(self, query_shape, key_shape, value_shape, head_count,
                 model_size, ffn_hidden_dimension):
        super().__init__()
        # Keep the raw configuration on the instance for later inspection.
        self.query_shape = query_shape
        self.key_shape = key_shape
        self.value_shape = value_shape
        self.head_count = head_count
        self.model_size = model_size
        self.ffn_hidden_dimension = ffn_hidden_dimension

        # Attention over the encoder output (queries come from the decoder).
        self.multi_head = MultiHeadAttention(
            self.query_shape,
            self.key_shape,
            self.value_shape,
            self.head_count,
            self.model_size,
        )
        # Self-attention over the decoder input; called with a mask in forward().
        self.masked_multi_head = MultiHeadAttention(
            self.query_shape,
            self.key_shape,
            self.value_shape,
            self.head_count,
            self.model_size,
        )
        self.normal_layer_1 = nn.LayerNorm(self.model_size)
        self.normal_layer_2 = nn.LayerNorm(self.model_size)
        self.normal_layer_3 = nn.LayerNorm(self.model_size)
        self.ffn = FeedForwardNetwork(
            self.model_size,
            self.model_size,
            self.ffn_hidden_dimension,
        )

    def forward(self, x, encoder_output):
        """Run the decoder layer.

        Args:
            x: Decoder input; its second-to-last dimension is the sentence
                length and its last dimension equals ``model_size``.
            encoder_output: Tensor used as both key and value of the second
                attention block.

        Returns:
            The output of the third layer normalisation.
        """
        # Use ``x`` as-is. The previous ``x.detach().clone()`` cut the autograd
        # graph, so nothing upstream of this layer could receive gradients.

        # 1. Masked multi-head self-attention over the decoder input.
        decoder_ip_sentence_length = x.shape[-2]
        # Ones strictly above the diagonal mark, for row i, every later
        # position j > i. The mask is created on x's device so the layer also
        # works when the input does not live on the default device.
        # NOTE(review): how MultiHeadAttention interprets `attention_mask`
        # (e.g. 1 == blocked) is defined in the attention module -- confirm.
        mask = torch.triu(
            torch.ones(
                (decoder_ip_sentence_length, decoder_ip_sentence_length),
                device=x.device,
            ),
            diagonal=1,
        )
        masked_attention = self.masked_multi_head(x, x, x, attention_mask=mask)
        layer_norm_1_output = self.normal_layer_1(x + masked_attention)

        # 2. Multi-head attention over the encoder output (no mask is passed):
        #    query -> output of step 1 (decoder side)
        #    key   -> encoder output
        #    value -> encoder output
        multi_head_attention = self.multi_head(
            layer_norm_1_output, encoder_output, encoder_output
        )
        layer_norm_2_output = self.normal_layer_2(layer_norm_1_output + multi_head_attention)

        # 3. Feed-forward network.
        feed_forward_output = self.ffn(layer_norm_2_output)
        decoder_output = self.normal_layer_3(layer_norm_2_output + feed_forward_output)
        return decoder_output


In [18]:
# Build a single decoder layer from the configuration values defined earlier.
decoder = Decoder(
    query_shape=query_shape,
    key_shape=key_shape,
    value_shape=value_shape,
    head_count=head_count,
    model_size=model_size,
    ffn_hidden_dimension=ffn_hidden_dimension,
)

In [19]:
# Shape of the encoder output that is passed to the decoder below.
encoder_output.size()

torch.Size([3, 64, 512])

In [20]:
# Random decoder input; its sentence length (78) differs from the encoder's (64).
y = torch.randn((3, 78, 512))
decoder_output = decoder(y, encoder_output)

In [21]:
# Sanity check: the decoder output should have the same shape as its input y.
decoder_output.size()

torch.Size([3, 78, 512])