In [7]:
import os
import sys
import importnb
from torch import nn
import torch
import numpy as np

In [8]:
# Put the parent of the current working directory on the import path
# (presumably where the `utils` package lives -- this relies on the kernel's
# cwd being the notebook's own folder).
notebook_path = os.getcwd()
parent_dir = os.path.dirname(notebook_path)
# Guard against duplicates: re-running this cell would otherwise append the
# same entry to sys.path every time.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Import inside importnb's Notebook context (presumably because utils/tools is
# itself a notebook -- confirm). `importnb` is already imported in the first
# cell, so no dynamic __import__ is needed.
with importnb.Notebook():
    from utils.tools import (
        MultiHeadAttention,
        AddPositionalEncoding,
        TransformerFFN,
    )

In [11]:
class TransformerEncoderLayer(nn.Module):
    """Single Transformer encoder layer with post-layer-normalization.

    The layer runs two sub-layers in sequence: self-attention, then a
    feed-forward network. Each sub-layer's output goes through dropout, is
    added to that sub-layer's own input (residual connection), and the sum is
    layer-normalized.
    """

    def __init__(
        self,
        d_model:int,
        d_ff:int,
        num_head:int,
        dropout_rate:float,
        layer_norm_eps:float,
    ) -> None:
        """Create the attention, feed-forward, dropout and norm modules.

        Args:
            d_model: Passed to ``MultiHeadAttention`` and ``TransformerFFN``
                and used as the normalized shape of both ``LayerNorm`` modules.
            d_ff: Second argument of ``TransformerFFN`` (presumably the hidden
                width of the feed-forward network -- confirm in utils.tools).
            num_head: First argument of ``MultiHeadAttention``.
            dropout_rate: Drop probability of both dropout modules.
            layer_norm_eps: ``eps`` of both ``LayerNorm`` modules.
        """
        super().__init__()
        # Self-attention sub-layer
        self.mha = MultiHeadAttention(num_head,d_model)
        self.layernorm_mha = nn.LayerNorm(d_model,eps=layer_norm_eps)
        self.dropout_mha = nn.Dropout(dropout_rate)

        # Feed-forward sub-layer
        self.ffn = TransformerFFN(d_model,d_ff)
        self.dropout_ffn = nn.Dropout(dropout_rate)
        self.layernorm_ffn = nn.LayerNorm(d_model,eps=layer_norm_eps)

    def forward(
        self,
        x:torch.Tensor,
        mask:torch.Tensor=None
    ) -> torch.Tensor:
        """Apply self-attention and the feed-forward network to ``x``.

        Args:
            x: Input tensor; it is used as query, key and value of the
                attention module, and its last dimension is layer-normalized.
            mask: Optional mask forwarded unchanged to the attention module.

        Returns:
            The output of the second add-and-normalize step.
        """
        # Self-attention, then add & norm against the layer input.
        attn_out = self.__get_mha_output(x,mask)
        attn_normed = self.layernorm_mha(attn_out+x)

        # Feed-forward, then add & norm. The residual here must be the
        # feed-forward input (attn_normed), not the original layer input x;
        # adding x would let the signal skip the attention sub-layer.
        ffn_out = self.__get_ffn_output(attn_normed)
        return self.layernorm_ffn(ffn_out+attn_normed)

    def __get_mha_output(
        self,
        x:torch.Tensor,
        mask:torch.Tensor=None
    ) -> torch.Tensor:
        """Return dropout(self-attention(x)) with ``x`` as query, key and value."""
        attended = self.mha(x,x,x,mask)
        return self.dropout_mha(attended)

    def __get_ffn_output(
        self,
        x:torch.Tensor,
    ) -> torch.Tensor:
        """Return dropout(feed-forward(x))."""
        transformed = self.ffn(x)
        return self.dropout_ffn(transformed)