In [16]:
import os
import sys
import importnb
from torch import nn
import torch
import numpy as np

In [17]:
# Put the parent of the current working directory on the import path so that
# the `utils` package can be resolved from this notebook.
notebook_path = os.getcwd()
parent_dir = os.path.dirname(notebook_path)
# Guard the append: re-running this cell must not pile up duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# `importnb` is already imported in the first cell, so use it directly rather
# than going through `__import__`. Inside this context manager, notebooks can
# be imported like modules (presumably `utils/tools` is an .ipynb -- confirm).
with importnb.Notebook():
    from utils.tools import (
        AddPositionalEncoding,
        MultiHeadAttention,
        TransformerFFN,
    )

In [18]:
class TransformerEncoderLayer(nn.Module):
    """One encoder block: a self-attention sub-layer followed by an FFN sub-layer.

    For each sub-layer the output is passed through dropout, added back to the
    sub-layer's input (residual connection) and then layer-normalised.

    Args:
        d_model: Size handed to ``MultiHeadAttention``, ``TransformerFFN`` and
            used as the normalised shape of both ``nn.LayerNorm`` modules.
        d_ff: Second constructor argument of ``TransformerFFN``.
        num_head: First constructor argument of ``MultiHeadAttention``.
        dropout_rate: Drop probability of both ``nn.Dropout`` modules.
        layer_norm_eps: ``eps`` of both ``nn.LayerNorm`` modules.
    """

    def __init__(
        self,
        d_model: int,
        d_ff: int,
        num_head: int,
        dropout_rate: float,
        layer_norm_eps: float,
    ) -> None:
        super().__init__()

        # Self-attention sub-layer and its residual helpers.
        self.mha = MultiHeadAttention(num_head, d_model)
        self.layernorm_mha = nn.LayerNorm(d_model, eps=layer_norm_eps)
        self.dropout_mha = nn.Dropout(dropout_rate)

        # Feed-forward sub-layer and its residual helpers.
        self.ffn = TransformerFFN(d_model, d_ff)
        self.dropout_ffn = nn.Dropout(dropout_rate)
        self.layernorm_ffn = nn.LayerNorm(d_model, eps=layer_norm_eps)

    def forward(self, x: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
        """Run ``x`` through the attention and FFN sub-layers.

        Args:
            x: Input tensor; its last dimension must match ``d_model`` for the
                LayerNorm modules.
            mask: Forwarded unchanged as the fourth argument of
                ``MultiHeadAttention``; its expected shape and meaning are
                defined by that module.

        Returns:
            The output of the second LayerNorm.
        """
        # Self-attention: query, key and value are all the same tensor.
        attended = self.dropout_mha(self.mha(x, x, x, mask))
        # Add + LayerNorm.
        hidden = self.layernorm_mha(attended + x)

        # Position-wise feed-forward network.
        transformed = self.dropout_ffn(self.ffn(hidden))
        # Add + LayerNorm.
        return self.layernorm_ffn(transformed + hidden)

In [24]:
class TransformerEncoder(nn.Module):
    """Token embedding, positional encoding and a stack of ``N`` encoder layers.

    Args:
        d_model: Embedding width; also forwarded to ``AddPositionalEncoding``
            and to every ``TransformerEncoderLayer``.
        d_ff: Forwarded to every ``TransformerEncoderLayer``.
        num_head: Forwarded to every ``TransformerEncoderLayer``.
        dropout_rate: Forwarded to every ``TransformerEncoderLayer``.
        layer_norm_eps: Forwarded to every ``TransformerEncoderLayer``.
        max_len: Forwarded to ``AddPositionalEncoding`` (presumably the longest
            supported sequence length -- confirm against ``utils.tools``).
        src_vocab_size: Number of rows of the embedding table.
        N: Number of stacked encoder layers.
        pad_idx: ``padding_idx`` of the embedding table.
        device: Forwarded to ``AddPositionalEncoding`` only; the embedding and
            the encoder layers are not moved to this device here.
    """

    def __init__(
        self,
        d_model: int,
        d_ff: int,
        num_head: int,
        dropout_rate: float,
        layer_norm_eps: float,
        # Encoder-level arguments (not forwarded to the individual layers).
        max_len: int,
        src_vocab_size: int,
        N: int,
        pad_idx: int,
        device: torch.device = torch.device("cpu"),
    ) -> None:
        super().__init__()
        # Input embedding layer.
        self.embedding = nn.Embedding(src_vocab_size, d_model, pad_idx)
        # Positional encoding layer.
        self.pos = AddPositionalEncoding(d_model, max_len, device)
        # Stack of N independent encoder layers.
        self.encoder_layers = nn.ModuleList(
            [
                TransformerEncoderLayer(
                    d_model, d_ff, num_head, dropout_rate, layer_norm_eps
                )
                for _ in range(N)
            ]
        )

    def forward(
        self,
        x: torch.Tensor,
        mask: torch.Tensor = None,
    ) -> torch.Tensor:
        """Encode a tensor of token ids.

        Args:
            x: Integer tensor (``torch.int64`` or ``torch.int32``) of token ids.
            mask: Passed unchanged to every encoder layer.

        Returns:
            The output of the last encoder layer.

        Raises:
            AssertionError: If ``x`` is not an int32/int64 tensor.
        """
        # The message reads "please make x an integer type".
        assert x.dtype in (torch.int64, torch.int32), "xを整数型にしてください"
        x = self.embedding(x)
        # Apply the positional encoding. This call used to be commented out,
        # which left `self.pos` unused and made the encoder blind to token
        # order. Assumes `AddPositionalEncoding` maps the embedded tensor to a
        # tensor the encoder layers accept -- confirm against `utils.tools`.
        x = self.pos(x)
        for layer in self.encoder_layers:
            x = layer(x, mask)
        return x