In [26]:
# import basic libraries
import pandas
import pickle
import math

# import torch module
from transformers import BertTokenizerFast
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer

In [31]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to an input, then apply dropout.

    The encoding table is computed once in ``__init__`` and stored as a module
    buffer, so it follows the module through ``.to(device)`` / ``.cuda()`` and
    is not treated as a trainable parameter.

    Args:
        dimension: Size of the last (feature) axis of the inputs.
        vocab_size: Number of positions pre-computed in the encoding table.
            ``forward`` rejects inputs with more positions than this along
            axis 1.
        dropout: Dropout probability applied after the encodings are added.
    """

    def __init__(self, dimension, vocab_size, dropout):
        super().__init__()
        self.dropout = nn.Dropout(dropout)

        pe = torch.zeros(vocab_size, dimension)
        position = torch.arange(0, vocab_size, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, dimension, 2).float() * (-math.log(10000.0) / dimension)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd `dimension` there is one fewer odd column than even
        # column, so trim the frequencies to match the cosine slots.
        pe[:, 1::2] = torch.cos(position * div_term[: dimension // 2])

        # The table must be stored on the module: `forward` reads `self.pe`.
        # A buffer (rather than a plain attribute) also moves with the module
        # across devices. Stored with a leading axis of size 1 for broadcasting.
        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x):
        """Return ``dropout(x + pe[:, :x.size(1), :])``.

        Args:
            x: Tensor whose axis 1 indexes positions and whose last axis has
                size ``dimension``; the leading axis is broadcast over.

        Raises:
            ValueError: If ``x`` has more positions along axis 1 than the
                table holds.
        """
        seq_len = x.size(1)
        max_len = self.pe.size(1)
        if seq_len > max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds positional table size {max_len}"
            )
        x = x + self.pe[:, :seq_len, :]
        return self.dropout(x)

class TransoformerClassifierNetwork(nn.Module):
    """Transformer-encoder classifier over sequences of token ids.

    Pipeline: token embedding (scaled by ``sqrt(dimension)``) -> sinusoidal
    positional encoding -> stacked ``TransformerEncoder`` layers -> mean over
    the sequence axis -> linear layer producing class logits.

    All tensors are handled batch-first, i.e. ``(batch, sequence, feature)``.
    The class name keeps its original spelling so existing callers still work.

    Args:
        embeddings: Accepted for interface compatibility; not used by this
            class (the embedding table is freshly initialised).
        nhead: Number of attention heads in each encoder layer.
        dim_feedforward: Hidden size of each encoder layer's feed-forward block.
        num_layers: Number of stacked encoder layers.
        dropout: Dropout probability for the positional encoding and the
            encoder layers.
        activation: Activation for the encoder feed-forward block, as accepted
            by ``TransformerEncoderLayer`` (e.g. ``"relu"`` or ``"gelu"``).
            ``None`` falls back to ``"relu"``.
        vocab_size: Number of rows in the embedding table; also passed to
            ``PositionalEncoding`` as the number of pre-computed positions.
        dimension: Embedding / model width.
        num_classes: Number of output logits. Defaults to 3.
    """

    def __init__(self, embeddings, nhead, dim_feedforward, num_layers, dropout, activation, vocab_size, dimension, num_classes=3):
        super().__init__()

        self.emb = nn.Embedding(vocab_size, dimension)

        self.pos_encoder = PositionalEncoding(
            dimension = dimension,
            dropout = dropout,
            vocab_size = vocab_size
        )

        encoder_layer = TransformerEncoderLayer(
            d_model = dimension,
            nhead = nhead,
            dim_feedforward = dim_feedforward,
            dropout = dropout,
            # Honour the caller's choice instead of silently ignoring it.
            activation = "relu" if activation is None else activation,
            # forward() slices positions on axis 1 and pools over axis 1, so
            # the encoder must also treat axis 0 as the batch; otherwise
            # self-attention would mix information across different samples.
            batch_first = True
        )

        self.transformer_encoder = TransformerEncoder(
            encoder_layer,
            num_layers = num_layers
        )

        self.linear = nn.Linear(dimension, num_classes)
        self.dimension = dimension

    def forward(self, input):
        """Compute class logits for a batch of token-id sequences.

        Args:
            input: Integer tensor of token ids, shape ``(batch, sequence)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised logits.
        """
        # Scale embeddings by sqrt(dimension) before adding positional encodings.
        x = self.emb(input)*math.sqrt(self.dimension)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        # Mean-pool over the sequence axis (every position is included).
        x = x.mean(dim=1)
        output = self.linear(x)
        return output
