In [8]:
import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import math
from torch.autograd import Variable

In [2]:
class EncoderModel(nn.Module):
    """Transformer-encoder classifier over token-id sequences.

    Pipeline: token embedding (+ learned position embedding) -> optional
    linear projection from ``embedding_size`` to ``d_model`` -> a learned
    pooling vector prepended to the sequence -> ``num_layers`` transformer
    encoder layers -> the encoded pooling slot is mapped to ``num_classes``
    outputs and squashed with a sigmoid.

    Args:
        embedding_size: Width of the token embedding table.
        d_model: Width used inside the transformer encoder.
        num_heads: Attention heads per encoder layer (must divide ``d_model``).
        num_layers: Number of stacked encoder layers.
        vocab_size: Number of rows in the token embedding table.
        num_classes: Number of output units.
        seq_len: Stored on the instance; not otherwise used by this class.
        pe: Number of rows in the position embedding table, i.e. the longest
            sequence ``forward`` accepts.
        rate: Dropout probability inside the encoder layers.
    """

    def __init__(
        self,
        embedding_size: int,
        d_model: int,
        num_heads: int,
        num_layers: int,
        vocab_size: int,
        num_classes: int,
        seq_len: int,
        pe: int = 1024,
        rate: float = 0.1
    ):
        super().__init__()
        self.embedding_size = embedding_size
        self.d_model = d_model
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.num_classes = num_classes
        self.seq_len = seq_len
        self.pe = pe
        self.rate = rate

        # `pe` must not be passed positionally to nn.Embedding: the third
        # positional argument is `padding_idx`, which has to be smaller than
        # `vocab_size` and would silently freeze one row at zero.
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        # NOTE(review): `pe` is interpreted as the maximum number of
        # positions and backs a learned position table - confirm intent.
        self.position_embedding = nn.Embedding(pe, embedding_size)

        # Project to the encoder width only when the two widths differ;
        # nn.Identity keeps the attribute available in both cases.
        if embedding_size != d_model:
            self.embedding_intermediate = nn.Linear(embedding_size, d_model)
        else:
            self.embedding_intermediate = nn.Identity()

        encoder_layer = nn.TransformerEncoderLayer(
            self.d_model,
            self.num_heads,
            dim_feedforward=2048,
            dropout=self.rate,
            activation="gelu",
            layer_norm_eps=1e-05,
            batch_first=True,
        )
        # Keep the stack on the module so its parameters are registered.
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=self.num_layers)

        # Single learned vector prepended to every sequence; its encoded
        # state is what the classifier head reads.
        self.pooling_embedding = nn.Embedding(1, self.d_model)
        # The sigmoid is applied in forward(); it is a function of a tensor,
        # not a constructor argument of nn.Linear.
        self.output_layer = nn.Linear(self.d_model, self.num_classes)

    def forward(self, tokens: Tensor, padding_mask=None) -> Tensor:
        """Return per-class sigmoid scores of shape ``(batch, num_classes)``.

        Args:
            tokens: Integer tensor of token ids, shape ``(batch, length)``.
            padding_mask: Optional boolean tensor of shape ``(batch, length)``
                where ``True`` marks positions the encoder should ignore.

        Raises:
            ValueError: If ``length`` exceeds the ``pe`` positions available.
        """
        batch_size, length = tokens.shape
        if length > self.pe:
            raise ValueError(
                f"sequence length {length} exceeds the {self.pe} available positions"
            )

        positions = torch.arange(length, device=tokens.device)
        hidden = self.embedding(tokens) + self.position_embedding(positions)
        hidden = self.embedding_intermediate(hidden)

        # (1, d_model) -> (batch, 1, d_model), placed in front of the tokens.
        pooling_slot = self.pooling_embedding.weight.unsqueeze(0).expand(batch_size, -1, -1)
        hidden = torch.cat([pooling_slot, hidden], dim=1)

        if padding_mask is not None:
            # The pooling slot is never padding.
            keep = padding_mask.new_zeros(batch_size, 1)
            padding_mask = torch.cat([keep, padding_mask], dim=1)

        encoded = self.encoder(hidden, src_key_padding_mask=padding_mask)
        return torch.sigmoid(self.output_layer(encoded[:, 0]))

In [None]:
class EncoderLayer(nn.Module):
    """Placeholder encoder layer: a module with no sub-layers and no forward yet."""

    def __init__(self, d_model, heads, dropout: float = 0.1):
        # The arguments are accepted but not stored or used so far.
        super(EncoderLayer, self).__init__()
        

In [None]:
class Embedder(nn.Module):
    """Look up a ``d_model``-wide vector for each integer id in the input."""

    def __init__(self, vocab_size, d_model):
        super(Embedder, self).__init__()
        # Table with one row per vocabulary entry.
        self.embed = nn.Embedding(vocab_size, d_model)
        self.d_model = d_model

    def forward(self, x):
        """Return the embedding rows selected by the ids in ``x``."""
        looked_up = self.embed(x)
        return looked_up

class PositionalEncoder(nn.Module):
    """Scale embeddings by sqrt(d_model), add sinusoidal positions, apply dropout.

    The table follows the standard sinusoidal scheme: for every even column
    ``i``, column ``i`` holds ``sin(pos / 10000 ** (i / d_model))`` and column
    ``i + 1`` holds the cosine of the same angle. (The earlier loop doubled
    the exponent and used a different frequency for the cosine column.)

    Args:
        d_model: Width of the embeddings; odd widths are supported.
        max_seq_len: Number of positions precomputed in the table.
        dropout: Dropout probability applied to the output.
    """

    def __init__(self, d_model, max_seq_len = 200, dropout = 0.1):
        super().__init__()
        self.d_model = d_model
        self.dropout = nn.Dropout(dropout)

        # Build the table in float64 and cast once at the end, so the stored
        # values match what a scalar math.sin / math.cos loop would produce.
        position = torch.arange(max_seq_len, dtype=torch.float64).unsqueeze(1)
        even_columns = torch.arange(0, d_model, 2, dtype=torch.float64)
        inv_freq = torch.exp(even_columns * (-math.log(10000.0) / d_model))
        angles = position * inv_freq  # (max_seq_len, ceil(d_model / 2))

        pe = torch.zeros(max_seq_len, d_model, dtype=torch.float64)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even ones.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # A buffer follows the module across .to()/.cuda() and is saved in
        # the state dict without being a trainable parameter.
        self.register_buffer('pe', pe.unsqueeze(0).to(torch.get_default_dtype()))

    def forward(self, x):
        """Return ``dropout(x * sqrt(d_model) + pe[:, :seq_len])``.

        ``seq_len`` is read from ``x.size(1)``.

        Raises:
            ValueError: If ``x.size(1)`` exceeds the precomputed positions.
        """
        seq_len = x.size(1)
        if seq_len > self.pe.size(1):
            raise ValueError(
                f"sequence length {seq_len} exceeds max_seq_len {self.pe.size(1)}"
            )
        # make embeddings relatively larger
        x = x * math.sqrt(self.d_model)
        # Buffers carry no gradient; .to() returns the moved tensor (a bare
        # `.cuda()` call whose result is discarded does nothing).
        x = x + self.pe[:, :seq_len].to(x.device)
        return self.dropout(x)