In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import math
import random
import numpy as np

# Seed the Python, PyTorch and NumPy global RNGs with the same fixed value.
# A call returning None is kept last so the cell produces no output
# (torch.manual_seed returns a Generator object).
random.seed(12046)        # Python built-in `random` module
torch.manual_seed(12046)  # PyTorch default generator
np.random.seed(12046)     # NumPy legacy global RNG

In [2]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch of embeddings.

    For position ``pos`` and even feature index ``i`` the table holds::

        pe[pos, i]     = sin(pos / 10000 ** (i / d_model))
        pe[pos, i + 1] = cos(pos / 10000 ** (i / d_model))

    The table is computed once in ``__init__`` and stored as a buffer named
    ``pe``, so it is not a trainable parameter.

    Args:
        d_model: Size of the feature (last) dimension. May be odd; the last
            sine column then has no cosine partner.
        max_len: Number of positions precomputed in the table.
        dropout: Dropout probability applied after the encoding is added.
    """

    def __init__(self, d_model, max_len=5000, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(dropout)

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        even_dims = torch.arange(0, d_model, 2, dtype=torch.float)  # (ceil(d_model / 2),)

        # Wavelengths grow geometrically with the feature index, so the
        # position must be DIVIDED by 10000 ** (i / d_model); multiplying would
        # make the frequency explode and the encoding alias into noise.
        angles = position / (10000.0 ** (even_dims / d_model))  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        pe = pe.unsqueeze(0)  # (1, max_len, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + pe[:, :seq_len])``.

        Args:
            x: Tensor of shape (batch_size, seq_len, d_model); ``seq_len`` must
                not exceed the ``max_len`` given at construction.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # The leading dimension of size 1 broadcasts over the batch.
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)