In [3]:
import torch
import torch.nn as nn

$$
PE(\text{position}, 2i) = \sin\bigg( \frac{ \text{position} }{10000^\frac{2i}{d_{model}}} \bigg)
$$

$$
PE(\text{position}, 2i+1) = \cos\bigg( \frac{ \text{position} }{10000^\frac{2i}{d_{model}}} \bigg)
$$

We can rewrite these as

$$
PE(\text{position}, i) = \sin\bigg( \frac{ \text{position} }{10000^\frac{i}{d_{model}}} \bigg) \text{ when i is even}
$$

$$
PE(\text{position}, i) = \cos\bigg( \frac{ \text{position} }{10000^\frac{i-1}{d_{model}}} \bigg) \text{ when i is odd}
$$

In [4]:
max_seq = 10
d_model = 6


In [5]:
even_idx = torch.arange(0,d_model,2)
even_idx

tensor([0, 2, 4])

In [6]:
even_denominator = torch.pow(10000,(even_idx)/d_model).float()
even_denominator

tensor([  1.0000,  21.5443, 464.1590])

In [7]:
odd_idx = torch.arange(1,d_model,2)
odd_idx

tensor([1, 3, 5])

In [8]:
odd_denominator = torch.pow(10000,((odd_idx)-1)/d_model).float()
odd_denominator

tensor([  1.0000,  21.5443, 464.1590])

In [9]:
odd_denominator == even_denominator

tensor([True, True, True])

In [10]:
denominator = even_denominator

In [15]:
position = torch.arange(max_seq,dtype = torch.float64).reshape(max_seq,1)
position

tensor([[0.],
        [1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]], dtype=torch.float64)

In [17]:
even_pos = torch.sin(position/denominator)
odd_pos = torch.cos(position/denominator)

In [18]:
print(even_pos.shape)
even_pos

torch.Size([10, 3])


tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.8415,  0.0464,  0.0022],
        [ 0.9093,  0.0927,  0.0043],
        [ 0.1411,  0.1388,  0.0065],
        [-0.7568,  0.1846,  0.0086],
        [-0.9589,  0.2300,  0.0108],
        [-0.2794,  0.2749,  0.0129],
        [ 0.6570,  0.3192,  0.0151],
        [ 0.9894,  0.3629,  0.0172],
        [ 0.4121,  0.4057,  0.0194]], dtype=torch.float64)

In [20]:
print(odd_pos.shape)
odd_pos

torch.Size([10, 3])


tensor([[ 1.0000,  1.0000,  1.0000],
        [ 0.5403,  0.9989,  1.0000],
        [-0.4161,  0.9957,  1.0000],
        [-0.9900,  0.9903,  1.0000],
        [-0.6536,  0.9828,  1.0000],
        [ 0.2837,  0.9732,  0.9999],
        [ 0.9602,  0.9615,  0.9999],
        [ 0.7539,  0.9477,  0.9999],
        [-0.1455,  0.9318,  0.9999],
        [-0.9111,  0.9140,  0.9998]], dtype=torch.float64)

In [30]:
stacked = torch.stack([even_pos,odd_pos],dim=2)
stacked,stacked.shape

(tensor([[[ 0.0000,  1.0000],
          [ 0.0000,  1.0000],
          [ 0.0000,  1.0000]],
 
         [[ 0.8415,  0.5403],
          [ 0.0464,  0.9989],
          [ 0.0022,  1.0000]],
 
         [[ 0.9093, -0.4161],
          [ 0.0927,  0.9957],
          [ 0.0043,  1.0000]],
 
         [[ 0.1411, -0.9900],
          [ 0.1388,  0.9903],
          [ 0.0065,  1.0000]],
 
         [[-0.7568, -0.6536],
          [ 0.1846,  0.9828],
          [ 0.0086,  1.0000]],
 
         [[-0.9589,  0.2837],
          [ 0.2300,  0.9732],
          [ 0.0108,  0.9999]],
 
         [[-0.2794,  0.9602],
          [ 0.2749,  0.9615],
          [ 0.0129,  0.9999]],
 
         [[ 0.6570,  0.7539],
          [ 0.3192,  0.9477],
          [ 0.0151,  0.9999]],
 
         [[ 0.9894, -0.1455],
          [ 0.3629,  0.9318],
          [ 0.0172,  0.9999]],
 
         [[ 0.4121, -0.9111],
          [ 0.4057,  0.9140],
          [ 0.0194,  0.9998]]], dtype=torch.float64),
 torch.Size([10, 3, 2]))

In [32]:
pos_emb = torch.flatten(stacked,start_dim=1,end_dim=2)
pos_emb.shape,pos_emb

(torch.Size([10, 6]),
 tensor([[ 0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  1.0000],
         [ 0.8415,  0.5403,  0.0464,  0.9989,  0.0022,  1.0000],
         [ 0.9093, -0.4161,  0.0927,  0.9957,  0.0043,  1.0000],
         [ 0.1411, -0.9900,  0.1388,  0.9903,  0.0065,  1.0000],
         [-0.7568, -0.6536,  0.1846,  0.9828,  0.0086,  1.0000],
         [-0.9589,  0.2837,  0.2300,  0.9732,  0.0108,  0.9999],
         [-0.2794,  0.9602,  0.2749,  0.9615,  0.0129,  0.9999],
         [ 0.6570,  0.7539,  0.3192,  0.9477,  0.0151,  0.9999],
         [ 0.9894, -0.1455,  0.3629,  0.9318,  0.0172,  0.9999],
         [ 0.4121, -0.9111,  0.4057,  0.9140,  0.0194,  0.9998]],
        dtype=torch.float64))

In [74]:
class Positional_embeddings(nn.Module):
    def __init__(self,d_model,max_seq_len):
        self.d_model = d_model
        self.max_seq_len = max_seq_len

    def forward(self):
        even_idx = torch.arange(0,self.d_model,2)
        # odd_idx = torch.arange(1,d_model,2)
        denominator = torch.pow(10000,even_idx/d_model)
        # print(denominator)
        pos = torch.arange(self.max_seq_len,dtype=torch.float64).reshape(self.max_seq_len,1)
        # print(pos)
        even_pe = torch.sin(pos/denominator)
        # print(even_pe)
        odd_pe = torch.cos(pos/denominator)
        # print(odd_pe)
        stacked = torch.stack([even_pe,odd_pe],dim=2)
        # print(stacked)
        PE = torch.flatten(stacked,start_dim=1,end_dim=2)

        return PE

In [77]:
postion_em = Positional_embeddings(d_model=512,max_seq_len=200)

In [78]:
postion_em.forward()

tensor([[ 0.0000,  1.0000,  0.0000,  ...,  1.0000,  0.0000,  1.0000],
        [ 0.8415,  0.5403,  0.0464,  ...,  1.0000,  0.0000,  1.0000],
        [ 0.9093, -0.4161,  0.0927,  ...,  1.0000,  0.0000,  1.0000],
        ...,
        [ 0.7958, -0.6056,  0.2772,  ...,  1.0000,  0.0000,  1.0000],
        [-0.0796, -0.9968,  0.2323,  ...,  1.0000,  0.0000,  1.0000],
        [-0.8818, -0.4716,  0.1869,  ...,  1.0000,  0.0000,  1.0000]],
       dtype=torch.float64)