# Sinusoidal position encoding

In [1]:
import torch
from torch import nn
import numpy as np

## calculations

$$
\begin{split}
PE(t,2i)&=\sin\left(\frac{t}{10000^{\frac{2i}{d_{model}}}}\right)\\
PE(t,2i+1)&=\cos\left(\frac{t}{10000^{\frac{2i}{d_{model}}}}\right)\\
\Downarrow\\
PE(t,i)&=\sin\left(\frac{t}{10000^{\frac{i}{d_{model}}}}\right), \quad \text{i is even}\\
PE(t,i)&=\cos\left(\frac{t}{10000^{\frac{i-1}{d_{model}}}}\right), \quad \text{i is odd}\\
\end{split}
$$

## cases

In [2]:
# Toy dimensions, small enough to inspect the whole table by eye:
# 10 positions (rows) and 6 embedding channels (columns).
max_sequence_length, d_model = 10, 6

In [19]:
# Channel indices split by parity: even channels take the sine term,
# odd channels take the cosine term.
even_i = torch.arange(0, d_model, 2, dtype=torch.float32)
odd_i = torch.arange(1, d_model, 2, dtype=torch.float32)

In [16]:
# Column vector of positions 0 .. max_sequence_length - 1, shaped (N, 1) so
# that it broadcasts against the per-channel denominators.
position = torch.arange(max_sequence_length, dtype=torch.float32).unsqueeze(1)

In [21]:
# Odd channel i reuses the exponent of even channel i - 1, so each
# sin/cos pair shares a single frequency.
even_pe = position.div(torch.pow(10000, even_i / d_model)).sin()
odd_pe = position.div(torch.pow(10000, (odd_i - 1) / d_model)).cos()

In [22]:
# Pair column k of even_pe with column k of odd_pe along a new trailing axis,
# then merge the last two axes so the columns alternate sin, cos, sin, cos, ...
# Shapes: (10, 3, 2) => (10, 6)
stacked = torch.stack((even_pe, odd_pe), dim=-1)
pe = stacked.flatten(start_dim=1)

In [10]:
# Scalar cross-check of the row for position t = 1, one channel at a time,
# using NumPy instead of the vectorized torch expressions.
t = 1
for i in range(d_model):
    is_even = i % 2 == 0
    # Odd channels share the exponent of the even channel just before them.
    exponent = (i if is_even else i - 1) / d_model
    angle = t / 10000**exponent
    value = np.sin(angle) if is_even else np.cos(angle)
    print(f'{value:.4f}', end=' ')

0.8415 0.5403 0.0464 0.9989 0.0022 1.0000 

## from scratch

In [27]:
class SinPositionEncoding(nn.Module):
    """Fixed sinusoidal position-encoding table.

    For position ``t`` and channel ``i``::

        PE(t, i) = sin(t / base ** (i / d_model))        if i is even
        PE(t, i) = cos(t / base ** ((i - 1) / d_model))  if i is odd

    The module has no learnable parameters; the table is recomputed on
    every call to ``forward``.

    Args:
        max_sequence_length: Number of positions (rows of the table).
        d_model: Number of channels (columns of the table). May be odd, in
            which case the last sine channel has no cosine partner.
        base: Base of the geometric frequency progression.
    """

    def __init__(self, max_sequence_length: int, d_model: int, base: float = 10000):
        super().__init__()
        self.max_sequence_length = max_sequence_length
        self.d_model = d_model
        self.base = base

    def forward(self) -> torch.Tensor:
        """Return the encoding table.

        Returns:
            A float32 tensor of shape ``(max_sequence_length, d_model)`` whose
            even columns hold the sine terms and odd columns the cosine terms.
        """
        # (N, 1) column of positions; broadcasts against the per-channel
        # denominators below.
        position = torch.arange(
            self.max_sequence_length, dtype=torch.float
        ).reshape(-1, 1)

        # One frequency per even channel; the odd channel that follows it
        # reuses the same frequency.
        even_i = torch.arange(0, self.d_model, 2).float()
        angles = position / torch.pow(self.base, even_i / self.d_model)

        # Fill the columns by slice assignment rather than stack + flatten:
        # when d_model is odd there is one more sine column than cosine
        # columns, so the two halves cannot be stacked.
        pe = angles.new_zeros(self.max_sequence_length, self.d_model)
        pe[:, 0::2] = torch.sin(angles)
        pe[:, 1::2] = torch.cos(angles[:, : self.d_model // 2])
        return pe

# Same dimensions as the step-by-step cells (10 positions, 6 channels), so the
# two tables can be compared directly.
spe = SinPositionEncoding(10, 6)

## Compare the result
The output of `spe()` should be identical to the `pe` tensor built step by step above.

In [26]:
# Encoding table produced by the module implementation.
spe()

tensor([[ 0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  1.0000],
        [ 0.8415,  0.5403,  0.0464,  0.9989,  0.0022,  1.0000],
        [ 0.9093, -0.4161,  0.0927,  0.9957,  0.0043,  1.0000],
        [ 0.1411, -0.9900,  0.1388,  0.9903,  0.0065,  1.0000],
        [-0.7568, -0.6536,  0.1846,  0.9828,  0.0086,  1.0000],
        [-0.9589,  0.2837,  0.2300,  0.9732,  0.0108,  0.9999],
        [-0.2794,  0.9602,  0.2749,  0.9615,  0.0129,  0.9999],
        [ 0.6570,  0.7539,  0.3192,  0.9477,  0.0151,  0.9999],
        [ 0.9894, -0.1455,  0.3629,  0.9318,  0.0172,  0.9999],
        [ 0.4121, -0.9111,  0.4057,  0.9140,  0.0194,  0.9998]])

In [28]:
# Encoding table built step by step in the earlier cells, for comparison.
pe

tensor([[ 0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  1.0000],
        [ 0.8415,  0.5403,  0.0464,  0.9989,  0.0022,  1.0000],
        [ 0.9093, -0.4161,  0.0927,  0.9957,  0.0043,  1.0000],
        [ 0.1411, -0.9900,  0.1388,  0.9903,  0.0065,  1.0000],
        [-0.7568, -0.6536,  0.1846,  0.9828,  0.0086,  1.0000],
        [-0.9589,  0.2837,  0.2300,  0.9732,  0.0108,  0.9999],
        [-0.2794,  0.9602,  0.2749,  0.9615,  0.0129,  0.9999],
        [ 0.6570,  0.7539,  0.3192,  0.9477,  0.0151,  0.9999],
        [ 0.9894, -0.1455,  0.3629,  0.9318,  0.0172,  0.9999],
        [ 0.4121, -0.9111,  0.4057,  0.9140,  0.0194,  0.9998]])