<a href="https://colab.research.google.com/github/samitha278/CoreLlama/blob/main/positional_encoding_methods.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [53]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

### Binary value function

In [55]:
def _get_binary(value):

    if value == 1 or value == 0 :
        return f"{value}"

    binary = f"{_get_binary(value//2)}{value%2}"

    return binary

# Demo: binary string of 16 -> list of digit ints -> 1-D tensor.
# NOTE: `bin` shadows the builtin of the same name for the rest of the notebook.
bin = _get_binary(16)
print(bin)
bin_list = [int(digit) for digit in bin]
tensor = torch.tensor(bin_list)
print(tensor)

10000
tensor([1, 0, 0, 0, 0])


In [62]:
# Scratch check: right-aligned slice assignment into one row of a zero table
# (the same pattern the loop-based encoder uses to place the bits).
zeros = torch.zeros(4, 6)
zeros[2, -3:] = torch.tensor([1, 2, 3])
zeros

tensor([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 2., 3.],
        [0., 0., 0., 0., 0., 0.]])

# Binary Positional Encoding

### Slow version

In [61]:
class BinaryPE():
    """Binary positional encoding built with an explicit Python loop.

    Row ``pos`` of ``encoding`` holds the binary digits of ``pos``, most
    significant bit first, right-aligned and zero-padded to ``d_model``
    columns. Positions that need more than ``d_model`` bits keep only their
    lowest ``d_model`` bits.
    """

    def __init__(self, max_len, d_model):
        """Precompute the ``(max_len, d_model)`` lookup table.

        Args:
            max_len: Number of positions (rows) to encode.
            d_model: Number of bit columns per position.
        """
        self.encoding = self._precompute_binary(max_len, d_model)

    def _get_binary(self, value):
        """Return the binary digits of a non-negative integer as a string.

        Raises:
            ValueError: If ``value`` is negative.
        """
        if value < 0:
            raise ValueError(f"value must be non-negative, got {value}")

        # The "b" format spec emits exactly the digits, with no "0b" prefix.
        return format(value, "b")

    def _precompute_binary(self, max_len, d_model):
        """Build the float table of binary digits, one row per position.

        Returns:
            A ``torch`` float tensor of shape ``(max_len, d_model)``.
        """
        binary_table = torch.zeros((max_len, d_model))

        for pos in range(max_len):
            binary = [int(digit) for digit in self._get_binary(pos)]
            # Debug trace of each position's digits (visible in the cell output).
            print(binary)

            # Keep only the lowest d_model bits; without this, a position
            # wider than d_model produces a negative slice start and a
            # shape-mismatch error on assignment.
            overflow = len(binary) - d_model
            bits = binary[overflow:] if overflow > 0 else binary

            # Right-align the digits; the leading columns stay zero.
            binary_table[pos, d_model - len(bits):] = torch.tensor(
                bits, dtype=binary_table.dtype
            )

        return binary_table

    def forward(self, ids):
        """Look up the encoding rows for ``ids``.

        Args:
            ids: Position indices (int, slice, or integer tensor) used to
                index the first dimension of ``encoding``.

        Returns:
            The selected rows of ``encoding``.
        """
        # `encoding` is a tensor, so it must be indexed, not called.
        return self.encoding[ids]


In [59]:
# Build a small table (4 positions, 6 bit columns) and display it.
binary_pe = BinaryPE(max_len=4, d_model=6)
binary_pe.encoding

[0]
[1]
[1, 0]
[1, 1]


tensor([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1., 1.]])

### Optimized version

In [74]:
class BinaryPositionalEncoding(nn.Module):
    """Binary positional encoding computed with vectorized bit operations.

    Row ``pos`` of the ``encoding`` buffer holds the lowest ``d_model`` bits
    of ``pos`` as floats, most significant bit first.
    """

    # Largest right-shift count that is well defined for int64 positions.
    _MAX_SHIFT = 63

    def __init__(self, max_len, d_model):
        """Precompute the ``(max_len, d_model)`` table and register it.

        Args:
            max_len: Number of positions (rows) to encode.
            d_model: Number of bit columns per position.
        """
        super().__init__()

        # Registered as a buffer so it moves with the model to its device (GPU).
        self.register_buffer('encoding', self._precompute_binary(max_len, d_model))

    def _precompute_binary(self, max_len, d_model):
        """Build the float table of binary digits, one row per position.

        Returns:
            A float tensor of shape ``(max_len, d_model)`` containing 0.0/1.0.
        """
        positions = torch.arange(max_len).unsqueeze(1)  # [max_len, 1]

        # Bit indices, MSB to LSB.
        bit_indices = torch.arange(d_model - 1, -1, -1).unsqueeze(0)  # [1, d_model]

        # Shifting int64 by 64 or more is undefined, so cap the shift count.
        # Positions are non-negative, so a shift of 63 always yields 0, which
        # is the correct value for every bit index at or above 63.
        shifts = bit_indices.clamp(max=self._MAX_SHIFT)

        # Right-shift positions by each bit index and mask with 1.
        # The two tensors broadcast to [max_len, d_model].
        binary_table = (positions >> shifts) & 1

        return binary_table.float()

    def forward(self, ids):
        """Return the encoding rows selected by the position indices ``ids``."""
        return self.encoding[ids]

For position 5 with d_model = 4 (binary 0101), each column is computed as `(position >> bit_index) & 1`:

- (5 >> 3) & 1 = (0) & 1 = 0  # MSB (bit 3)
- (5 >> 2) & 1 = (1) & 1 = 1  # bit 2
- (5 >> 1) & 1 = (2) & 1 = 0  # bit 1
- (5 >> 0) & 1 = (5) & 1 = 1  # LSB (bit 0)

Result: [0, 1, 0, 1]

In [82]:
# Encode positions 0..7 with 12 bit columns and display the looked-up rows.
binary_pe = BinaryPositionalEncoding(max_len=8, d_model=12)
binary_pe(torch.arange(8))

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1.]])

# Sinusoidal Positional Encoding

## Rotary Positional Encoding - RoPE