<a href="https://colab.research.google.com/github/priyal6/NLP-Prac/blob/main/positionalembed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn

class AbsolutePositionalEmbedding(nn.Module):
    """Learned absolute positional embedding that is added to its input.

    One ``d_model``-sized vector is learned for every position index in
    ``[0, max_len)``.
    """

    def __init__(self, max_len: int, d_model: int):
        super().__init__()
        self.pos_embedding = nn.Embedding(max_len, d_model)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` plus the embeddings of positions ``0 .. x.size(1) - 1``.

        Args:
            x: Tensor whose dimension 1 is the sequence axis. The looked-up
                ``(seq_len, d_model)`` embeddings are broadcast-added to it
                (presumably ``x`` is ``(batch, seq_len, d_model)`` -- confirm
                against callers).

        Returns:
            The element-wise sum of ``x`` and the positional embeddings.

        Raises:
            IndexError: If the sequence is longer than the embedding table.
        """
        seq_len = x.size(1)
        max_len = self.pos_embedding.num_embeddings
        # Fail early with a readable message instead of an opaque lookup
        # error (or a device-side assert on accelerators).
        if seq_len > max_len:
            raise IndexError(
                f"sequence length {seq_len} exceeds max_len {max_len}"
            )
        # Build the indices on the input's device so the lookup result can be
        # added to ``x`` directly.
        positions = torch.arange(seq_len, device=x.device)
        return x + self.pos_embedding(positions)


In [2]:
class RelativePositionalBias(nn.Module):
    """Learned scalar bias for every relative offset between two positions.

    The table holds ``2 * max_len - 1`` scalars, one for each offset in
    ``[-(max_len - 1), max_len - 1]``.
    """

    def __init__(self, max_len: int):
        super().__init__()
        self.max_len = max_len
        self.rel_bias = nn.Embedding(2 * max_len - 1, 1)

    def forward(self, seq_len: int) -> torch.Tensor:
        """Return the ``(seq_len, seq_len)`` matrix of relative-position biases.

        Entry ``[i, j]`` is the learned bias for offset ``j - i``.

        Args:
            seq_len: Number of positions; must not exceed ``max_len``.

        Returns:
            A ``(seq_len, seq_len)`` tensor on the same device as the bias
            table.

        Raises:
            IndexError: If ``seq_len`` is larger than ``max_len`` (the offsets
                would fall outside the bias table).
        """
        if seq_len > self.max_len:
            raise IndexError(
                f"sequence length {seq_len} exceeds max_len {self.max_len}"
            )
        # Create the indices on the table's device; a CPU index tensor would
        # fail the lookup once the module has been moved to an accelerator.
        device = self.rel_bias.weight.device
        positions = torch.arange(seq_len, device=device)
        # offset[i, j] = j - i, shifted by (max_len - 1) so the smallest
        # representable offset maps to table row 0.
        rel_positions = positions[None, :] - positions[:, None] + (self.max_len - 1)
        return self.rel_bias(rel_positions).squeeze(-1)

In [3]:
def rotate_half(x):
    """Swap the two halves of the last dimension, negating the second half.

    The last dimension is split into two chunks ``(a, b)`` and the result is
    ``(-b, a)`` concatenated along that same dimension.
    """
    first, second = torch.chunk(x, chunks=2, dim=-1)
    negated_second = torch.neg(second)
    return torch.cat([negated_second, first], dim=-1)


def apply_rope(x, sin, cos):
    """Combine ``x`` with its half-rotated copy using the given sin/cos tables.

    Computes ``x * cos + rotate_half(x) * sin``; ``sin`` and ``cos`` must
    broadcast against ``x``.
    """
    cos_term = x * cos
    sin_term = rotate_half(x) * sin
    return cos_term + sin_term


def rope_frequencies(seq_len, dim, device):
    """Build the sin/cos tables for rotary position embeddings.

    Args:
        seq_len: Number of positions; table rows cover ``0 .. seq_len - 1``.
        dim: Feature size to rotate; must be even.
        device: Device on which the tables are created.

    Returns:
        A ``(sin, cos)`` pair, each of shape ``(seq_len, dim)``. Column ``i``
        and column ``i + dim // 2`` hold the same angle, which is the layout
        required by a rotation that pairs the two halves of the feature
        dimension (as ``rotate_half`` in this file does).

    Raises:
        ValueError: If ``dim`` is odd.
    """
    if dim % 2 != 0:
        raise ValueError(f"dim must be even, got {dim}")

    # One inverse frequency per feature pair: 10000^(-2k / dim).
    theta = 10000 ** (-torch.arange(0, dim, 2, device=device) / dim)
    positions = torch.arange(seq_len, device=device)
    angles = positions[:, None] * theta[None, :]

    # Duplicate by concatenation, not interleaving: the half-split rotation
    # pairs feature i with feature i + dim/2, so both must share an angle.
    # Interleaving would give the paired features different angles and the
    # result would no longer be a rotation.
    angles = torch.cat((angles, angles), dim=-1)
    return torch.sin(angles), torch.cos(angles)
