In [1]:
import math
import numpy as np  # Assuming numpy is imported as np
from transformers import BertTokenizer  # Import BertTokenizer from Transformers

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def positional_encoding(position, d_model):
    """
    Calculates positional encoding for a given position and embedding dimension

    Args:
        position: the position for which to calculate the encoding
        d_model: the dimension of the embedding

    returns:
        A positional encoding vector of shape (d_model)
    """

    if position not in range(0,d_model):
        raise ValueError("Position not in valid range (0 to d_model-1)")
    
    i = 0
    while i<d_model:

        div_term = 1.0/ (math.pow(10000.0, (2 * (i//2))/d_model))

        if i % 2 == 0:
            return math.sin(position * div_term)
        else:
            return math.cos(position * div_term)
    

def create_positional_encoding(max_len, d_model):
    """
    Creates a positional encoding matrix for a given maximum sequence length

    Args:
        max_len : the maximum sequence length
        d_model: the dimension of the embedding

    Returns:
        A positional encoding matrix of shape (max_len, d_model).
    """

    pos_encoding = [positional_encoding(i,d_model) for i in range(max_len)]

    return np.array([positional_encoding(i, d_model) for i in range(max_len)])# Add leading dimension for broadcasting




In [8]:
## Sample document
document =  "this is an example document for the positional encoding"

## load BERT tokenizer -  BERT-base-uncased
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

## Tokenize the document
tokenized_document = tokenizer.encode(document, add_special_tokens=True)

max_len = len(tokenized_document)
d_model = 768

## create positional encoding matrix
pos_encoding = create_positional_encoding(max_len, d_model)

# placeholder for the word embeddings
word_embeddings = np.random.randn(max_len, d_model)

# combine word embedings and positional encodings
embeddings =  word_embeddings + pos_encoding

print("Example document:", document)
print("tokenized document:", tokenized_document)
print("Shape of word embedding:", word_embeddings.shape)
print("Shape of word embeddings:", word_embeddings.shape)
print("Shape of positional encodings:", pos_encoding.shape)
print("Shape of combined embeddings:", embeddings.shape)

ValueError: operands could not be broadcast together with shapes (12,768) (12,) 