In [None]:
# length is the seq_len, depth is the embedding dim
def positional_encoding(length, depth):
    """Build the sinusoidal positional-encoding table.

    Entry ``[pos, 2i]`` is ``sin(pos / 10000**(2i / depth))`` and entry
    ``[pos, 2i + 1]`` is ``cos(pos / 10000**(2i / depth))``, i.e. sine and
    cosine are interleaved along the last axis.

    Args:
        length: Number of positions to encode (the sequence length).
        depth: Size of each encoding vector (the embedding dimension).

    Returns:
        The ``(length, depth)`` table cast to ``tf.float32``.
    """
    positions = np.arange(length)[:, np.newaxis]  # (length, 1)
    depths = np.arange(depth)[np.newaxis, :]  # (1, depth)

    # Columns 2i and 2i+1 share one frequency, hence ``depths // 2``.
    # The parentheses matter: ``2 * depths // 2`` is just ``depths``, and the
    # division by ``depth`` belongs inside the exponent. A float base keeps
    # np.power from overflowing integer arithmetic for larger exponents.
    exponents = (2 * (depths // 2)) / depth
    angle_rates = 1.0 / np.power(10000.0, exponents)

    angles = positions * angle_rates  # (length, depth)
    angles[:, 0::2] = np.sin(angles[:, 0::2])
    angles[:, 1::2] = np.cos(angles[:, 1::2])

    return tf.cast(angles, tf.float32)

In [None]:
class PositionalEmbedding(tf.keras.layers.layer):
    def __init__(self, vocab_size, d_model, *, max_length=2048):
        """Create the token embedding and precompute the positional table.

        Args:
            vocab_size: Number of rows in the token embedding table.
            d_model: Embedding dimension; also the depth of the positional
                encoding.
            max_length: Number of positions to precompute encodings for.
                Defaults to 2048, the value that was previously hard-coded.
        """
        super().__init__()
        self.d_model = d_model
        self.embedding = Embedding(vocab_size, d_model, mask_zero=True)
        # Precompute the encodings once for max_length positions.
        self.pos_encoding = positional_encoding(length=max_length, depth=d_model)
        # call() reads the table as ``self.positional_encoding``; expose it
        # under that name as well so the lookup does not raise AttributeError,
        # while keeping ``pos_encoding`` for any existing users.
        self.positional_encoding = self.pos_encoding
        
    def compute_mask(self, *args, **kwargs):
        """Return the mask computed by the wrapped embedding layer.

        All positional and keyword arguments are forwarded unchanged to
        ``self.embedding.compute_mask``.
        """
        embedding_layer = self.embedding
        return embedding_layer.compute_mask(*args, **kwargs)
   
    def call(self,x):
        # x is input its shape will be  batch_size,seq_len
        length=tf.shape(x)[1]
        x=self.embedding(x)
        # now x will be of shape batch_size,seq_len,embed_dim
        
        # This factor sets the relative scale of the embedding and positonal_encoding.
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        
        x=x+self.positional_encoding[tf.newaxis,:length,:]