In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Small example dimensions used by every cell in this walkthrough.
d_model = 6            # width of each positional-encoding vector (columns)
sequence_length = 10   # number of positions to encode (rows)

$$
PE(\text{position}, 2i) = \sin\bigg( \frac{ \text{position} }{10000^\frac{2i}{d_{model}}} \bigg)
$$

$$
PE(\text{position}, 2i+1) = \cos\bigg( \frac{ \text{position} }{10000^\frac{2i}{d_{model}}} \bigg)
$$

Writing $i$ for the actual column index of the encoding (rather than the pair index used above), we can rewrite these as

$$
PE(\text{position}, i) = \sin\bigg( \frac{ \text{position} }{10000^\frac{i}{d_{model}}} \bigg) \quad \text{when } i \text{ is even}
$$

$$
PE(\text{position}, i) = \cos\bigg( \frac{ \text{position} }{10000^\frac{i-1}{d_{model}}} \bigg) \quad \text{when } i \text{ is odd}
$$

In [5]:
# Even column indices 0, 2, 4, ... below d_model, stored as float32.
even_i = np.arange(0, d_model, 2).astype(np.float32)
even_i

array([0., 2., 4.], dtype=float32)

In [6]:
# Denominator 10000^(i / d_model) for every even index i.
even_denominator = 10000 ** (even_i / d_model)
even_denominator

array([  1.     ,  21.54435, 464.15897], dtype=float32)

In [7]:
# Odd column indices 1, 3, 5, ... below d_model, stored as float32.
odd_i = np.arange(1, d_model, 2).astype(np.float32)
odd_i

array([1., 3., 5.], dtype=float32)

In [9]:
# Denominator 10000^((i - 1) / d_model) for every odd index i; subtracting 1
# maps each odd index onto the even index it is paired with.
odd_denominator = 10000 ** ((odd_i - 1) / d_model)
odd_denominator

array([  1.     ,  21.54435, 464.15897], dtype=float32)

`even_denominator` and `odd_denominator` are the same! So we only need to compute one of them, and we call the result `denominator`.

In [10]:
# Both index parities share the same denominators, so reuse the array computed
# for the even indices (plain assignment: this is an alias, not a copy).
denominator = even_denominator

In [13]:
# Positions 0 .. sequence_length - 1 as a float32 column vector, so that
# dividing by the 1-D `denominator` broadcasts across both axes.
position = np.arange(sequence_length, dtype=np.float32)[:, np.newaxis]
position

array([[0.],
       [1.],
       [2.],
       [3.],
       [4.],
       [5.],
       [6.],
       [7.],
       [8.],
       [9.]], dtype=float32)

In [15]:
# The column vector `position` divided by the 1-D `denominator` broadcasts to
# one angle per (position, denominator) pair; sine fills the even columns and
# cosine the odd columns of the final encoding.
even_PE = np.sin(np.divide(position, denominator))
odd_PE = np.cos(np.divide(position, denominator))

In [16]:
# Sine terms: one row per position, one column per entry of `denominator`.
even_PE

array([[ 0.        ,  0.        ,  0.        ],
       [ 0.84147096,  0.04639922,  0.00215443],
       [ 0.9092974 ,  0.09269849,  0.00430886],
       [ 0.14112   ,  0.13879807,  0.00646326],
       [-0.7568025 ,  0.18459871,  0.00861763],
       [-0.9589243 ,  0.23000169,  0.01077196],
       [-0.2794155 ,  0.27490923,  0.01292625],
       [ 0.6569866 ,  0.31922463,  0.01508047],
       [ 0.98935825,  0.36285236,  0.01723462],
       [ 0.4121185 ,  0.4056985 ,  0.01938869]], dtype=float32)

In [17]:
# Column vector: one row per position, a single column.
position.shape

(10, 1)

In [18]:
# 1-D array: one denominator per even index.
denominator.shape

(3,)

In [19]:
# Broadcasting the column vector against the 1-D denominators gives a 2-D
# result: rows follow `position`, columns follow `denominator`.
even_PE.shape

(10, 3)

In [20]:
# Cosine terms, laid out the same way as `even_PE`.
odd_PE

array([[ 1.        ,  1.        ,  1.        ],
       [ 0.5403023 ,  0.998923  ,  0.9999977 ],
       [-0.41614684,  0.9956942 ,  0.9999907 ],
       [-0.9899925 ,  0.9903207 ,  0.99997914],
       [-0.6536436 ,  0.98281395,  0.99996287],
       [ 0.2836622 ,  0.97319025,  0.999942  ],
       [ 0.96017027,  0.9614702 ,  0.99991643],
       [ 0.75390226,  0.9476791 ,  0.9998863 ],
       [-0.14550003,  0.9318466 ,  0.99985147],
       [-0.91113025,  0.91400695,  0.999812  ]], dtype=float32)

In [30]:
# Pair each sine value with its cosine partner along a new trailing axis, so
# that flattening the last two axes interleaves them as sin, cos, sin, cos, ...
stacked = np.stack((even_PE, odd_PE), axis=-1)

In [32]:
# The trailing axis of length 2 holds the (sin, cos) pair for each denominator.
stacked.shape

(10, 3, 2)

In [34]:
# Merge the last two axes so each row reads sin, cos, sin, cos, ...
# The target shape is derived from `stacked` itself rather than hard-coded, so
# this cell keeps working when sequence_length or d_model is changed above.
PE = np.reshape(stacked, (stacked.shape[0], stacked.shape[1] * stacked.shape[2]))
PE.shape

(10, 6)

In [36]:
# Each row is one position; columns alternate sine and cosine terms.
print(PE)

[[ 0.          1.          0.          1.          0.          1.        ]
 [ 0.84147096  0.5403023   0.04639922  0.998923    0.00215443  0.9999977 ]
 [ 0.9092974  -0.41614684  0.09269849  0.9956942   0.00430886  0.9999907 ]
 [ 0.14112    -0.9899925   0.13879807  0.9903207   0.00646326  0.99997914]
 [-0.7568025  -0.6536436   0.18459871  0.98281395  0.00861763  0.99996287]
 [-0.9589243   0.2836622   0.23000169  0.97319025  0.01077196  0.999942  ]
 [-0.2794155   0.96017027  0.27490923  0.9614702   0.01292625  0.99991643]
 [ 0.6569866   0.75390226  0.31922463  0.9476791   0.01508047  0.9998863 ]
 [ 0.98935825 -0.14550003  0.36285236  0.9318466   0.01723462  0.99985147]
 [ 0.4121185  -0.91113025  0.4056985   0.91400695  0.01938869  0.999812  ]]


## Class

In [52]:
class PositionalEncoding():
    """Sinusoidal positional encoding computed with NumPy.

    Column ``i`` of the encoding holds ``sin(position / 10000**(i / d_model))``
    when ``i`` is even and ``cos(position / 10000**((i - 1) / d_model))`` when
    ``i`` is odd.

    Args:
        sequence_length: Number of positions (rows) to encode.
        d_model: Width of each encoding vector (columns).
    """

    def __init__(self, sequence_length, d_model):
        super().__init__()
        self.sequence_length = sequence_length
        self.d_model = d_model

    def forward(self):
        """Build the positional-encoding table.

        Returns:
            A float32 array of shape ``(sequence_length, d_model)`` whose
            columns alternate sine and cosine terms.
        """
        # Read the dimensions stored on the instance. Using same-named notebook
        # globals would ignore the constructor arguments and fail with a
        # NameError when those globals do not exist.
        even_i = np.arange(0, self.d_model, 2, dtype=np.float32)
        denominator = np.power(10000, even_i / self.d_model)
        position = np.arange(self.sequence_length, dtype=np.float32)[:, np.newaxis]

        # (sequence_length, 1) / (k,) broadcasts to one angle per
        # (position, denominator) pair.
        angle = position / denominator

        # Pair sin/cos on a trailing axis, then merge the last two axes so the
        # columns interleave as sin, cos, sin, cos, ...
        stacked = np.stack([np.sin(angle), np.cos(angle)], axis=2)
        PE = np.reshape(stacked, (stacked.shape[0], stacked.shape[1] * stacked.shape[2]))

        # For odd d_model the interleaving produces one surplus cosine column;
        # drop it so the width is always exactly d_model.
        return PE[:, :self.d_model]

In [53]:
# NOTE(review): this rebinds `PE`, which held the encoding array in the earlier
# cells, to a PositionalEncoding instance; a distinct name would avoid confusion.
PE = PositionalEncoding(sequence_length, d_model)

In [55]:
# Print the table produced by the class's forward() method.
print(PE.forward())

[[ 0.          1.          0.          1.          0.          1.        ]
 [ 0.84147096  0.5403023   0.04639922  0.998923    0.00215443  0.9999977 ]
 [ 0.9092974  -0.41614684  0.09269849  0.9956942   0.00430886  0.9999907 ]
 [ 0.14112    -0.9899925   0.13879807  0.9903207   0.00646326  0.99997914]
 [-0.7568025  -0.6536436   0.18459871  0.98281395  0.00861763  0.99996287]
 [-0.9589243   0.2836622   0.23000169  0.97319025  0.01077196  0.999942  ]
 [-0.2794155   0.96017027  0.27490923  0.9614702   0.01292625  0.99991643]
 [ 0.6569866   0.75390226  0.31922463  0.9476791   0.01508047  0.9998863 ]
 [ 0.98935825 -0.14550003  0.36285236  0.9318466   0.01723462  0.99985147]
 [ 0.4121185  -0.91113025  0.4056985   0.91400695  0.01938869  0.999812  ]]
