# **POSITIONAL EMBEDDINGS (ENCODING WORD POSITIONS)**

NB: The lecture notes can be found here: **[🔹 Lecture 11 Notes 🔹](lecture_11_notes.md)**


In [1]:
import torch
import tiktoken
from custom_dataloader import create_dataloader_v1, GPTDatasetV1

In [2]:
# Read the whole text file into memory as one string.
# The encoding is passed explicitly so decoding does not depend on the platform default.
with open("./data/the-verdict.txt", 'r', encoding="utf-8") as f:
    raw_text = f.read()

In [18]:
# Dimensions of the lookup table: one row per token ID, one column per embedding dimension.
vocab_size, output_dim = 50257, 256

# Randomly initialised, trainable table that maps a token ID to its embedding vector.
token_embedding_layer = torch.nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=output_dim,
)
token_embedding_layer.weight  # notebook display: the (vocab_size, output_dim) weight matrix

Parameter containing:
tensor([[-0.6650, -0.3960, -1.6920,  ...,  0.6323, -0.3739, -1.9089],
        [-0.6729, -1.0566, -0.1262,  ...,  0.9918, -1.6732,  0.6297],
        [ 1.1916,  0.3991,  0.7348,  ..., -1.2732,  1.2927, -0.3377],
        ...,
        [-0.8206,  0.0044, -0.8874,  ...,  1.2985,  0.3450,  1.5208],
        [ 1.9297, -0.3648,  0.3911,  ..., -0.0544,  0.3523, -1.1824],
        [-1.0308,  0.4683, -1.8401,  ...,  1.6654, -1.3470,  0.2870]],
       requires_grad=True)

In [20]:
# Row 0 of the weight matrix, i.e. the embedding vector stored for token ID 0,
# shown together with its shape.
token_embedding_layer.weight[0], token_embedding_layer.weight[0].shape

(tensor([-6.6501e-01, -3.9598e-01, -1.6920e+00,  4.3930e-01, -3.3955e-01,
         -5.5913e-01,  5.9424e-01,  1.0151e-01,  1.5785e-01,  6.6164e-01,
          4.1983e-01, -8.1297e-01,  1.7353e+00,  5.1869e-01,  1.2715e+00,
          1.0389e+00, -1.1038e+00,  6.5114e-01, -1.1008e+00, -4.0501e-01,
         -6.9144e-02, -8.8717e-01,  1.2001e+00, -2.7480e-01, -4.0364e-01,
          4.1064e-01,  2.1192e+00,  2.2398e-01, -8.2687e-01,  5.6745e-01,
          9.2227e-02,  1.6636e+00,  9.5709e-01,  4.9087e-01, -2.2391e-01,
          3.9019e-01, -7.3762e-01,  3.8206e-01,  9.0920e-01, -1.5982e+00,
          1.1482e-01,  1.7040e+00, -1.5467e+00,  1.3963e-01,  6.0206e-01,
          5.2654e-01, -1.0954e+00, -3.2106e-01, -1.5207e+00, -1.1420e+00,
         -1.7619e+00,  6.5756e-04, -1.7530e+00,  7.3386e-01, -1.5916e+00,
          2.2092e+00,  1.5543e-01,  8.9082e-01,  8.9937e-01, -5.5425e-01,
          7.5784e-01, -2.1007e+00, -2.4645e+00,  1.0643e+00, -9.0369e-01,
          1.5556e+00, -1.1885e+00, -1.

In [8]:
# Instantiate the data loader.
# Each sample holds `max_length` token IDs; with batch_size=8 one batch is an (8, 4) tensor.
# NOTE(review): stride == max_length presumably yields non-overlapping windows —
# confirm against create_dataloader_v1.
max_length = 4
dataloader = create_dataloader_v1(
    raw_text, 
    batch_size=8, 
    max_length=max_length,
    stride=max_length,
    shuffle=False
)

# Pull the first (inputs, targets) batch from the loader.
data_iter = iter(dataloader)
inputs, targets = next(data_iter)

In [9]:
# Show the token IDs of the first batch and the shape of the batch tensor.
print(f"Token IDs:\n {inputs}")
print(f"\nInput shape:\n {inputs.shape}")
# Uncomment to inspect the matching targets as well:
# print(f"\nTargets:\n {targets}")

Token IDs:
 tensor([[   40,   367,  2885,  1464],
        [ 1807,  3619,   402,   271],
        [10899,  2138,   257,  7026],
        [15632,   438,  2016,   257],
        [  922,  5891,  1576,   438],
        [  568,   340,   373,   645],
        [ 1049,  5975,   284,   502],
        [  284,  3285,   326,    11]])

Input shape:
 torch.Size([8, 4])


In [10]:
# Look up one `output_dim`-long (256) embedding vector for every token ID in the batch:
# (8, 4) token IDs -> (8, 4, 256) embeddings.
token_embedding = token_embedding_layer(inputs)
print(token_embedding.shape)
print(token_embedding)

torch.Size([8, 4, 256])
tensor([[[-0.2046, -1.8466, -0.6487,  ..., -0.2582,  0.1805, -0.3588],
         [ 2.0577,  0.4270,  0.8602,  ..., -2.3949,  1.9270, -0.3367],
         [ 0.3987,  0.3101, -0.4292,  ..., -0.2538, -0.3918,  0.1003],
         [-0.2639,  1.0121,  2.9990,  ..., -0.8792, -0.3242, -2.2238]],

        [[-0.2239,  0.0800,  0.3958,  ...,  1.3972, -0.5631,  2.5883],
         [ 0.2781, -0.5884,  0.2849,  ...,  0.0181,  0.9689,  1.3072],
         [ 0.8199, -0.7313,  0.4791,  ..., -0.5266, -0.6722, -0.3094],
         [ 1.6827, -0.9518,  1.0688,  ..., -1.0927,  0.5437, -0.6888]],

        [[ 0.4752, -0.6191, -1.0162,  ...,  0.9022,  1.2743, -0.5299],
         [ 0.1380, -0.1904,  0.1648,  ..., -1.0455,  0.9159, -0.4218],
         [ 1.3723,  0.2261, -0.4622,  ...,  1.9858, -0.1862,  0.3600],
         [ 1.4143, -1.5072, -0.3089,  ..., -1.1498,  0.7676,  1.2589]],

        ...,

        [[-0.9453,  0.8578,  0.8378,  ..., -0.2496,  0.0051, -0.4226],
         [ 0.4659,  0.0655, -0.15

In [14]:
# Embeddings of the first sample in the batch: one row per token in that sample.
token_embedding[0]

tensor([[-0.2046, -1.8466, -0.6487,  ..., -0.2582,  0.1805, -0.3588],
        [ 2.0577,  0.4270,  0.8602,  ..., -2.3949,  1.9270, -0.3367],
        [ 0.3987,  0.3101, -0.4292,  ..., -0.2538, -0.3918,  0.1003],
        [-0.2639,  1.0121,  2.9990,  ..., -0.8792, -0.3242, -2.2238]],
       grad_fn=<SelectBackward0>)

In [13]:
# Embedding vector of the first token of the first sample (token ID 40).
token_embedding[0][0]

tensor([-0.2046, -1.8466, -0.6487,  1.7777,  0.3168,  1.3174,  0.6371,  0.1363,
         0.4252,  1.1606, -1.1153, -1.1815, -1.8826,  0.2098, -0.7317,  1.7523,
        -0.5189, -1.3398,  0.3115, -0.2412,  1.5987, -1.4546, -0.5183, -0.0564,
        -0.0848, -0.8238,  1.7346, -0.8185, -0.6581,  0.5450, -0.5677, -0.4923,
         0.5269,  0.4290,  1.5571,  0.0674, -1.5752, -1.1630, -2.0091,  0.1694,
         0.1650,  1.8256,  0.2122, -0.0271,  1.1922, -0.4513,  1.5860, -2.2039,
         0.2653,  1.0871, -0.8103, -0.1782, -0.1119,  0.1606, -0.4982, -1.6447,
        -0.0306,  0.8655, -0.6684,  0.1114,  0.6961, -0.3315,  1.1788, -1.6366,
        -0.4531,  1.6445, -0.0268, -0.0833, -0.1129, -0.3096, -0.3807, -0.4678,
         1.3557,  1.0988,  0.7483,  0.1273, -0.3438, -1.9202,  0.0170,  1.5317,
        -0.1547,  0.4188,  0.6444, -0.6848,  2.6404,  0.4245,  0.6672, -0.7766,
         0.2676, -1.4277, -1.0998,  2.3620, -0.2716, -0.0071,  0.6622, -1.3912,
         0.7988, -0.1226,  0.1014, -0.60

In [15]:
context_length = max_length
# The positional table has one row per position in the context (context_length rows)
# and as many columns as the token-embedding dimension (output_dim).
pos_embedding_layer = torch.nn.Embedding(context_length, output_dim)
pos_embedding_layer.weight, pos_embedding_layer.weight.shape

(Parameter containing:
 tensor([[-0.3488,  1.0694,  1.2844,  ...,  0.8013,  0.4989,  0.0151],
         [-1.3783, -0.5850,  0.3231,  ..., -0.6046, -0.1767,  0.2799],
         [ 0.2607,  0.0254,  0.5547,  ..., -0.3113, -0.8192, -1.2281],
         [-1.5563,  2.0962,  0.2940,  ..., -1.0076,  0.4094,  0.9149]],
        requires_grad=True),
 torch.Size([4, 256]))

In [16]:
# Look up the embedding for each position index 0 .. max_length - 1
# (max_length equals context_length here, so every row of the table is used).
pos_embeddings = pos_embedding_layer(torch.arange(max_length))
print(pos_embeddings.shape)
print(pos_embeddings)

torch.Size([4, 256])
tensor([[-0.3488,  1.0694,  1.2844,  ...,  0.8013,  0.4989,  0.0151],
        [-1.3783, -0.5850,  0.3231,  ..., -0.6046, -0.1767,  0.2799],
        [ 0.2607,  0.0254,  0.5547,  ..., -0.3113, -0.8192, -1.2281],
        [-1.5563,  2.0962,  0.2940,  ..., -1.0076,  0.4094,  0.9149]],
       grad_fn=<EmbeddingBackward0>)
