# **POSITIONAL EMBEDDINGS (ENCODING WORD POSITIONS)**

NB: The lecture notes can be found here: **[🔹 Lecture 11 Notes 🔹](lecture_11_notes.md)**


In [1]:
import torch
import tiktoken
from custom_dataloader import create_dataloader_v1, GPTDatasetV1

In [2]:
# Read the whole training text into memory as a single string.
with open("./data/the-verdict.txt", mode="r", encoding="utf-8") as f:
    raw_text = f.read()

In [3]:
# Token lookup table: one trainable row per vocabulary entry, each row an
# `output_dim`-dimensional vector.
# NOTE(review): 50257 presumably matches the GPT-2 BPE vocabulary used by the
# tokenizer in the dataloader -- confirm against custom_dataloader.
vocab_size = 50257
output_dim = 256

token_embedding_layer = torch.nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=output_dim,
)
# Randomly initialised weight matrix of shape (vocab_size, output_dim).
token_embedding_layer.weight

Parameter containing:
tensor([[ 1.2418,  1.9990,  2.8610,  ..., -0.5559,  0.9598,  2.1412],
        [ 0.4748,  0.3104, -0.6230,  ..., -1.1620,  0.9855, -1.0334],
        [-0.7485, -0.5903,  2.1708,  ..., -0.0050,  0.7619,  2.7533],
        ...,
        [-0.4299,  1.7748, -1.0724,  ...,  1.0440, -0.3609, -0.2983],
        [-1.4111,  0.4731, -1.3539,  ..., -0.6688,  0.7525,  1.1471],
        [-0.5076, -0.2344, -1.0285,  ...,  1.1958, -0.1861,  2.7678]],
       requires_grad=True)

In [15]:
# Row 1 of the weight matrix, i.e. the vector the layer returns for token id 1.
token_embedding_layer.weight[1]

tensor([ 0.4748,  0.3104, -0.6230,  1.1708, -1.1549,  0.0931, -0.4860, -2.4648,
        -1.4873, -0.5616,  1.3921,  1.7327,  0.0645,  0.5541, -0.0773, -0.2644,
         0.3761,  0.4460,  0.8174, -1.1426, -0.4609, -0.6330, -0.4704,  0.1360,
        -2.1255, -0.8119, -0.7579, -0.4400,  0.3207, -0.0504,  0.6423,  1.7533,
         0.9021,  1.5367,  1.7345, -0.1823,  0.6293,  0.6105,  0.0891, -0.1906,
         1.2174,  0.3690,  0.6673, -2.1186,  0.5779,  1.1807, -1.4425, -0.2911,
         1.2789, -0.0661, -1.1442,  1.4800,  0.1509, -0.4785, -0.1523, -1.6016,
         0.8765,  1.2124,  1.3430,  1.1046,  1.5181, -1.1457, -1.2325, -0.8779,
         0.4485,  1.2317, -0.4069,  0.7512,  0.0615,  0.5612, -0.4551,  0.8974,
        -0.1815, -0.0344,  0.6067,  0.8253,  0.1356, -0.5797,  0.7209, -0.2666,
         0.3019, -0.2783,  0.0196,  1.3538,  0.7541, -1.1488, -0.0379,  0.2517,
         2.8782, -0.5268,  0.0253,  0.2464,  1.4975,  1.6923,  0.1512, -0.2182,
         0.7652, -0.0905, -0.1101, -0.93

In [4]:
# Instantiate the data loader and pull the first batch from it.
# Each sample is `max_length` token IDs long and shuffling is disabled.
# NOTE(review): stride == max_length presumably yields non-overlapping
# windows -- confirm in custom_dataloader.create_dataloader_v1.
max_length = 4
dataloader = create_dataloader_v1(
    raw_text,
    batch_size=8,
    max_length=max_length,
    stride=max_length,
    shuffle=False,
)

# Keep the iterator around so later cells can fetch further batches.
data_iter = iter(dataloader)
inputs, targets = next(data_iter)

In [7]:
# Show the first batch of token IDs together with its shape.
print(f"Token IDs:\n {inputs}", f"\nInput shape:\n {inputs.shape}", sep="\n")
# To inspect the matching targets as well: print(f"\nTargets:\n {targets}")

Token IDs:
 tensor([[   40,   367,  2885,  1464],
        [ 1807,  3619,   402,   271],
        [10899,  2138,   257,  7026],
        [15632,   438,  2016,   257],
        [  922,  5891,  1576,   438],
        [  568,   340,   373,   645],
        [ 1049,  5975,   284,   502],
        [  284,  3285,   326,    11]])

Input shape:
 torch.Size([8, 4])


In [16]:
# Embed every token ID in the batch. The lookup adds a trailing axis of size
# `output_dim`, so the (8, 4) batch of IDs becomes an (8, 4, 256) tensor:
# one 256-dimensional vector per token, per sequence.
token_embedding = token_embedding_layer(inputs)
print(token_embedding.shape, token_embedding, sep="\n")

torch.Size([8, 4, 256])
tensor([[[ 5.6107e-01, -2.8721e+00, -2.0874e+00,  ...,  1.0934e+00,
           7.3342e-01, -5.5641e-01],
         [-2.3008e-01, -1.6473e-02, -9.2951e-01,  ..., -1.5629e+00,
           2.8862e-01, -7.0818e-01],
         [ 1.0120e+00, -1.6418e+00,  1.0437e-01,  ..., -4.5061e-01,
          -7.5167e-01, -2.0667e-01],
         [-5.8495e-01,  5.0312e-01,  1.0958e+00,  ..., -3.5038e-01,
          -9.5969e-01, -3.6839e-01]],

        [[-1.0266e+00,  1.0353e+00,  2.3454e+00,  ..., -8.9704e-01,
          -3.1230e-01,  1.9434e+00],
         [ 4.8666e-02,  1.1376e+00,  2.6233e-03,  ...,  3.5459e-01,
          -1.1449e+00,  1.8291e-01],
         [-8.6340e-01,  6.5589e-01, -1.3891e+00,  ...,  8.0684e-01,
          -1.2434e+00,  1.2133e+00],
         [ 2.1799e+00, -2.6330e-01, -4.2208e-01,  ..., -6.0304e-01,
          -1.9523e+00,  5.2990e-01]],

        [[-3.3212e-01,  1.3134e+00, -1.4113e+00,  ...,  3.0039e-01,
           5.5951e-01,  4.5453e-01],
         [ 2.7183e+00, -1.1

In [None]:
token_embedding[0][0] # embedding of the first token of the first sequence (token id 40, i.e. inputs[0][0])

tensor([ 5.6107e-01, -2.8721e+00, -2.0874e+00, -3.4492e+00, -7.8736e-01,
         9.2800e-01,  1.5405e-01,  5.3258e-01, -8.2493e-01,  7.4462e-01,
         2.6748e-01, -1.0141e+00,  5.7951e-01, -1.8981e+00,  4.7351e-01,
         1.3669e-01,  6.5274e-01, -5.6250e-01, -6.4175e-01, -1.3476e+00,
         1.9105e+00, -1.3222e+00, -1.4509e-01,  5.0085e-01, -3.7464e-01,
         3.2304e-01,  1.1412e+00, -3.0127e-02,  7.8203e-01, -2.8054e-01,
         3.8929e-02,  5.6186e-01,  5.6306e-02, -3.3603e-01, -1.4245e+00,
        -7.3667e-01,  1.1159e+00, -9.6995e-01,  4.2525e-01, -1.0085e+00,
         4.0250e-02, -1.2964e+00,  9.7455e-01,  1.1596e+00, -1.8250e-01,
         9.8200e-01,  8.5650e-01, -7.0462e-01, -2.5634e-01, -9.6582e-02,
        -1.0660e+00, -2.6402e-01,  4.9225e-01,  3.4891e-01, -1.6212e+00,
         4.9975e-02,  1.5994e+00,  7.0235e-02, -1.0234e+00,  1.1282e-02,
         7.3494e-01,  1.7988e+00,  1.9878e-01,  1.4086e+00, -1.0191e+00,
        -9.3497e-01,  1.6425e-03,  6.4572e-01,  1.6

In [21]:
# Positional lookup table: it needs one row per position in the context
# window (context_length rows) and the same width as the token embedding
# vectors (output_dim columns).
context_length = max_length
pos_embedding_layer = torch.nn.Embedding(
    num_embeddings=context_length,
    embedding_dim=output_dim,
)
# Display the weights alongside their shape.
pos_embedding_layer.weight, pos_embedding_layer.weight.shape

(Parameter containing:
 tensor([[-0.9333,  0.4751,  0.3587,  ..., -0.8867, -0.9275, -2.1109],
         [-1.1738, -0.3513,  0.5002,  ..., -1.8710, -2.1708,  0.1799],
         [ 1.3899, -1.0426, -0.6418,  ..., -0.0799, -1.5494,  0.2909],
         [ 0.3965, -1.6065, -2.0884,  ..., -2.6019, -0.0560,  1.6981]],
        requires_grad=True),
 torch.Size([4, 256]))

In [14]:
# Look up one positional vector for each position index 0 .. max_length - 1.
pos_embeddings = pos_embedding_layer(torch.arange(0, max_length))
print(pos_embeddings.shape, pos_embeddings, sep="\n")

torch.Size([4, 256])
tensor([[-0.4073,  0.6113, -1.2282,  ..., -0.9778,  0.4884, -1.7974],
        [ 0.6138,  1.0506,  1.6585,  ..., -0.7342,  0.0286,  1.1887],
        [ 0.7795,  0.5807,  0.2142,  ...,  0.2365,  1.0706, -1.4617],
        [ 1.3325,  0.8733,  0.1519,  ...,  0.0089, -0.6072, -1.5975]],
       grad_fn=<EmbeddingBackward0>)
