## --------------Position-Embedding(Word Positions)-------------------

## 1. Creating the input-target pairs dataloader

In [141]:
# Reading raw text file

# Decode explicitly as UTF-8 so the result does not depend on the
# platform's default text encoding
with open("the-verdict.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()
# Preview the first 30 characters
raw_text[:30]

'I HAD always thought Jack Gisb'

In [142]:
import torch
from torch.utils.data import DataLoader

In [143]:
# Create the Input-Pair targets class
import tiktoken

class InputTargetV1(torch.utils.data.Dataset):
    """Map-style dataset of (input, target) token-ID windows.

    The text is encoded with the "gpt2" encoding from ``tiktoken`` and cut
    into windows of ``context_size`` token IDs, one window starting every
    ``stride`` tokens. Each target window is its input window shifted one
    token to the right.

    Args:
        text: Raw text to tokenize.
        context_size: Number of token IDs per window; must be positive.
        stride: Offset between the starts of consecutive windows; must be
            positive.

    Raises:
        ValueError: If ``context_size`` or ``stride`` is not positive.
    """

    def __init__(self, text, context_size, stride):
        # Validate before doing any tokenizer work: a non-positive stride or
        # window size would otherwise yield an empty or meaningless dataset.
        if context_size <= 0:
            raise ValueError(f"context_size must be positive, got {context_size}")
        if stride <= 0:
            raise ValueError(f"stride must be positive, got {stride}")

        self.input_ids = []
        self.target_ids = []
        tokenizer = tiktoken.get_encoding("gpt2")
        token_ids = tokenizer.encode(text)
        # Stop context_size tokens before the end so the target window,
        # which reaches one token further than the input, stays in range.
        for start in range(0, len(token_ids) - context_size, stride):
            end = start + context_size
            self.input_ids.append(torch.tensor(token_ids[start:end]))
            self.target_ids.append(torch.tensor(token_ids[start + 1:end + 1]))

    def __len__(self):
        """Return the number of (input, target) windows."""
        return len(self.input_ids)

    def __getitem__(self, idx):
        """Return the ``(input_ids, target_ids)`` pair stored at ``idx``."""
        return self.input_ids[idx], self.target_ids[idx]

from torch.utils.data import DataLoader

# Creating the dataloader
def createDataloader(text, context_size=4, stride=1, shuffle=True, drop_last=True, batch_size=4, workers=0):
    """Build a ``DataLoader`` over the input/target windows of ``text``.

    Args:
        text: Raw text handed to ``InputTargetV1``.
        context_size: Number of token IDs per window.
        stride: Offset between the starts of consecutive windows.
        shuffle: Forwarded to ``DataLoader`` as ``shuffle``.
        drop_last: Forwarded to ``DataLoader`` as ``drop_last``.
        batch_size: Forwarded to ``DataLoader`` as ``batch_size``.
        workers: Forwarded to ``DataLoader`` as ``num_workers``.

    Returns:
        The configured ``DataLoader``.
    """
    windows = InputTargetV1(text, context_size, stride)
    return DataLoader(
        windows,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=workers,
        drop_last=drop_last,
    )

In [144]:
# Build the dataloader of input/target pairs from the raw text.
# stride equals context_size here, so consecutive windows do not overlap.

dataloader = createDataloader(
    text=raw_text,
    batch_size=8,
    context_size=4,
    stride=4,
)

In [145]:
# Peek at a single batch: iterate the loader and stop after the first item
for batch_data in dataloader:
    # Each batch is an [inputs, targets] pair holding 8 samples each
    batch_inputs, batch_targets = batch_data
    print(len(batch_inputs))
    print(len(batch_targets))
    print(batch_data)
    break
        

8
8
[tensor([[41186, 39614,  1386,    11],
        [14005,  1801,  2093, 41160],
        [  925,   257,  1207,  8344],
        [24357,  1871, 12734,   379],
        [  465, 13476,    11,   339],
        [  470,  6842,   407,   284],
        [  355,  1752,   530,   550],
        [  306,    11,   475,   465]]), tensor([[39614,  1386,    11,   287],
        [ 1801,  2093, 41160,    11],
        [  257,  1207,  8344,   803],
        [ 1871, 12734,   379,  1123],
        [13476,    11,   339,   550],
        [ 6842,   407,   284,   423],
        [ 1752,   530,   550,   890],
        [   11,   475,   465,  2951]])]


## 4. Embedding-layer

In [146]:
import torch

In [147]:
# Initializing the embedding layer

# Fix the RNG seed so the randomly initialized weights are reproducible
# across kernel restarts
torch.manual_seed(123)

vocab_size = 50257  # number of rows in the lookup table (one per token ID)
output_dim = 256    # length of each embedding vector
embedding_layer = torch.nn.Embedding(vocab_size, output_dim)
print(embedding_layer.weight, embedding_layer.weight.shape)

Parameter containing:
tensor([[ 0.5178, -0.1704, -0.9528,  ..., -2.5087, -0.0688,  0.5584],
        [ 0.8835,  1.5265, -0.0493,  ...,  0.8239,  0.4073,  1.9341],
        [-1.0035,  0.2318,  0.0290,  ..., -2.0758, -0.8288, -0.6821],
        ...,
        [-1.2668, -0.7697,  0.7771,  ..., -1.3613,  1.6117,  0.7102],
        [ 1.0284, -1.3671, -0.4611,  ..., -1.7949,  0.1261,  0.2523],
        [-0.2564,  0.9530,  0.6455,  ..., -1.4528, -1.1175, -0.3538]],
       requires_grad=True) torch.Size([50257, 256])


## Generating the Embedding vector for each input

This generates a 256-dimensional vector for each token ID.

In [148]:
# Example using one batch of inputs

# Pull a single batch; the loader shuffles by default, so this is a
# randomly drawn batch rather than the start of the text
data_iter = iter(dataloader)
inputs, targets = next(data_iter)
# Look up the embedding vector of every token ID in the batch
token_embeddings = embedding_layer(inputs)
print(token_embeddings)
print(inputs)
print("New batch shape = ", token_embeddings.shape, "Old batch shape = ", inputs.shape)

tensor([[[ 0.6690,  1.1381,  0.9624,  ...,  0.7295, -0.3945,  1.3765],
         [ 0.1877,  1.0852, -1.3978,  ..., -0.9610, -0.3061, -0.6785],
         [ 1.1640, -0.6520,  0.5414,  ..., -0.7368, -0.1855, -0.4861],
         [ 0.7021,  1.7719, -0.0818,  ..., -0.2265,  1.2508,  0.3222]],

        [[-1.1051, -0.4490,  0.4626,  ..., -0.2854,  0.6049,  0.4611],
         [ 0.0213, -0.7798,  0.8014,  ..., -0.3454,  0.1818, -1.9946],
         [ 0.1106, -0.5914,  0.5721,  ...,  1.3130,  2.7730,  0.8521],
         [-0.4169,  0.2237, -0.2018,  ..., -0.5885, -0.1785,  0.4377]],

        [[ 0.6272, -1.0081,  1.5113,  ...,  2.1918, -0.2853, -0.7202],
         [ 0.5513, -1.1779,  1.2864,  ..., -0.6121, -0.9824,  1.7844],
         [ 1.5172, -0.4209,  0.4708,  ...,  0.2462,  0.5986,  1.7397],
         [-1.1983,  2.0755,  1.4999,  ...,  1.6147,  0.0358, -0.4715]],

        ...,

        [[-0.1334,  0.3464, -0.1582,  ..., -1.4435, -0.8234,  0.5223],
         [-0.5353,  0.1816,  1.3806,  ..., -0.7710,  0.62

In [149]:
# Re-display the token embedding table (the layer's weight matrix);
# the values match the ones printed when the layer was created
embedding_layer.weight

Parameter containing:
tensor([[ 0.5178, -0.1704, -0.9528,  ..., -2.5087, -0.0688,  0.5584],
        [ 0.8835,  1.5265, -0.0493,  ...,  0.8239,  0.4073,  1.9341],
        [-1.0035,  0.2318,  0.0290,  ..., -2.0758, -0.8288, -0.6821],
        ...,
        [-1.2668, -0.7697,  0.7771,  ..., -1.3613,  1.6117,  0.7102],
        [ 1.0284, -1.3671, -0.4611,  ..., -1.7949,  0.1261,  0.2523],
        [-0.2564,  0.9530,  0.6455,  ..., -1.4528, -1.1175, -0.3538]],
       requires_grad=True)

## Generating the positional embeddings

- We create an embedding vector for each position


- shape = [context_length, output_dimensions]

In [150]:
# Getting the context length from the first batch

# Every row of the batch has the same length, so read it directly from the
# second dimension instead of looping over the rows
context_size = inputs.shape[1]
context_size

4

In [151]:
# Build the positional embedding table: one trainable vector per position
# in the context window

positional_embedding_layer = torch.nn.Embedding(
    num_embeddings=context_size,
    embedding_dim=output_dim,
)
(positional_embedding_layer.weight, positional_embedding_layer.weight.shape)

(Parameter containing:
 tensor([[-0.0901, -1.6436, -0.5845,  ..., -0.8266,  0.4108, -0.3341],
         [-0.2589, -1.6159, -0.1921,  ..., -0.9727, -0.7486,  0.7387],
         [-0.4493,  0.0408, -2.4012,  ...,  0.4037, -0.3420,  0.0703],
         [ 1.1106,  0.5037,  0.5805,  ...,  0.7508, -1.5422,  2.1911]],
        requires_grad=True),
 torch.Size([4, 256]))

In [152]:
# Position indices 0 .. context_size - 1; derived from context_size rather
# than a hard-coded 4 so the demo stays in sync with the actual window length
torch.arange(context_size)

tensor([0, 1, 2, 3])

In [153]:
# Looking up a single position index returns that position's row of the
# layer's weight matrix (here: row 1)
positional_embedding_layer(torch.tensor(1))

tensor([-0.2589, -1.6159, -0.1921,  0.6388,  0.7441, -1.3314, -1.8576,  0.9419,
        -0.8074, -0.9684, -1.7818, -0.7659, -0.2781, -1.2846,  0.5704, -0.5450,
        -0.4168, -0.0484,  0.2756,  1.9855, -0.1068, -0.9800, -0.3448,  0.4535,
         1.1460, -0.2845,  1.9568, -0.6479,  0.3111,  1.8521, -0.2772, -0.7320,
        -0.8943, -1.8134,  0.1192, -0.7232, -1.0843,  0.2610,  0.8612, -0.4366,
         1.1077,  0.3016, -0.0544, -1.2166, -0.3323,  1.3721,  0.0737, -1.9004,
        -0.7153,  1.6733, -1.2304, -0.7418,  1.3883,  0.5080,  2.0998,  0.0786,
         0.2045,  1.0595, -0.6164, -1.7452, -1.4288, -0.0795, -1.3050,  0.5518,
         1.6310, -0.0471, -0.2818,  0.4896,  0.6838,  0.8488,  0.5804, -1.5464,
        -1.0935, -0.5829,  0.3518,  0.7696, -0.1511, -1.0045, -0.3071,  0.1626,
         0.4616,  0.5121, -2.1564, -1.8176,  0.1345,  0.4961, -1.2961,  0.3802,
        -1.0648,  0.9077,  0.4614,  0.5925, -1.1326,  0.1554, -0.1843,  0.0914,
        -0.3598, -1.0147,  0.3275,  0.04

In [154]:
# Look up one embedding per position in the context window.
# torch.arange(context_size) supplies the position indices 0 .. context_size - 1.

position_ids = torch.arange(context_size)
positional_embeddings = positional_embedding_layer(position_ids)
positional_embeddings

tensor([[-0.0901, -1.6436, -0.5845,  ..., -0.8266,  0.4108, -0.3341],
        [-0.2589, -1.6159, -0.1921,  ..., -0.9727, -0.7486,  0.7387],
        [-0.4493,  0.0408, -2.4012,  ...,  0.4037, -0.3420,  0.0703],
        [ 1.1106,  0.5037,  0.5805,  ...,  0.7508, -1.5422,  2.1911]],
       grad_fn=<EmbeddingBackward0>)

In [155]:
# Compare shapes before adding: the positional embeddings have no leading
# batch dimension, while the token embeddings do
token_embeddings.shape, positional_embeddings.shape

(torch.Size([8, 4, 256]), torch.Size([4, 256]))

## Creating input embeddings

Input embeddings = positional embeddings + token embeddings

In [156]:
# Remember these token embeddings are for a single batch - we pulled it from the dataloader with an iterator

# Broadcasting adds the same positional embeddings to every sample in the
# batch, so the result keeps the token embeddings' shape
input_embeddings = token_embeddings + positional_embeddings
input_embeddings.shape

torch.Size([8, 4, 256])