In [1]:
!pip install lightning
!pip install torch

Collecting lightning
  Downloading lightning-2.4.0-py3-none-any.whl.metadata (38 kB)
Downloading lightning-2.4.0-py3-none-any.whl (810 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m811.0/811.0 kB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: lightning
Successfully installed lightning-2.4.0


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as functional
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader
import lightning

There are only two prompts we want our transformer to respond to:

"What is PyTorch?" and "PyTorch is what?"

The answer to both of these is "Awesome".

In [3]:
# Vocabulary: the model's entire world consists of these 5 tokens.
token_to_id = {
    "what": 0,
    "is": 1,
    "pytorch": 2,
    "awesome": 3,
    "<EOS>": 4,
}

# Inverse mapping: swap every (token, id) pair to get id -> token.
id_to_token = {token_id: token for token, token_id in token_to_id.items()}

print(f"{token_to_id=} \n{id_to_token=}")

token_to_id={'what': 0, 'is': 1, 'pytorch': 2, 'awesome': 3, '<EOS>': 4} 
id_to_token={0: 'what', 1: 'is', 2: 'pytorch', 3: 'awesome', 4: '<EOS>'}


In [6]:
# Two training sequences, written as tokens and then translated to ids.
# Each one is a prompt, followed by <EOS>, followed by the answer "awesome".
input_sentences = [
    ["what", "is", "pytorch", "<EOS>", "awesome"],
    ["pytorch", "is", "what", "<EOS>", "awesome"],
]
inputs = torch.tensor(
    [[token_to_id[token] for token in sentence] for sentence in input_sentences]
)

# The label at each position is the token that should follow the input token
# at that same position, i.e. the input sequence shifted left by one with a
# closing <EOS>. For the first sentence: "what" -> "is", "is" -> "pytorch",
# "pytorch" -> "<EOS>", "<EOS>" -> "awesome", "awesome" -> "<EOS>".
label_sentences = [
    ["is", "pytorch", "<EOS>", "awesome", "<EOS>"],
    ["is", "what", "<EOS>", "awesome", "<EOS>"],
]
labels = torch.tensor(
    [[token_to_id[token] for token in sentence] for sentence in label_sentences]
)

# Pair every input row with its label row and wrap the pairs in a DataLoader
# that uses the default settings.
dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset)

In [7]:
# Now Positional Encoding!

class PositionEncoding(nn.Module):
    """Sinusoidal position encoding that is added to word embeddings.

    A table ``pe`` of shape ``(max_len, d_model)`` is computed once in
    ``__init__`` and registered as a buffer (so it follows the module across
    devices but is not a trainable parameter):

        PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
        PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
    """

    def __init__(self, d_model=2, max_len=6):
        """Build the position-encoding table.

        Args:
            d_model: Number of embedding values per token. Odd values are
                supported; the last column then only has a sine term.
            max_len: Maximum number of tokens (positions) that can be encoded.
        """
        super().__init__()
        pe = torch.zeros(max_len, d_model)

        # Column vector of positions 0..max_len-1, shape (max_len, 1).
        position = torch.arange(start=0, end=max_len, step=1).float().unsqueeze(1)
        # Even embedding indices 0, 2, 4, ... (the "2i" in the formulas).
        embedding_index = torch.arange(start=0, end=d_model, step=2).float()

        # 1 / 10000^(2i / d_model), one entry per sine column.
        div_term = 1 / torch.tensor(10000.0) ** (embedding_index / d_model)

        # Even columns get the sine term.
        pe[:, 0::2] = torch.sin(position * div_term)

        # Odd columns get the cosine term. When d_model is odd there is one
        # fewer odd column than even column, so drop the surplus frequency
        # instead of failing with a shape mismatch.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])

        self.register_buffer('pe', pe)

    def forward(self, word_embeddings):
        """Add position encodings to ``word_embeddings``.

        Args:
            word_embeddings: Tensor of shape ``(seq_len, d_model)`` or, with
                leading batch dimensions, ``(..., seq_len, d_model)``.

        Returns:
            A tensor of the same shape with the first ``seq_len`` rows of the
            position table added (broadcast over any batch dimensions).

        Raises:
            ValueError: If ``seq_len`` exceeds the ``max_len`` the table was
                built with.
        """
        # The sequence axis is the second-to-last one. For unbatched
        # (seq_len, d_model) input this is the same as size(0); for batched
        # input it avoids mistaking the batch size for the sequence length.
        seq_len = word_embeddings.size(-2)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {self.pe.size(0)}"
            )
        return word_embeddings + self.pe[:seq_len, :]