# Decoder-only Transformer

In [1]:
# %pip install lightning   # uncomment to install Lightning into the kernel's environment

In [1]:
import torch
from torch.utils.data import TensorDataset, DataLoader

import lightning as L

from dataset import Dataset
from decoder_only_transformer import DecoderOnlyTransformer

In [2]:
# Load the training data from the project-local Dataset helper; get_data()
# returns a pair that is unpacked into the input tensor and the label tensor.
ds = Dataset()
inputs, labels = ds.get_data()

# Pair each input with its label and iterate over the pairs one at a time,
# in their stored order (these are DataLoader's defaults, spelled out here).
dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

In [3]:
# Masked Self-Attention: (Word Embedding + Positional Encoding) * Weights = Query | Key | Value

In [4]:
# Seed PyTorch's RNG before building the model so that the randomly initialised
# weights -- and therefore the pre-training predictions printed further down --
# come out the same on every Restart Kernel -> Run All.
torch.manual_seed(42)

# The vocabulary size is taken from the dataset's token -> id map; d_model and
# max_len are passed through to the project-local DecoderOnlyTransformer.
model = DecoderOnlyTransformer(num_tokens=len(ds.token_to_id), d_model=2, max_len=6)

In [5]:
def generation(model_input, max_length=6):
    """Greedily generate tokens after ``model_input`` and print them.

    Uses the notebook-level ``model`` to score the sequence and the
    notebook-level ``ds`` (``token_to_id`` / ``id_to_token``) to detect
    ``<EOS>`` and to turn ids back into tokens.

    Args:
        model_input: 1-D tensor of token ids used as the prompt.
        max_length: Upper bound on prompt length + generated tokens that are
            fed back into the model. Defaults to 6, the value that was
            previously hard-coded in the body.

    Returns:
        None. The predicted tokens are printed, one per line, under a
        "Predicted Tokens:" header.
    """
    eos_id = ds.token_to_id["<EOS>"]

    def next_token_id(sequence):
        # The model output is indexed as [-1, :], i.e. the scores in the last
        # row; the highest-scoring entry is the greedy choice. The id is kept
        # as a 1-element tensor on the prompt's device so torch.cat below
        # never mixes devices.
        scores = model(sequence)
        return scores[-1, :].argmax().unsqueeze(0).to(sequence.device)

    generated = []
    # Pure inference: no autograd graph is needed for generation.
    with torch.no_grad():
        predicted_id = next_token_id(model_input)
        generated.append(predicted_id)

        # One iteration per remaining position up to max_length.
        for _ in range(model_input.size(dim=0), max_length):
            if predicted_id.item() == eos_id:
                break

            # Feed the prediction back in and ask for the following token.
            model_input = torch.cat((model_input, predicted_id))
            predicted_id = next_token_id(model_input)
            generated.append(predicted_id)

    print("Predicted Tokens:")
    for token_id in torch.cat(generated).tolist():
        print("\t", ds.id_to_token[token_id])

In [6]:
# Two prompts built from the same three words in opposite order, each followed
# by the <EOS> token; every word is mapped to its id through ds.token_to_id.
sample_input_1 = torch.tensor(
    [ds.token_to_id[token] for token in ("what", "is", "statquest", "<EOS>")]
)
sample_input_2 = torch.tensor(
    [ds.token_to_id[token] for token in ("statquest", "is", "what", "<EOS>")]
)

In [7]:
# Baseline for the first prompt: the model has not been fitted yet at this point.
generation(model_input=sample_input_1)

Predicted Tokens:
	 what
	 is
	 what


In [8]:
# Baseline for the second prompt: the model has not been fitted yet at this point.
generation(model_input=sample_input_2)

Predicted Tokens:
	 is
	 what
	 is


In [10]:
# Fit the model with Lightning: 30 passes over the training dataloader.
trainer = L.Trainer(max_epochs=30)
trainer.fit(model=model, train_dataloaders=dataloader)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name           | Type             | Params | Mode 
------------------------------------------------------------
0 | we             | Embedding        | 10     | train
1 | pe             | PositionEncoding | 0      | train
2 | self_attention | Attention        | 12     | train
3 | fc_layer       | Linear           | 15     | train
4 | loss           | CrossEntropyLoss | 0      | train
------------------------------------------------------------
37        Trainable params
0         Non-trainable params
37        Total params
0.000     Total estimated model params size (MB)
8         Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=30` reached.


In [11]:
# First prompt again, now that trainer.fit has run.
generation(model_input=sample_input_1)

Predicted Tokens:
	 awosome
	 <EOS>


In [12]:
# Second prompt again, now that trainer.fit has run.
generation(model_input=sample_input_2)

Predicted Tokens:
	 awosome
	 <EOS>
