In [2]:
import torch
import tiktoken
from train import GPT
from torchinfo import summary

# Fetch tiktoken's "gpt2" encoding and keep it in `tokenizer` for the cells below.
tokenizer = tiktoken.get_encoding("gpt2")

In [None]:
# Smoke test: build a GPT with the hyperparameters below and check the shape of
# its output for a small random batch. Expected: (batch, seq_len, vocab_size),
# i.e. (2, 5, 50257).
torch.manual_seed(1234)  # fixed seed so weight init and the random batch are repeatable

# Bound once so the model's vocabulary and the random token range cannot drift apart.
VOCAB_SIZE = 50257

gpt = GPT(
    vocab_size=VOCAB_SIZE,
    context_length=1024,
    emb_dim=768,
    ff_int_dim_mult=4,
    n_heads=12,
    n_layers=12,
    drop_rate=0.1,
)

# Random token ids in [0, VOCAB_SIZE): 2 sequences of 5 tokens each.
dummy_tokens = torch.randint(low=0, high=VOCAB_SIZE, size=(2, 5))

# Only the output shape is inspected, so skip autograd bookkeeping for this pass.
with torch.no_grad():
    out = gpt(dummy_tokens)
print(out.shape)

torch.Size([2, 5, 50257])


In [15]:
# Check how the "gpt2" encoding handles the "<|endoftext|>" special token.
tokenizer = tiktoken.get_encoding("gpt2")

# A bare expression in the middle of a cell is evaluated and then thrown away
# (only the cell's last expression is displayed), so print this one explicitly.
print(tokenizer.decode([50256]))

text = "Here is some text<|endoftext|>more text"
# "<|endoftext|>" is allow-listed here so that it is encoded as a special token
# (a single id) rather than being treated as ordinary text.
tokenizer.encode(text, allowed_special={'<|endoftext|>'})

[4342, 318, 617, 2420, 50256, 3549, 2420]

In [None]:
# Instantiate the GPT under a fixed seed; the hyperparameters are gathered in
# one mapping and unpacked into the constructor.
torch.manual_seed(123)
gpt_kwargs = dict(
    vocab_size=50257,
    context_length=1024,
    emb_dim=768,
    ff_int_dim_mult=4,
    n_heads=12,
    n_layers=12,
    drop_rate=0.1,
)
gpt = GPT(**gpt_kwargs)

from prettytable import PrettyTable

def count_parameters(model):
    """Print a per-parameter size table for ``model`` and return the trainable total.

    Parameters whose ``requires_grad`` is falsy are left out of both the
    table and the total.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs; each parameter must provide
            ``requires_grad`` and ``numel()``.

    Returns:
        int: Sum of ``numel()`` over all parameters that require gradients.
    """
    report = PrettyTable(["Modules", "Parameters"])

    # Gather (name, element count) for every trainable parameter, in model order.
    trainable_sizes = [
        (param_name, tensor.numel())
        for param_name, tensor in model.named_parameters()
        if tensor.requires_grad
    ]

    for param_name, size in trainable_sizes:
        report.add_row([param_name, size])
    grand_total = sum(size for _, size in trainable_sizes)

    print(report)
    print(f"Total Trainable Params: {grand_total}")
    return grand_total
    
# Prints the per-parameter table; as the cell's last expression, the returned
# total is also shown as the cell's output.
count_parameters(gpt)

+----------------------------------------+------------+
|                Modules                 | Parameters |
+----------------------------------------+------------+
|            embedding.weight            |  38597376  |
|      positional_embedding.weight       |   786432   |
|       transformers.0.ln_1.weight       |    768     |
|        transformers.0.ln_1.bias        |    768     |
| transformers.0.attention.q_mat.weight  |   589824   |
| transformers.0.attention.k_mat.weight  |   589824   |
| transformers.0.attention.v_mat.weight  |   589824   |
|  transformers.0.attention.out.weight   |   589824   |
|   transformers.0.attention.out.bias    |    768     |
|       transformers.0.ln_2.weight       |    768     |
|        transformers.0.ln_2.bias        |    768     |
|    transformers.0.MLP.in_ff.weight     |  2359296   |
|     transformers.0.MLP.in_ff.bias      |    3072    |
|    transformers.0.MLP.out_ff.weight    |  2359296   |
|     transformers.0.MLP.out_ff.bias     |    76

163009536

In [4]:
from train import TinyStoriesDataset
from torch.utils.data import DataLoader
import tiktoken

# Build a TinyStories dataset and loader, then print the shapes of every batch.
# NOTE(review): absolute, machine-specific path — consider deriving it from a
# configurable data directory.
TRAIN_FILE = '/teamspace/studios/this_studio/transformers/data/TinyStoriesV2-GPT4-train.txt'

tokenizer = tiktoken.get_encoding("gpt2")
# The positional 1024 is presumably the context length — confirm against
# TinyStoriesDataset's signature.
ds = TinyStoriesDataset(TRAIN_FILE, 1024, tokenizer, end_story_idx=150)
dl = DataLoader(ds, batch_size=10, shuffle=True)

# The loop variable is `inputs`, not `input`, so the builtin `input()` is not
# shadowed for the rest of the kernel session.
for inputs, target, paddings in dl:
    print("input: ", inputs.shape)
    print("output: ", target.shape)
    print("paddings: ", paddings)


ValueError: Unknown encoding gpt2.
Plugins found: ['tiktoken_ext.openai_public']
tiktoken version: 0.8.0 (are you on latest?)

In [2]:
import torch
from MattGPT import train_gpt, GPT

# Seed, build a GPT with context_length=128, and hand it to `train_gpt` for a
# single epoch with batch size 2.
torch.manual_seed(123)

model_kwargs = {
    "vocab_size": 50257,
    "context_length": 128,
    "emb_dim": 768,
    "ff_int_dim_mult": 4,
    "n_heads": 12,
    "n_layers": 12,
    "drop_rate": 0.1,
}
gpt = GPT(**model_kwargs)

train_gpt(gpt, batch_size=2, num_epochs=1)

KeyboardInterrupt: 

In [2]:
# `paddings` is the loop variable left behind by the DataLoader loop in an
# earlier cell (its value is from the last batch iterated); on a fresh kernel
# this cell raises NameError unless that loop has run first.
len(paddings)

150