In [1]:
import torch 
from tqdm import tqdm
from torch import nn
from typing import cast
from nanogpt.data import Data
from nanogpt.encoder import Encoder
from nanogpt.gpt import NanoGPT
from nanogpt.blm import BigramLanguageModel
from nanogpt.utils import path_to_resource_file

In [None]:
torch.set_default_device('mps')  # Running on a Mac
torch.manual_seed(1337)

<torch._C.Generator at 0x11bc38bd0>

In [3]:
with open(path_to_resource_file("tiny_shakespeare.txt"), "r") as f:
    text = f.read()

encoder = Encoder(text)
data = Data(torch.tensor(encoder.encode(text), dtype=torch.long), split=.9)

In [4]:
@torch.no_grad()
def estimate_loss(model: nn.Module, data: Data, batch_size: int, block_size: int):
    out = {}
    model.eval()
    for split in ['train', 'test']:
        eval_iters = 100
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = data.get_batch(split, batch_size=batch_size, block_size=block_size)  # type: ignore
            _, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

In [None]:
xb, yb = data.get_batch('train', 4, 8)
print('inputs:')
print(xb.shape)
print(xb)
print('targets:')
print(yb.shape)
print(yb)

print('---------')
blm = BigramLanguageModel(len(encoder))
logits, loss = blm(xb, yb)
print('Loss:', loss.item())
print(logits.shape)


inputs:
torch.Size([4, 8])
tensor([[53, 59,  6,  1, 58, 56, 47, 40],
        [49, 43, 43, 54,  1, 47, 58,  1],
        [13, 52, 45, 43, 50, 53,  8,  0],
        [ 1, 39,  1, 46, 53, 59, 57, 43]], device='mps:0')
targets:
torch.Size([4, 8])
tensor([[59,  6,  1, 58, 56, 47, 40, 59],
        [43, 43, 54,  1, 47, 58,  1, 58],
        [52, 45, 43, 50, 53,  8,  0, 26],
        [39,  1, 46, 53, 59, 57, 43,  0]], device='mps:0')
---------
Loss: 4.945623397827148
torch.Size([4, 8, 65])


In [4]:
idx = torch.zeros((1, 1), dtype=torch.long).to('mps')
print(encoder.decode(blm.generate(idx, max_new_tokens=100)[0].tolist()))


Uoas&OmKdYMjGTEzqkPVQNRM.OyOdUfZE&exKZ:Ioc-skcECOIiuex zgZEAQ;tvrYvMtVcAQYDXOhodng&?onyOAvQYoeKyLXDL


In [None]:
# Training
optimizer = torch.optim.AdamW(blm.parameters(), lr=1e-3)
for _ in tqdm(range(10000)):
    xb, yb = data.get_batch('train', batch_size=32, block_size=8)
    logits, loss = blm(xb, yb)
    loss = cast(torch.Tensor, loss)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

print(loss.item())

100%|██████████| 10000/10000 [00:12<00:00, 812.70it/s]

2.4964492321014404





In [6]:
idx = torch.zeros((1, 1), dtype=torch.long).to('mps')
print(encoder.decode(blm.generate(idx, max_new_tokens=100)[0].tolist()))


HEayo in mpery way avend oubur'er sickes bokecard dhiceny

He tw el fe oupise he, lbustselownthers;



In [None]:
batch_size = 32
context_length = 32

gpt = NanoGPT(vocab_size=len(encoder), embedding_size=64, context_length=context_length, num_heads=4, num_blocks=4, dropout=.2)

# Training
optimizer = torch.optim.AdamW(gpt.parameters(), lr=1e-3)
for _ in tqdm(range(10000)):
    xb, yb = data.get_batch('train', batch_size=batch_size, block_size=context_length)
    logits, loss = gpt(xb, yb)
    loss = cast(torch.Tensor, loss)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

print(estimate_loss(gpt, data, batch_size=batch_size, block_size=context_length))

100%|██████████| 10000/10000 [13:20<00:00, 12.49it/s]


{'train': tensor(1.6927, device='mps:0'), 'test': tensor(1.8628, device='mps:0')}


In [10]:
idx = torch.zeros((1, 1), dtype=torch.long)
for token in gpt.generate(idx, max_new_tokens=1000):
    print(encoder.decode(token[0].tolist()), end='', flush=True)

he'sll ove hand the arr flosttill sike calme I be mang tas: ong acim, said;
And nd ightan; that y you befr.

BOWhat just. OFIGHare M:
father mosty ke.
AUFIfirst a
LO:
How I somulight my ove.'

Cousild Lady, in this hatskford;
Belifers king ththey word: thouse lawful.
I The didiest mine t a anigale, kneed wish be leengetep of hirm, if swort.

KING HEDWASTINRD IONGS:
Poul Lance, strance! Fraworch greors the with grive hath morberbianince on b corfume to sweell trnown hon sunh,
And getttlle mee shat steat somory con onfor heve drads,
Furbth that willt to nountner to thy ell adskele man faice from per'd,
Oxne son, oigcty he na Afingal to t woringh;
nnoe roncie wen thin ford gealins ton her me replil dic leiges
Taby bas I lloord, mee meast my willl down!

Some more fleak'd the crseanged young be this disentenge him!

Cord hodat Give esill aggod endoe time
Ty bergat s:
The hart pod it He'e 's is tpriecesnts
That in will 'd, be I so aisgn im;
And sthat aske shont
In gof n wings yours be the E