In [None]:

import torch
from torch import nn
import torch.nn.functional as F
from model import BiagramLanguageModel , Head
import config

# Seed PyTorch's default random generator so later random draws
# (e.g. the torch.randint call in get_batch) are repeatable across runs.
SEED = 1337
torch.manual_seed(SEED)

In [2]:
# Read the whole corpus into memory as one string.
with open('text.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Character-level vocabulary: every distinct character of the corpus, sorted.
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables between a character and its integer id (its position in `chars`).
stoi = {ch: idx for idx, ch in enumerate(chars)}
itos = dict(enumerate(chars))


def encode(s):
    """Encoder: take a string, output a list of integers."""
    return [stoi[c] for c in s]


def decode(l):
    """Decoder: take a list of integers, output a string."""
    return ''.join(itos[i] for i in l)


# Encode the full corpus once; the first 90% is the training split, the rest validation.
data = torch.tensor(encode(text))
n = int(0.9 * len(text))
data_train, data_val = data[:n], data[n:]


def get_batch(split):
    """Sample a random batch of (input, target) windows from one data split.

    `split == "train"` reads from `data_train`; any other value reads from
    `data_val`. Each target row is the matching input row shifted one position
    to the right in the source data. Both tensors hold `config.batch_size`
    rows of `config.block_size` elements and are moved to `config.device`
    before being returned.
    """
    source = data_train if split == "train" else data_val
    block = config.block_size
    # Random start offsets; the exclusive upper bound keeps the shifted
    # target window inside the source tensor.
    starts = torch.randint(len(source) - block, (config.batch_size,))
    inputs = torch.stack([source[s:s + block] for s in starts])
    targets = torch.stack([source[s + 1:s + block + 1] for s in starts])
    return inputs.to(config.device), targets.to(config.device)


In [3]:
# Build the language model for this vocabulary size, then move it to the configured device.
model = BiagramLanguageModel(vocab_size)
model = model.to(config.device)

In [4]:
# Report the total number of elements across all model parameters, in millions.
n_params = sum(p.numel() for p in model.parameters())
print(n_params / 1e6, 'M parameters')

0.816705 M parameters


## Training

In [None]:
# Training schedule.
max_iters = 10000      # number of iterations of the training loop
eval_interval = 100    # report train/val loss every this many iterations
eval_iters = 200       # batches averaged per split for each loss estimate

# AdamW over all model parameters, with the learning rate taken from config.
optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)

def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

# Main optimization loop: one optimizer step per iteration, with periodic loss reports.
# The counter is named `step` rather than `iter` so the built-in iter() is not
# shadowed for the rest of the kernel session.
for step in range(max_iters):

    # every once in a while evaluate the loss on train and val sets
    # (also on the final iteration, so the last reported numbers are current)
    if step % eval_interval == 0 or step == max_iters - 1:
        losses = estimate_loss()
        print(f"step {step}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

    # sample a batch of data
    xb, yb = get_batch('train')

    # evaluate the loss
    logits, loss = model(xb, yb)
    # clear gradients left over from the previous iteration before backpropagating
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()



step 0: train loss 4.5229, val loss 4.5177
step 100: train loss 2.4600, val loss 2.4675
step 200: train loss 2.3036, val loss 2.3188
step 300: train loss 2.1449, val loss 2.1796
step 400: train loss 2.0272, val loss 2.0854
step 500: train loss 1.9359, val loss 2.0213


In [None]:
# Location of the serialized model checkpoint.
PATH = "./models/gpt1.pt"
# Saving is currently disabled; uncomment the next line to write the whole
# model object to PATH.
# torch.save(model, PATH)

In [None]:
# Restore the model from the checkpoint at PATH (presumably written with
# torch.save(model, PATH), i.e. a whole pickled module rather than a state_dict).
# - map_location lets a checkpoint saved on another device load onto config.device.
# - weights_only=False is required for a whole-module checkpoint on recent PyTorch
#   releases, where the default became weights_only=True.
# NOTE(review): this unpickles arbitrary Python objects -- only load checkpoints you trust.
model = torch.load(PATH, map_location=config.device, weights_only=False)
# Switch to eval mode so any train-only layers (e.g. dropout, if present) are inactive.
model.eval()


test_text = 'Hey there! what are you doing now a days? I hope everything is fine. Lets go for a hunt.'
test_tokens = torch.tensor(encode(test_text))


# Add a leading batch dimension of size 1 and move the prompt to the configured device.
temp = test_tokens.unsqueeze(0).to(config.device)

# Generation never calls backward(), so skip autograd bookkeeping while sampling.
# The second argument is presumably the number of new tokens to generate -- TODO confirm.
with torch.no_grad():
    ans = model.generate(temp, 1000)

# Decode every generated sequence in the batch back to text.
ans = [decode(x.tolist()) for x in ans]

# A named loop variable is used instead of `_`, which IPython reserves for the last output.
for sample in ans:
    print(sample)