In [1]:
import os

import torch
import torch.nn.functional as F
from torch import nn

import config
from models import BiagramLanguageModel, Head

# Seed torch's global RNG so that random draws made later through torch
# (e.g. the torch.randint call in get_batch) repeat when cells are re-run
# in the same order from a fresh kernel.
torch.manual_seed(1337)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "C:\Users\sachi\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\sachi\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "c:\users\sachi\onedrive\documents\github\envs\funchat\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\users\sachi\onedrive\documents\github\envs\funchat\lib\site-packages\traitlets

<torch._C.Generator at 0x1e2406ec9d0>

In [2]:
# Load the whole training corpus into memory as a single string.
with open('text.txt', mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()


# Character-level vocabulary: every distinct character of the corpus, sorted
# so that the character -> id assignment is deterministic for a given text.
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables between characters and their integer ids.
stoi = {ch: i for i, ch in enumerate(chars)}
itos = dict(enumerate(chars))


def encode(s):
    """Encoder: take a string, output a list of integers.

    Raises:
        KeyError: if `s` contains a character that is not in `stoi`.
    """
    return [stoi[c] for c in s]


def decode(l):
    """Decoder: take a list of integers, output a string.

    Raises:
        KeyError: if `l` contains an id that is not in `itos`.
    """
    return ''.join(itos[i] for i in l)


# Encode the full corpus once, then keep the first 90% of the tokens for
# training and hold out the remainder for validation.
data = torch.tensor(encode(text))
n = int(0.9*len(text))
data_train, data_val = data[:n], data[n:]


def get_batch(split):
    """Sample a random batch of (input, target) windows from one data split.

    Args:
        split: "train" selects data_train; any other value selects data_val.

    Returns:
        A pair (x, y) of tensors, each stacking config.batch_size windows of
        config.block_size consecutive tokens and moved to config.device.
        Each row of y is the matching row of x shifted one position later
        in the source data.
    """
    if split == "train":
        source = data_train
    else:
        source = data_val

    block = config.block_size
    # Random window start offsets; the upper bound leaves room for the
    # one-token look-ahead used by the targets.
    starts = torch.randint(len(source) - block, (config.batch_size,))
    inputs = torch.stack([source[s:s + block] for s in starts])
    targets = torch.stack([source[s + 1:s + block + 1] for s in starts])
    return inputs.to(config.device), targets.to(config.device)


In [3]:
# Build the language model for this vocabulary size, then move it to the
# device selected in config.
model = BiagramLanguageModel(vocab_size)
model = model.to(config.device)

In [4]:
# Report the total number of elements across all model parameters, in millions.
param_count = sum(p.numel() for p in model.parameters())
print(param_count/1e6, 'M parameters')

0.816705 M parameters


## Training

In [5]:
# Training schedule.
max_iters = 10000      # total number of optimisation steps
eval_interval = 100    # evaluate train/val loss every this many steps
eval_iters = 200       # number of batches averaged per loss estimate

# AdamW over all model parameters, with the learning rate taken from config.
optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)

@torch.no_grad()
def estimate_loss():
    """Estimate the mean loss on the train and validation splits.

    For each of the 'train' and 'val' splits, draws `eval_iters` random
    batches with `get_batch`, runs the model on them and averages the
    resulting losses.

    Gradient tracking is disabled for the whole function: nothing here is
    back-propagated, so building autograd graphs would only waste memory
    and time.

    Returns:
        dict: maps 'train' and 'val' to a 0-dim tensor holding the mean
        loss over `eval_iters` batches of that split.
    """
    out = {}
    model.eval()
    try:
        for split in ['train', 'val']:
            losses = torch.zeros(eval_iters)
            for k in range(eval_iters):
                X, Y = get_batch(split)
                _logits, loss = model(X, Y)
                losses[k] = loss.item()
            out[split] = losses.mean()
    finally:
        # Always hand the model back in training mode, even if evaluation
        # is interrupted (e.g. the cell is stopped part-way through).
        model.train()
    return out

# Main optimisation loop.  The counter is called `step` rather than `iter`
# so that the builtin iter() is not shadowed for the rest of the notebook.
for step in range(max_iters):

    # every once in a while evaluate the loss on train and val sets
    if step % eval_interval == 0 or step == max_iters - 1:
        losses = estimate_loss()
        print(f"step {step}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

    # sample a batch of data
    xb, yb = get_batch('train')

    # evaluate the loss
    logits, loss = model(xb, yb)
    # clear gradients left over from the previous step, back-propagate the
    # new loss, then apply the parameter update
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()



step 0: train loss 4.5231, val loss 4.5183
step 100: train loss 2.3918, val loss 2.4095
step 200: train loss 2.1215, val loss 2.1554
step 300: train loss 1.9237, val loss 2.0077
step 400: train loss 1.7866, val loss 1.9143
step 500: train loss 1.6950, val loss 1.8501
step 600: train loss 1.6236, val loss 1.7917
step 700: train loss 1.5730, val loss 1.7448
step 800: train loss 1.5330, val loss 1.7192
step 900: train loss 1.5002, val loss 1.6856
step 1000: train loss 1.4722, val loss 1.6646
step 1100: train loss 1.4519, val loss 1.6480
step 1200: train loss 1.4303, val loss 1.6291
step 1300: train loss 1.4139, val loss 1.6151
step 1400: train loss 1.3978, val loss 1.6042
step 1500: train loss 1.3847, val loss 1.5935
step 1600: train loss 1.3758, val loss 1.5909
step 1700: train loss 1.3625, val loss 1.5773
step 1800: train loss 1.3577, val loss 1.5822
step 1900: train loss 1.3437, val loss 1.5652
step 2000: train loss 1.3346, val loss 1.5653
step 2100: train loss 1.3272, val loss 1.5624


In [9]:
PATH = "./models/gpt1.pt"

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing the checkpoint.
os.makedirs(os.path.dirname(PATH), exist_ok=True)

# NOTE: this pickles the whole module object rather than just its
# state_dict, so loading it later needs the model class from `models`
# to be importable.
torch.save(model, PATH)

In [10]:
# Reload the checkpoint written by torch.save(model, PATH).
# - map_location remaps the stored tensors onto the configured device, so a
#   checkpoint saved on one device can be loaded on another.
# - weights_only=False is required because the file holds a full pickled
#   module, not a plain state_dict.  Unpickling can execute arbitrary code:
#   only load checkpoint files you created yourself or otherwise trust.
model = torch.load(PATH, map_location=config.device, weights_only=False)

# Put the model in evaluation mode before sampling, so that any layers that
# behave differently during training (e.g. dropout, if the model uses it)
# are switched to their inference behaviour.
model.eval()


test_text = 'Hey there! what are you doing now a days? I hope everything is fine. Lets go for a hunt.'
test_tokens = torch.tensor(encode(test_text))


# Add a leading batch dimension of size 1 and move the prompt to the device.
temp = torch.stack([test_tokens]).to(config.device)

# Generation needs no gradients; skip autograd bookkeeping while sampling.
with torch.no_grad():
    ans = model.generate(temp,1000)

# Turn every generated id sequence in the batch back into text.
ans = [decode(x.tolist()) for x in ans]

# Use a real name for the loop variable: `_` is IPython's "last output"
# variable and should not be overwritten.
for sample in ans:
    print(sample)

Hey there! what are you doing now a days? I hope everything is fine. Lets go for a hunt.

First Senator:
Richard, by scout a hoop! a thou of heart,
Deposed our mistress, which we'll see the doubtless of
Tyranne will nobly unjust to-morrow.

First Lady:
My groans wield recore, I could crave her,
Of the deperance seglate her eyes, not more stout 'em,
Thou seetest spiders, make against the duke:
Or Tower, if thou deservest, so thou art.

Second Kenator:
How fas it, in night, child, let it warrant thee,
Which were not a doeching glad of inhamt
Of Bolingbroke with Rode; and Romeo!

ISABELLA:
Why, good usurping me,
The queen, are part, by means deep in ager:
Claudio, teet was some other lust. Someo!
The day of bark, you seeing Coriolable
On it mis-bolding on hers: ne'er cannot be;
Under these foour bids do no light shave together
When Bohemo's pardon with Clifford; but oft
water's.

BUSHY:
Met it enemy are manifes. That you have aims!
Come, good night
No sir; he prattles that looks it, seeki