In [41]:
import sys
import os
import torch
from torch.amp import autocast

sys.path.append(os.path.abspath(os.path.join('..')))

from models import gptv2 as transformer
from utilities import text_cleaning, tokenizer

In [42]:
# Model-wide settings. These names are reused by later cells (tokenizer size,
# tensor placement), so they stay at module level.
vocab_size, device = 128, "mps"

config = transformer.GPTv2Config(vocab_size=vocab_size, device=device)

# Build a throwaway instance just to report the parameter count for this config.
m = transformer.LanguageModel(config)
print(m.get_num_parameters(as_str=True))

6.069m


In [43]:
# Read the whole corpus into memory as UTF-8 text.
filepath = "../../data/gutenberg/interpretation-of-dreams.txt"
# The context manager closes the handle even if read() raises (e.g. on a
# decoding error), which the manual open()/close() pair did not guarantee.
with open(filepath, 'r', encoding='utf-8') as input_file:
	raw_text = input_file.read()

In [44]:
from utilities import text_cleaning
from utilities import tokenizer as tokenizer
# Clean the raw corpus (presumably strips Project Gutenberg boilerplate --
# confirm in utilities.text_cleaning), then fit a tokenizer limited to
# `vocab_size` tokens on the cleaned text.
text = text_cleaning.gutenberg_cleaning(raw_text)
td = tokenizer.create_tokenizer(text, num_tokens=vocab_size)

# Report how many tokens the tokenizer actually ended up with.
token_count = len(td.token_set)
print(token_count)

128


In [45]:
# The tokenizer object unpacks into three parts; the encoder/decoder callables
# are derived from the same object.
characters, idx_to_token, token_to_idx = td
encode, decode = tokenizer.get_encoder(td), tokenizer.get_decoder(td)

# Encode the full corpus once and keep it on the compute device as int64 ids.
data = torch.tensor(encode(text), dtype=torch.long, device=device)

# First 90% of the token stream is for training, the remainder for validation.
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

block_size, batch_size = config.block_size, config.batch_size
def get_batch(split):
  """Sample a random batch of (input, target) windows from one data split.

  Args:
    split: 'train' selects `train_data`; any other value selects `val_data`.

  Returns:
    A pair (x, y). Each holds `batch_size` windows of `block_size` consecutive
    entries of the chosen split; y is the same windows shifted one position
    to the right, so y[b, t] is the entry that follows x[b, t].
  """
  source = train_data if split == 'train' else val_data
  # Start offsets; the upper bound leaves room for the +1 shifted target window.
  starts = torch.randint(len(source) - block_size, (batch_size,), device=device)
  # Gather every window with one advanced-indexing op. The previous Python loop
  # sliced with 0-d device tensors, which forces a device->host sync per sample
  # (twice per batch); this yields the same values without leaving the device.
  window = starts.unsqueeze(1) + torch.arange(block_size, device=device)
  x = source[window]
  y = source[window + 1]
  return x, y

@torch.no_grad()
def estimate_val_loss(model):
  """Return the loss of `model` on one randomly sampled validation batch.

  The model is put in eval mode for the forward pass and is afterwards
  returned to whichever mode (train or eval) it was in on entry, even if the
  forward pass raises.

  Args:
    model: callable as model(X, Y) returning a (_, loss) pair.

  Returns:
    The loss as a Python float. This is a single-batch estimate, so it is noisy.
  """
  was_training = model.training
  model.eval()
  try:
    X, Y = get_batch("val")
    _, loss = model(X, Y)
  finally:
    # Restore the caller's mode instead of unconditionally forcing train mode.
    model.train(was_training)
  return loss.item()

In [46]:
# Select the "medium" float32 matmul precision setting globally.
torch.set_float32_matmul_precision("medium")

# Fresh model for training (replaces the instance used for the parameter
# count), moved to the target device; compile() is called for its side effect.
m = transformer.LanguageModel(config)
m = m.to(device=device)
m.compile()

optimizer = m.get_optimizer(lr=3e-4, weight_decay=0.001)

# Cut the learning rate by 10x once the monitored loss has failed to improve
# for more than `patience` consecutive scheduler.step() calls.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
	optimizer,
	mode='min',
	factor=0.1,
	patience=10,
)

@torch.compile(fullgraph=False)
def opt_step():
	"""Apply one update of the module-level `optimizer`, wrapped by torch.compile.

	fullgraph=False allows graph breaks (falling back to eager execution for
	the unsupported parts) instead of raising when the step cannot be captured
	as a single graph.
	"""
	optimizer.step()

In [49]:
num_steps = 2000
log_every = 25  # print the training loss every this many steps
val_every = 50  # additionally evaluate on validation data and step the LR scheduler

# Training mode only needs to be set once: estimate_val_loss leaves the model
# in train mode when it is called from here.
m.train()
for step in range(num_steps):
	xb, yb = get_batch('train')
	# NOTE(review): float16 autocast is used without gradient scaling; if
	# gradients underflow, consider torch.amp.GradScaler (confirm backend support).
	with autocast(device_type="mps", dtype=torch.float16):
		logits, loss = m(xb, yb)
	optimizer.zero_grad(set_to_none=True)
	loss.backward()
	opt_step()

	run_val = step % val_every == 0
	if not (run_val or step % log_every == 0):
		continue

	# loss.item() blocks until the device has finished the step, so only pay
	# for that synchronisation on iterations that actually print something.
	train_loss = loss.item()
	if run_val:
		val_loss = estimate_val_loss(m)
		scheduler.step(val_loss)
		print(f"[{step:04d}/{num_steps}] train: {train_loss:01.05f} val: {val_loss:01.05f}")
	else:
		print(f"[{step:04d}/{num_steps}] train: {train_loss:01.05f}")


[0000/2000] train: 1.56203 val: 1.43995
[0025/2000] train: 1.53058
[0050/2000] train: 1.53291 val: 1.42517
[0075/2000] train: 1.49940
[0100/2000] train: 1.53825 val: 1.41278
[0125/2000] train: 1.56428
[0150/2000] train: 1.49303 val: 1.40699
[0175/2000] train: 1.48880
[0200/2000] train: 1.48049 val: 1.44439
[0225/2000] train: 1.45329
[0250/2000] train: 1.55818 val: 1.41211
[0275/2000] train: 1.52154
[0300/2000] train: 1.45626 val: 1.40284
[0325/2000] train: 1.49426
[0350/2000] train: 1.51967 val: 1.35883
[0375/2000] train: 1.52190
[0400/2000] train: 1.57727 val: 1.38528
[0425/2000] train: 1.50729
[0450/2000] train: 1.46462 val: 1.42151
[0475/2000] train: 1.42498
[0500/2000] train: 1.43754 val: 1.37456
[0525/2000] train: 1.47349
[0550/2000] train: 1.51410 val: 1.39318
[0575/2000] train: 1.41240
[0600/2000] train: 1.44399 val: 1.37550
[0625/2000] train: 1.48936
[0650/2000] train: 1.46773 val: 1.39262
[0675/2000] train: 1.43367
[0700/2000] train: 1.44610 val: 1.38957
[0725/2000] train: 1.4

In [50]:
# Sample a continuation of `seed` from the trained model, printing each token
# as soon as it is produced.
seed = "The "
idx = torch.tensor([encode(seed)], dtype=torch.long, device=device)

# The training loop leaves the model in train mode; switch to eval so any
# train-only behaviour (e.g. dropout, if the model uses it) is disabled while
# sampling. Harmless if m.generate already does this itself.
m.eval()

print(seed, end="", flush=True)
# No gradients are needed for sampling; skip building the autograd graph.
with torch.no_grad():
	for token in m.generate(idx, max_new_tokens=400):
		v = token.item()
		# presumably decode returns a sequence whose first element is the text
		# of the single token passed in -- TODO confirm against utilities.tokenizer
		print(decode([v])[0], end="", flush=True)
print()

The function of the dream about comanding the took from which really furnishes the expectation of the dream and stimulus, on unmertake if it frequencess the omissions of condensations appears to be continued in our dreams, just well found up too up the fact that points into the dream the absurd judgment those have something (p. 286). It is as I shall represent understand.
In the dream about This It larrge, which originates were presented by a sexual sensation given into a bsench performance to form as itself of the au
