In [None]:
# Reload imported modules automatically before each cell executes, so edits to
# the project's source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import torch

import random_neural_net_models.mingpt.char as char
import random_neural_net_models.mingpt.model as gpt_model
import random_neural_net_models.mingpt.trainer as gpt_trainer
import random_neural_net_models.mingpt.utils as gpt_utils
import random_neural_net_models.utils as utils

logger = utils.get_logger("nb")

In [None]:
# Dataset settings handed to char.CharDataset below.
# NOTE(review): block_size=128 is presumably the context length in characters —
# confirm against char.DataConfig.
data_config = char.DataConfig(block_size=128)

In [None]:
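# One-time data download: run the command below in a shell (or prefix it with `!`
# to run it from this cell) before executing the rest of the notebook.
# wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt -O ../data/tiny-shakespear.txt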

In [None]:
# construct the training dataset
# Read the corpus inside a context manager so the file handle is closed as soon
# as the text is loaded, and pin the encoding so the result does not depend on
# the platform's default.
with open("../data/tiny-shakespear.txt", "r", encoding="utf-8") as corpus_file:
    text = corpus_file.read()
train_dataset = char.CharDataset(data_config, text)

In [None]:
# Build the run configuration, passing overrides as keyword arguments:
# max_iters=10, plus the vocabulary and block sizes reported by the dataset.
# NOTE(review): no command-line overrides are passed in this notebook — confirm
# whether char.get_config reads any on its own.
config = char.get_config(
    max_iters=10,
    vocab_size=train_dataset.get_vocab_size(),
    block_size=train_dataset.get_block_size(),
)

# Show the resolved configuration.
print(config)

In [None]:
# Apply the configured seed before the model is created.
# NOTE(review): presumably seeds the RNGs used for weight init and sampling —
# see gpt_utils.set_seed for which generators are covered.
gpt_utils.set_seed(config.system.seed)

In [None]:
# Instantiate the GPT model from the model section of the config.
model = gpt_model.GPT(config.model)

In [None]:
# construct the trainer object from the trainer section of the config,
# the model to optimize and the dataset to draw training batches from
trainer = gpt_trainer.Trainer(config.trainer, model, train_dataset)

In [None]:
# iteration callback


def batch_end_callback(trainer: gpt_trainer.Trainer):
    """Log training progress and periodically log a text sample from the model.

    Every 10th iteration the iteration time (in ms) and the current training
    loss are logged. Every 500th iteration the model is switched to eval mode,
    asked to continue a fixed prompt, and the decoded result is logged; the
    model is then put back into training mode.

    Args:
        trainer: the running trainer. Its ``iter_num``, ``iter_dt``, ``loss``
            and ``device`` attributes are read.
    """
    if trainer.iter_num % 10 == 0:
        logger.info(
            f"iter_dt {trainer.iter_dt * 1000:.2f} ms; iter {trainer.iter_num:_d}: train loss {trainer.loss.item():.5f}"
        )

    if trainer.iter_num % 500 == 0:
        # sample from the model in eval mode (no train/test scoring happens here)
        model.eval()
        try:
            with torch.no_grad():
                context = "O God, O God!"
                # encode the prompt with the dataset's char -> index mapping and
                # add a leading batch dimension before moving it to the device
                x = torch.tensor(
                    [train_dataset.stoi[s] for s in context], dtype=torch.long
                )[None, ...].to(trainer.device)
                # keep only the first (and only) sequence of the batch
                y = model.generate(
                    x, 500, temperature=1.0, do_sample=True, top_k=10
                )[0]
                completion = "".join([train_dataset.itos[int(i)] for i in y])
                logger.info(completion)
        finally:
            # revert model to training mode, even if sampling raised, so that
            # training never silently continues in eval mode
            model.train()

In [None]:
# Register batch_end_callback for the trainer's "on_batch_end" event
# (presumably fired after every training batch — confirm in gpt_trainer.Trainer).
trainer.set_callback("on_batch_end", batch_end_callback)

In [None]:
# run the optimization; progress is reported through the registered
# "on_batch_end" callback
trainer.run()

In [None]:
def int_to_str(x: int) -> str:
    """Return the character stored in ``train_dataset.itos`` for index ``x``.

    ``x`` is coerced with ``int()`` before the lookup, so any value that
    ``int()`` accepts can be passed.
    """
    token_index = int(x)
    return train_dataset.itos[token_index]


def tensor_int_to_str(x: torch.Tensor) -> str:
    """Decode a sequence of indices into text.

    Each element obtained by iterating over ``x`` is converted with
    ``int_to_str`` and the resulting pieces are concatenated in order.
    """
    decoded_chars = map(int_to_str, x)
    return "".join(decoded_chars)

In [None]:
# Take the first (input, target) pair from the dataset for a qualitative check.
x_int, y_int = next(iter(train_dataset))

# Generate in eval mode without gradient tracking, as batch_end_callback does.
# The input is moved to trainer.device so it lives on the same device the
# callback uses for the model's inputs; leaving it on the CPU fails when that
# device is not the CPU.
model.eval()
try:
    with torch.no_grad():
        pred_int = model.generate(
            x_int.unsqueeze(0).to(trainer.device), 30, do_sample=False
        )
finally:
    # revert model to training mode so the model's mode after this cell is the
    # same as it was before this change
    model.train()

print(f">>> x: \n{tensor_int_to_str(x_int)}\n")
print(f">>> y: \n{tensor_int_to_str(y_int)}\n")
print(f">>> pred: \n{tensor_int_to_str(pred_int[0])}\n\n")