<a href="https://colab.research.google.com/github/patelharsh94/HP-Nano-GPT/blob/main/Hp_Nano_Gpt_Run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import torch
import hyperparameters as hp
from BigramLanguageModel import BigramLanguageModel

# Load the tiny Shakespeare corpus used as training data.
with open('input.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Character-level vocabulary: every distinct character in the corpus, sorted
# so that the id assignment is deterministic across runs.
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables between characters and their integer ids.
stoi = {ch: i for i, ch in enumerate(chars)}
itos = dict(enumerate(chars))


def encode(s):
    """Return the list of integer ids for the characters of `s`.

    Raises KeyError if `s` contains a character that is not in the corpus
    vocabulary.
    """
    return [stoi[c] for c in s]


def decode(l):
    """Return the string spelled by the iterable of integer ids `l`.

    Raises KeyError if an id is not in the vocabulary.
    """
    return ''.join(itos[i] for i in l)


# Encode the whole corpus as a 1-D tensor of token ids.
data = torch.tensor(encode(text), dtype=torch.long)

# Hold out the last 10% of the corpus for validation.
n = int(0.9 * len(data))
train_data = data[:n]
val_data = data[n:]

print(f"Vocabulary size: {vocab_size}")
print(f"Training data size: {len(train_data)}")
print(f"Validation data size: {len(val_data)}")
# NOTE: despite the label, this prints the first 100 encoded token ids
# (a tensor), not the decoded characters.
print(f"First 100 characters of training data: {train_data[:100]}")

# Seed the global torch RNG so batch sampling below is reproducible.
torch.manual_seed(1337)


def get_batch(split):
    """
    Sample a random batch of (input, target) windows.

    split: 'train' selects the training data; any other value selects the
           validation data.

    Returns a pair (x, y) of tensors on hp.device, each stacking
    hp.batch_size windows of hp.block_size tokens. y holds the same windows
    as x shifted one position to the right, i.e. the next-token targets.
    """
    source = val_data if split != 'train' else train_data
    window = hp.block_size
    # Start offsets are drawn so that the target window (shifted by one)
    # still ends inside the data.
    starts = torch.randint(len(source) - window, (hp.batch_size,))
    inputs = torch.stack([source[s:s + window] for s in starts])
    targets = torch.stack([source[s + 1:s + window + 1] for s in starts])
    return inputs.to(hp.device), targets.to(hp.device)


# Sanity check: draw one training batch and report its dimensions.
# Note that this call consumes numbers from the seeded torch RNG.
xbatch, ybatch = get_batch('train')
print(f"Input batch shape: {xbatch.shape}")
print(f"Target batch shape: {ybatch.shape}")


# Build the model over the character vocabulary and move it to the
# configured device.
model = BigramLanguageModel(vocab_size)
# NOTE(review): `m` and `model` are presumably the same object (assumes
# BigramLanguageModel is a torch.nn.Module, whose .to() returns self) — confirm.
m = model.to(hp.device)

# Report the total number of parameters, in millions.
print(f"{sum(p.numel() for p in m.parameters())/1e6:.2f}M parameters")

# AdamW over all model parameters with the configured learning rate.
optimizer = torch.optim.AdamW(model.parameters(), lr=hp.learning_rate)


@torch.no_grad()
def estimate_loss():
    """
    Estimate the mean loss on the train and validation splits.

    For each split, hp.eval_iters random batches are drawn with get_batch
    and their losses averaged. Gradients are disabled for the whole call.

    Returns a dict with keys 'train' and 'val', each mapping to a 0-dim
    tensor holding the mean loss for that split.

    The model is put in eval mode for the duration of the call and is
    always switched back to train mode afterwards, even if evaluation
    raises or is interrupted.
    """
    out = {}
    model.eval()
    try:
        for split in ['train', 'val']:
            losses = torch.zeros(hp.eval_iters)
            for k in range(hp.eval_iters):
                X, Y = get_batch(split)
                # Only the loss is needed here; the logits are discarded.
                _, loss = model(X, Y)
                losses[k] = loss.item()
            out[split] = losses.mean()
    finally:
        # Never leave the model stuck in eval mode for subsequent training.
        model.train()
    return out


# The training loop.
# The loop variable is named `step` rather than `iter` so the builtin
# iter() is not shadowed at module scope for the rest of the session.
for step in range(hp.max_iters):
    # Report train/val loss every hp.eval_interval steps and on the last step.
    if step % hp.eval_interval == 0 or step == hp.max_iters - 1:
        losses = estimate_loss()
        print(f"step {step}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

    # One optimisation step on a freshly sampled training batch.
    xb, yb = get_batch('train')
    logits, loss = model(xb, yb)
    # Clear gradients from the previous step before backpropagating.
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()



Vocabulary size: 65
Training data size: 1003850
Validation data size: 111539
First 100 characters of training data: tensor([18, 47, 56, 57, 58,  1, 15, 47, 58, 47, 64, 43, 52, 10,  0, 14, 43, 44,
        53, 56, 43,  1, 61, 43,  1, 54, 56, 53, 41, 43, 43, 42,  1, 39, 52, 63,
         1, 44, 59, 56, 58, 46, 43, 56,  6,  1, 46, 43, 39, 56,  1, 51, 43,  1,
        57, 54, 43, 39, 49,  8,  0,  0, 13, 50, 50, 10,  0, 31, 54, 43, 39, 49,
         6,  1, 57, 54, 43, 39, 49,  8,  0,  0, 18, 47, 56, 57, 58,  1, 15, 47,
        58, 47, 64, 43, 52, 10,  0, 37, 53, 59])
Input batch shape: torch.Size([64, 256])
Target batch shape: torch.Size([64, 256])
10.79M parameters
step 0: train loss 4.3359, val loss 4.3320
step 500: train loss 2.0108, val loss 2.0921
step 1000: train loss 1.6133, val loss 1.7899
step 1500: train loss 1.4430, val loss 1.6547
step 2000: train loss 1.3514, val loss 1.5741
step 2500: train loss 1.2834, val loss 1.5315
step 3000: train loss 1.2352, val loss 1.5109
step 3500: train

In [8]:
# Sample 1000 new tokens from the trained model and print them as text.
# NOTE(review): the model was last put in train mode by the training cell;
# confirm whether generate() switches to eval mode itself.
print("\n--- Generating Text ---")
# Start from a single token with id 0 as the prompt.
context = torch.zeros((1, 1), dtype=torch.long, device=hp.device)
generated = m.generate(context, max_new_tokens=1000)
# Take the first (only) sequence in the batch and map ids back to characters.
token_ids = generated[0].tolist()
generated_chars = decode(token_ids)
print(generated_chars)
print("--------------------")


--- Generating Text ---

I am sure so troops to me why.

AEdieu;
Heard me.

Citizen:
Fear heir, my lord, and could, not.

Second Murderer:
And leave him with interrior live holds!

CLARENCE:
Heaven me with me speak, Warwick; for I sweet, I do;
And was with already is have made a greation.

FLORIZEL:
So she seems both my lord.

First Murderer:
Madam,' adauda!


CLARENCE:
Second murderer:
Art thou seam out on the chance of privymence,
tarquing pitches to make me incourse to your, and see you me
To may not upon your purpose receives!
But, wrongs are I to the house of Norfelthumberland,
And, that lovest sight you lenfects, Ethreugh for sense
Braft Bishop and Sicilia?--it must this first.

YORK:
Take please your change: my fair queen.
You bid! I leave you him, good Duke of York;
To buy the rew thereaf carried I'll but,
Yet you lie in clear-gyfeit prace, son your Englanc
And gentle muster for my poor gross,
Vith proud Noble branches fair blood of your will.

GREY:
With madne, there's that r