In [1]:
import torch
import torch.nn.functional as F
import sys
from gpt import dataset, model, utils, config

In [2]:
data_filepath = './the-verdict.txt'
iter_dataloader, tokenizer = utils.load_dataset(config.GPT_CONFIG_TEST, data_filepath)
model = model.GPT(config.GPT_CONFIG_TEST)
x, y = next(iter_dataloader)

print("Sample batch:")
print("- inputs:", utils.decode_batch(x, tokenizer))
print("- targets:", utils.decode_batch(y, tokenizer))

print("\nUntrained model output:")
out = utils.generate_text_simple(model, x, 5, config.GPT_CONFIG_TEST["context_len"])
print("- output:", utils.decode_batch(out, tokenizer)[0])

print("\nInitial Loss:")
loss = utils.calculate_loss(model(x), y)
print(loss)




Sample batch:
- inputs: [' taken up and left alone with him. I had sent all my traps in advance, and I had only to set up the easel and get to work. He had been dead only twenty-four hours, and he died suddenly, of heart disease, so that there had been no preliminary work of destruction--his face was clear and untouched. I had met him once or twice, years before, and thought him insignificant and dingy. Now I saw that he was superb.\n\n"I was glad at first, with a merely aesthetic satisfaction: glad to have my hand on such a \'subject.\' Then his strange life-likeness began to affect me queerly--as I blocked the head in I felt as if he were watching me do it. The sensation was followed by the thought: if he _were_ watching me, what would he say to my way of working? My strokes began to go a little wild--I felt nervous and uncertain.\n\n"Once, when I looked up, I seemed to see a smile behind his close grayish beard--as if he had the secret, and were amusing himself by holding it back fr

KeyboardInterrupt: 

In [19]:
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
num_epochs = 10
total_loss = 0
num_batches = 0

print("Training...\n")
for epoch in range(num_epochs):
    model.train()
    iter_dataloader, _ = utils.load_dataset(config.GPT_CONFIG_TEST, data_filepath)
    for x_batch, y_batch in iter_dataloader:
        logits = model(x_batch)
        loss = utils.calculate_loss(logits, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    print(f"Epoch {epoch+1} complete. Average loss: {total_loss/num_batches:.4f}")
    model.eval()
    print("- Input:", utils.decode_batch(x, tokenizer)[0])
    sample_out = utils.generate_text_simple(model, x, 10, config.GPT_CONFIG_TEST["context_len"]) 
    print("- Generated:", utils.decode_batch(sample_out, tokenizer)[0])
    print("-" * 50)


Training...

Epoch 1 complete. Average loss: 6.8091
- Input: "but the other doesn
- Generated: "but the other doesn. what. of in dayarming if." to
--------------------------------------------------
Epoch 2 complete. Average loss: 6.2524
- Input: "but the other doesn
- Generated: "but the other doesn.
 weekly had eyes that past . paint't
--------------------------------------------------
Epoch 3 complete. Average loss: 5.8733
- Input: "but the other doesn
- Generated: "but the other doesn said to face up in their of smiling and his
--------------------------------------------------
Epoch 4 complete. Average loss: 5.5539
- Input: "but the other doesn
- Generated: "but the other doesn of my dim in eye cry the Riviera
--------------------------------------------------
Epoch 5 complete. Average loss: 5.2778
- Input: "but the other doesn
- Generated: "but the other doesn of put the sunisburn was not he just
--------------------------------------------------
Epoch 6 complete. Average loss: 5.

In [22]:
import tiktoken
import torch
from tqdm import tqdm
from gpt.utils import create_dataloader_batch, calc_loss_batch, calc_loss_loader
from gpt.config import GPT_CONFIG_TEST
from gpt.model import GPT

# Setup device and model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GPT(GPT_CONFIG_TEST).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

# Load and split data
with open("./sidewalk.txt", "r", encoding="utf-8") as f:
    data = f.read()
train_raw, val_raw = data[:int(len(data)*0.9)], data[int(len(data)*0.9):]

# Create dataloaders
tokenizer = tiktoken.get_encoding("gpt2")
train_loader, val_loader = create_dataloader_batch(GPT_CONFIG_TEST, train_raw, val_raw, tokenizer)

# Training loop
epochs = 5
for epoch in range(epochs):
    # Train
    model.train()
    total_loss = 0
    n_batches = 0
    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
    
    for x, y in progress:
        loss = calc_loss_batch(x, y, model, device)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        n_batches += 1
        progress.set_postfix({'loss': f'{total_loss/n_batches:.4f}'})
    
    # Validate
    model.eval()
    with torch.no_grad():
        val_loss = calc_loss_loader(val_loader, model, device)
    
    print(f"\nEpoch {epoch+1}/{epochs} complete. Train loss: {total_loss/n_batches:.4f}, Validation loss: {val_loss:.4f}")

Epoch 1/5:   2%|▏         | 9/599 [00:36<39:23,  4.01s/it, loss=7.0693] 


KeyboardInterrupt: 

In [26]:
def evaluate_model(model, train_loader, val_loader, device):
    model.eval()
    with torch.no_grad():
        train_loss = calc_loss_loader(train_loader, model, device)
        val_loss = calc_loss_loader(val_loader, model, device)
    model.train()
    return train_loss, val_loss

In [27]:
# TODO: add functionality to this model pg. 147
def train_model(model, train_loader, val_loader, optimizer, epochs):
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        n_batches = 0
        progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
        for x, y in progress:
            loss = calc_loss_batch(x, y, model, device)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            n_batches += 1
            progress.set_postfix({'loss': f'{total_loss/n_batches:.4f}'})
        train_loss, val_loss = evaluate_model(model, train_loader, val_loader, device)
        print(f"\nEpoch {epoch+1}/{epochs} complete. Train loss: {train_loss:.4f}, Validation loss: {val_loss:.4f}")

In [32]:
# filepath = "./sidewalk.txt"
filepath = "./the-verdict.txt"
with open(filepath, "r", encoding="utf-8") as f:
    data = f.read()

In [33]:
train_raw, val_raw = data[:int(len(data)*0.9)], data[int(len(data)*0.9):]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GPT(GPT_CONFIG_TEST).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.004)
train_loader, val_loader = create_dataloader_batch(GPT_CONFIG_TEST, train_raw, val_raw, tokenizer)
train_model(model, train_loader, val_loader, optimizer, epochs=5)

Epoch 1/5: 100%|██████████| 136/136 [08:00<00:00,  3.53s/it, loss=5.4338]



Epoch 1/5 complete. Train loss: 0.0000, Validation loss: 7.3901


Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]



Epoch 2/5 complete. Train loss: 0.0000, Validation loss: 0.0000


Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]



Epoch 3/5 complete. Train loss: 0.0000, Validation loss: 0.0000


Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]



Epoch 4/5 complete. Train loss: 0.0000, Validation loss: 0.0000


Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]


Epoch 5/5 complete. Train loss: 0.0000, Validation loss: 0.0000



