In [1]:
import os 
from pathlib import Path

def find_project_root(start: Path, marker: str = "src") -> Path:
    """Return the nearest directory at or above ``start`` that contains ``marker``.

    The notebook imports from the ``src`` package after changing directory,
    so the directory holding ``src`` is treated as the project root.

    Args:
        start: Directory to begin the upward search from.
        marker: Name of a sub-directory that identifies the project root.

    Returns:
        The first directory (``start`` itself, then each ancestor) that has a
        ``marker`` sub-directory. If none does, ``start.parent`` is returned,
        which is the "one level up" rule this cell used originally.
    """
    for candidate in (start, *start.parents):
        if (candidate / marker).is_dir():
            return candidate
    return start.parent


# Searching for the root (instead of always stepping one level up) keeps this
# cell idempotent: once the working directory *is* the project root,
# re-running the cell leaves it there rather than climbing another level.
project_root = find_project_root(Path.cwd())
os.chdir(project_root)
print("Set project_root:", project_root)

Set project_root: /home/sromo/Repos/lm-testbed


In [2]:
from numba import cuda
# Reset the current CUDA device through Numba.
# NOTE(review): presumably this is meant to release GPU state left over from
# earlier runs in the same kernel — confirm the intent.
# The guard lets the notebook run on CPU-only machines, consistent with the
# CPU fallback used where the torch device is chosen further down.
# The device handle is deliberately not bound to `device`; that name is used
# for the torch device later in the notebook.
if cuda.is_available():
    cuda.get_current_device().reset()

In [3]:
import tiktoken

# tiktoken's "gpt2" encoding is used for all tokenisation in this notebook.
tokenizer = tiktoken.get_encoding("gpt2")

# Read the whole raw corpus into memory as a single UTF-8 string.
file_path = project_root / "data" / "raw" / "the-verdict.txt"
text_data = file_path.read_text(encoding="utf-8")

# Corpus size, measured both in tokens and in characters.
total_tokens = len(tokenizer.encode(text_data))
total_characters = len(text_data)

In [4]:
# Hold out the tail of the text for validation. The cut is made on the raw
# string, i.e. by character index, before any tokenisation happens.
train_ratio = 0.9
split_idx = int(train_ratio * len(text_data))
train_data, val_data = text_data[:split_idx], text_data[split_idx:]

In [5]:
from src.configs.GPT2 import GPT_CONFIG_124M

# Rebind the name to a copy so the override below is applied to this
# notebook's own object rather than the one imported from src.configs.GPT2.
# NOTE(review): assumes the config is a plain dict — confirm.
GPT_CONFIG_124M = GPT_CONFIG_124M.copy()
# Reduce context length to reduce computational demands.
GPT_CONFIG_124M.update(context_length=256)

In [6]:
import torch

from src.dataloaders.GPT import create_dataloader_v1

# Seed PyTorch's global RNG before the loaders are built.
torch.manual_seed(123)

# Settings shared by both loaders. max_length and stride are both set to the
# model's context length.
_window = GPT_CONFIG_124M["context_length"]
_common_loader_kwargs = dict(
    batch_size=2,
    max_length=_window,
    stride=_window,
    num_workers=0,
)

# Training loader: shuffled, with drop_last enabled.
train_loader = create_dataloader_v1(
    train_data, drop_last=True, shuffle=True, **_common_loader_kwargs
)

# Validation loader: fixed order, drop_last disabled.
val_loader = create_dataloader_v1(
    val_data, drop_last=False, shuffle=False, **_common_loader_kwargs
)


In [7]:
# Use the GPU when PyTorch reports CUDA as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [8]:
from src.models.GPT2 import GPTModel

# Restore the model parameters AND the optimizer state from one checkpoint file.
out_path = project_root / "data" / "model_parameters" / "GPT2_124M" / "model_and_optimizer.pth"

# weights_only=True restricts unpickling to tensors and plain containers, so
# a tampered checkpoint file cannot execute arbitrary code on load. Only the
# two state-dict entries read below are needed from the file.
# map_location places the loaded tensors on the device selected above.
checkpoint = torch.load(out_path, map_location=device, weights_only=True)

# Build the model, move it to the target device, then load the saved weights.
model = GPTModel(GPT_CONFIG_124M)
model.to(device=device)
model.load_state_dict(checkpoint["model_state_dict"])

# The optimizer is created from the parameters of the already-moved model and
# then has its saved state restored.
# NOTE(review): the restored state presumably replaces the lr / weight_decay
# passed here with the values stored in the checkpoint — verify if these
# hyperparameters are meant to change between runs.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=0.1)
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

# Switch to training mode; the trailing semicolon suppresses the module repr
# in the notebook output.
model.train();

In [9]:
from src.training.trainer import train_model_simple

# Number of passes over the training loader for this run.
num_epochs = 1

# Continue training from the restored model and optimizer.
# NOTE(review): eval_freq / eval_iter presumably control how often losses are
# evaluated and how many batches each evaluation uses — confirm against
# src.training.trainer.
train_losses, val_losses, tokens_seen = train_model_simple(
    model,
    train_loader,
    val_loader,
    optimizer,
    device,
    num_epochs=num_epochs,
    eval_freq=5,
    eval_iter=5,
    start_context="Every effort moves you",
    tokenizer=tokenizer,
)

Ep 1 (Step 000000): Train loss: 0.629, Val loss: 6.449
Ep 1 (Step 000005): Train loss: 0.470, Val loss: 6.519
Every effort moves you?"  "Yes--quite insensible to the irony. She wanted him vindicated--and by me!"  He laughed again, and threw back the window-curtains, as I turned, and down the room, when I
