### Set Seed

In [1]:
import random
import numpy as np
def set_seed(seed):
    """Seed the random number generators used in this notebook.

    The same value is passed, in order, to Python's ``random`` module,
    NumPy's global RNG, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``.

    Args:
        seed: Seed value handed unchanged to each library.
    """
    # No explicit `import torch` is visible in this notebook, so import it
    # here instead of relying on the name being exposed by another cell.
    import torch

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [2]:
from src.model import *
from src.tokenizer import *
from src.utils import *

In [3]:
config = {
    # Model size. The whole dict is passed to GPT(...) and Trainer(...) below.
    "n_layer": 8,
    "n_head": 16,
    "n_embd": 512,

    # Placeholders: overwritten further down once the tokenizer's vocab size
    # and the chosen block size are known.
    "vocab_size": None,
    "block_size": None,

    # Drop rates (presumably dropout probabilities for embeddings, residual
    # paths and attention -- confirm against src.model).
    "embd_pdrop": 0.1,
    "resid_pdrop": 0.1,
    "attn_pdrop": 0.1,

    # Runtime settings: pick the GPU when torch reports one, otherwise CPU.
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "num_workers": 3,

    # Optimizer parameters.
    "max_iters": None,  # stays None; not assigned in the visible cells
    "batch_size": 64,
    "learning_rate": 3e-4,
    "betas": (0.9, 0.95),
    "weight_decay": 0.1,  # only applied on matmul weights
    "grad_norm_clip": 1.0,
}

In [4]:
# Seed Python's `random`, NumPy and torch before the dataset split and the
# model construction further down, so both run from a known RNG state.
set_seed(42)

In [5]:
# Display the device string chosen when `config` was built
# ('cuda' if torch reported CUDA as available, otherwise 'cpu').
config['device']

'cpu'

In [6]:
# Read the whole corpus as UTF-8 and lowercase it in the same step.
with open("./data/shakespeare.txt", mode="r", encoding="utf-8") as f:
    text = f.read().lower()

In [7]:
# Build the project's BPE tokenizer and pull out the pieces used later:
# `encoder` is applied to the corpus, `vocab_size` is copied into `config`,
# and the `bpe` object itself is pickled at the end of the notebook.
bpe = BPE()
encoder, decoder, vocab_size = bpe.encoder, bpe.decoder, bpe.vocab_size

In [8]:
# NOTE(review): `e` is not referenced again in the visible cells. Presumably
# this call is kept for a side effect (e.g. initialising the tokenizer) --
# confirm against src.tokenizer before removing it.
e = bpe.get_the_encoder()

In [9]:
# Run the tokenizer's encoder over the entire lowercased corpus in one call.
# Presumably returns a sequence of token ids -- confirm against src.tokenizer.
encoded_dataset = encoder(text)

In [10]:
# block_size is handed to ShakespeareDataset here and copied into
# config['block_size'] in a later cell.
block_size = 128
# NOTE(review): batch_size is not referenced again in the visible cells; the
# Trainer is given `config` directly.
batch_size = config['batch_size']
# Wrap the encoded corpus in the project's dataset class (presumably yielding
# block_size-long windows -- confirm in src).
dataset = ShakespeareDataset(encoded_dataset, block_size)

In [11]:
from torch.utils.data import random_split

In [12]:
# Split sizes: 90% train, remainder test. int() truncates, so any leftover
# sample goes to the test split and the two sizes always sum to len(dataset).
train_size = int(0.9 * len(dataset))
test_size = len(dataset) - train_size

In [13]:
# Randomly partition the dataset into train/test subsets of the sizes above.
# No explicit generator is passed, so the split draws from torch's global RNG:
# it matches a previous run only if set_seed(42) ran first and the same cells
# executed in between. Re-running this cell alone gives a different split.
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

In [14]:
# Fill in the two entries that were left as None when `config` was defined.
config.update({
    'vocab_size': vocab_size,
    'block_size': block_size,
})

#### Model Initialization

In [15]:
# Instantiate the model from the completed config (vocab_size and block_size
# were filled in just above). The constructor prints the parameter count.
model = GPT(config)

number of parameters: 51.02M


### Model Training

In [16]:
# Build the trainer from the shared config, the model and the train/test
# splits. Construction prints the device it will run on.
trainer = Trainer(config, model, train_dataset, test_dataset)

running on device cpu


In [18]:
# Run the trainer (presumably the full training loop -- see Trainer in src).
# NOTE(review): the execution counters jump from In [16] to In [18] here and
# to In [26] at the save cell, so cells were executed out of sequence or
# deleted. Restart the kernel and run all cells to confirm the notebook still
# works top to bottom.
trainer.run()

#### Save Trained Model

In [26]:
import os

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing; otherwise the save fails on a fresh
# checkout that has no ./saved_models directory.
os.makedirs('./saved_models', exist_ok=True)

# Persist only the model's state_dict, not the GPT object itself.
torch.save(model.state_dict(), './saved_models/model_shakespeare_new_v5_latest.pth')

In [27]:
import pickle

# Pickle the whole `bpe` tokenizer object (not just the `encoder` callable
# taken from it earlier), presumably so it can be restored alongside the
# saved weights.
# NOTE(review): unpickling needs the BPE class to be importable, and
# pickle.load should only ever be used on files you trust.
with open("./saved_models/encoder_shakespeare_v5.pkl", "wb") as f:
    pickle.dump(bpe, f)