In [4]:
import math
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

# exceptions show up normally
%xmode plain

# plots show up inline
%matplotlib inline

def get_device(cpu_only=True):
    """
    Returns one of cuda / mps / cpu based on availability.

    Args:
        cpu_only: When exactly ``True`` (the default), the CPU device is
            returned without probing for any accelerator.

    Returns:
        torch.device: ``cpu`` when ``cpu_only`` is True; otherwise ``cuda`` if
        CUDA is available, else ``mps`` if the MPS backend is available,
        else ``cpu``.
    """
    if cpu_only is True:
        return torch.device("cpu")
    if torch.cuda.is_available():
        return torch.device("cuda")

    # torch.backends.mps was only added in PyTorch 1.12. Look it up defensively
    # so older builds fall through to the CPU instead of raising AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")

    return torch.device("cpu")


# Device selection: with cpu_only left at True, get_device returns the CPU
# without probing for CUDA / MPS.
cpu_only = True
default_device = get_device(cpu_only=cpu_only)


# Make the project importable and remember its root (needed for reading config).
# The project root is taken to be the parent of the current working directory.
import os
import sys

projectRoot = os.path.dirname(os.getcwd())

# Register the absolute root rather than '..': a relative sys.path entry is
# resolved against whatever the working directory is at import time, and the
# membership guard stops re-runs of this cell from stacking duplicate entries.
if projectRoot not in sys.path:
    sys.path.append(projectRoot)

print(projectRoot)

Exception reporting mode: Plain
e:\Karpathy-GPT


#### Load Data + Split + Create NN Input chunks

In [5]:
from src.dataUtils import DataUtils
from src.utils import get_chunk_from_data, estimateLoss


# Build the data helper from the project root.
# NOTE(review): initialize() presumably loads the corpus and builds the
# vocabulary used below (all_data / vocab / encode) - confirm in src/dataUtils.
dataUtils = DataUtils(projectRoot)
dataUtils.initialize()

# Contiguous 80% / 10% / 10% split into train / cross-validation / test.
n_total = len(dataUtils.all_data)
train_boundary = math.floor(0.8 * n_total)
cv_boundary = math.floor(0.9 * n_total)

train_data = dataUtils.all_data[:train_boundary]
cv_data = dataUtils.all_data[train_boundary:cv_boundary]
test_data = dataUtils.all_data[cv_boundary:]

# Draw one example batch (4 sequences of 8 encoded tokens). Per the printed
# output, Y is X shifted one position ahead. The batch is placed on
# default_device, matching the training loop, so the later
# model.generate(X[2, :]) call does not mix devices when cpu_only is turned off.
X, Y = get_chunk_from_data(train_data, batch_size=4, block_size=8, encode=dataUtils.encode, d=default_device)

print(X)
print(Y)

tensor([[58, 53, 61, 52,  6,  0, 31, 59],
        [ 1, 51, 63,  1, 43, 39, 56, 58],
        [51, 53, 59, 56,  0, 63, 53, 59],
        [24, 13, 16, 37,  1, 15, 13, 28]])
tensor([[53, 61, 52,  6,  0, 31, 59, 57],
        [51, 63,  1, 43, 39, 56, 58, 46],
        [53, 59, 56,  0, 63, 53, 59, 56],
        [13, 16, 37,  1, 15, 13, 28, 33]])


#### NN Model

In [6]:
from src.gpt.baseModel import BaseModel, BaseModel_V2

# Hyperparameters
batch_size = 4
block_size = 8
feature_dim = 32
learning_rate = 1e-3
seed = 1337

# Derived Params
len_vocab = len(dataUtils.vocab)
output_dim = len_vocab

# Measurements (appended to by the training loop)
training_loss = []
cv_loss = []

# Seed torch's global RNG so that weight initialisation is repeatable across
# kernel restarts.
# NOTE(review): batch sampling and model.generate are presumably torch-based
# as well and would then also become repeatable from this point - confirm.
torch.manual_seed(seed)

# NOTE(review): the third positional argument is presumably the output size;
# output_dim equals len_vocab, so the value passed is unchanged - confirm the
# parameter order against BaseModel_V2.
model = BaseModel_V2(len_vocab, feature_dim, output_dim, block_size, d=default_device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Baseline loss estimate of the untrained model on the training split.
print(f"Loss: {estimateLoss(model, train_data, batch_size, block_size, dataUtils.encode, default_device)}")

# Sample from the untrained model, prompted with row 2 of the current batch X.
print("--------------------------Sample From Model (Before Training) --------------------------")
retval = model.generate(X[2, :], max_characters=1000)
print(dataUtils.decode(retval))


Loss: 4.589491941928864
--------------------------Sample From Model (Before Training) --------------------------
mour
youNCzVueu!jgvAo3nyLz-?Vu-PEPkPuwG:-LYga KZOnU;
sLLZCjK-;jBSPLXZz$DL;dYnwf3YQUp'ndsUnwcvg,bwLJBBLgWazkN!duMueBgIgRtKicNOPsUK'RIir;dIUZWSZKcvDdLPDn!aOU:PIKpnoRguthpDoRtpnPi.J?gCaNiEr$BIt;kWh$UH&;VhHPsatvOEnlQKHzinirtrYKv$3q&DEu;IT KoDIgDJCG;,bYddWIwj-BUrn bWElzB aLYJSrBBUHi&hKyvSY-utrztIKBv?rR
WiRr-w?XJbrv?CRsa;aLKUuwt&&Kb??KW-'D&O.ijuJwE3VJsEQ-q?Oi&fyrpKqeHdHihBKq?DRHHJrCc:rVEmLtWsQRtn;-BUrwwvizbaityaZzJdnnU.bfwvSHLRtLRrCt!HiihaVX'zjHkcJ-wIVtu'cnR.EME3WPWw&waKHt;yjANJhCbUaCoVStJLQdWLGyinRqPjKyOjDpR;Ddh
ygZczILwwnwX'h-?J.LYURfe'cUn;'cN3K;DndgjQUnb&VacVEKrvj?taajKZArTZ3NJ;b&IURovqPJoYgXPD;Ln!LzdwZnQkne.KcRStXKvLRaLt$VUQa;;yLCgoIyRraYoeXXK&hAV$SDEWiI;vNju3&uStyEtuUVopWHPcrsKv?,&QKvELbuKu;qUpnr&eWA'tW;La-zHUABini&cENSbKtKyRmjfeH$Sqctu;m-KqdbaNzDab?ZgKw$zIUzIdZyuv&bYzNdJkML;KYnRKoLeLY;VHgoANljEJ&o?
TOtgzvztLihLJd;vbeu;q$ Co-BbJ;aLBKq3P;UL-JErrLXmayzD wVmtKVZQwrKCvLJK$zgiaYvK

#### Training Loop

In [9]:
# Number of optimisation steps; every step trains on one freshly drawn batch.
epochs = 10000
for epoch in range(epochs):

    # Draw a new training batch and run the forward pass on it.
    X, Y = get_chunk_from_data(
        train_data,
        batch_size=batch_size,
        block_size=block_size,
        encode=dataUtils.encode,
        d=default_device,
    )
    logits, loss = model(X, Y)

    # Every 1000th step (starting with step 0): record the estimated training
    # loss, then the validation loss, and print the latest pair.
    if not epoch % 1000:
        for loss_history, data_split in ((training_loss, train_data), (cv_loss, cv_data)):
            loss_history.append(
                estimateLoss(model, data_split, batch_size, block_size, dataUtils.encode, default_device)
            )
        print(f"Epoch:{epoch:6d}\tTraining Loss: {training_loss[-1]:.8f}\tValidation Loss: {cv_loss[-1]:.8f}")

    # Clear old gradients, backpropagate this batch's loss, update the weights.
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()


Epoch:     0	Training Loss: 2.48745258	Validation Loss: 2.50367401
Epoch:  1000	Training Loss: 2.49193819	Validation Loss: 2.51191332
Epoch:  2000	Training Loss: 2.50293834	Validation Loss: 2.50685716
Epoch:  3000	Training Loss: 2.48522458	Validation Loss: 2.52123569
Epoch:  4000	Training Loss: 2.46328680	Validation Loss: 2.52008886
Epoch:  5000	Training Loss: 2.50595738	Validation Loss: 2.51240475
Epoch:  6000	Training Loss: 2.49516198	Validation Loss: 2.49979795
Epoch:  7000	Training Loss: 2.50906653	Validation Loss: 2.52594506
Epoch:  8000	Training Loss: 2.49118293	Validation Loss: 2.54958029
Epoch:  9000	Training Loss: 2.48203830	Validation Loss: 2.49192401


#### Sample from Model after Training

In [11]:
print("--------------------------Sample From Model (After Training) --------------------------")

# Prompt with row 2 of whichever batch X was assigned most recently (the last
# training batch when the cells are run in order) and generate 500 characters.
prompt = X[2]
retval = model.generate(prompt, max_characters=500)
print(dataUtils.decode(retval))

--------------------------Sample From Model (After Training) --------------------------
ed spleesourimy ay f asulod his m or thet, frdy s ge orveed thor hoomeed s'thid


The on tooon;
INond inourak contos!
COMat serongfou y ag hor uryshoubenimooe w gas,
CHithe nar frrs hes, n nstathace harou top amabid nid wus:
Find re facinindse, hont s d plo'tortoiee w;
Hans

y; tu aceersathethelois bghecerranguroly,

Theflf gweroummindith ce wisaso iemared.
CAn ithethybeise!-
Anes vam, d m, yon lo y.
CPrsearowingin lld h e,
Seroten pin the:
Af emmfa isince atave;
TOLO t werd o hininord wount hayozelow s
