In [1]:
# Mount Google Drive so that files stored on Drive are reachable under
# /content/drive. The google.colab module is only available inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
# Switch the working directory to the project folder on the mounted Drive so the
# relative paths used later in this notebook (input_shakespeare.txt, ./save_files/,
# output_shakespeare.txt) resolve there.
# NOTE(review): presumably this is also what lets `from utils import *` find utils.py — confirm.
os.chdir('/content/drive/MyDrive/Workspaces/gpt_lite')

In [3]:
import torch
import torch.optim as optim
from utils import *

# <b>GPT-Lite</b>

In [4]:
# Fix the random seed so repeated runs start from the same state.
seed = 1
set_seed(seed)

# Select the compute device: the first CUDA GPU when one is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('Loaded Device: ', device)

Loaded Device:  cuda:0


### <b>Explore Input</b>

In [5]:
# Read the whole input corpus into memory as a single string.
with open('input_shakespeare.txt', mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

In [6]:
# len(text) is the number of characters in the corpus string.
# NOTE(review): the message says "tokens"; that is accurate only if tokenization
# is character-level — confirm against data() in utils.
print(f'Total number of tokens: {len(text)}')

Total number of tokens: 1115394


In [7]:
# Preview the first 500 characters of the corpus.
print(text[:500])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor


### <b>Hyperparameters</b>

In [8]:
# Run configuration. An "alt:" comment records the other value that was noted
# next to the setting (presumably a smaller/faster configuration — confirm).

# Data and batching.
train_test_split = 0.9
batch_size = 64        # alt: 16
context_size = 256     # alt: 32

# Model architecture.
n_embeddings = 384     # alt: 64
n_heads = 6            # alt: 4
n_layers = 6           # alt: 4
dropout = 0.2          # alt: 0.0

# Optimisation and evaluation schedule.
learning_rate = 3e-4   # alt: 1e-3
max_iterations = 5000
eval_interval = 500    # alt: 100
eval_iters = 200

### <b>Load Data</b>

In [9]:
# Build the dataset from the raw text. data() is not defined in this notebook
# (presumably it comes from the `utils` star import) and its five return values
# are unpacked in order. encode/decode are later used as callables (decode turns
# generated ids back into text).
# NOTE(review): train_test_split (0.9) is presumably the training fraction — confirm in utils.
vocab_size, encode, decode, train_data, val_data = data(text, train_test_split)

### <b>Initiate GPT</b>

In [10]:
# Build the model from the hyperparameters and move its parameters to the
# selected device. BigramModel is not defined in this notebook (presumably it is
# provided by `from utils import *`).
model = BigramModel(vocab_size, n_embeddings, n_heads, n_layers, context_size, dropout, device)
model = model.to(device)

# No optimizer is created here: the training cell constructs its own AdamW
# optimizer right before calling train_model, so an instance built in this cell
# would be discarded without ever being used.

# Report the model size in millions of parameters.
print(sum(p.numel() for p in model.parameters())/1e6, 'M parameters')

10.788929 M parameters


### <b>Model Training</b>

In [11]:
# Create the AdamW optimizer over the model's parameters for this training run.
# (No loss function is defined in this cell.)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Train the model; the value returned by train_model is re-bound to `model`.
# NOTE(review): an earlier note here mentioned early stopping / patience, but
# neither argument is passed in this call — confirm against train_model's signature.
model = train_model(
                        train_data, val_data,
                        batch_size = batch_size,
                        context_size = context_size,
                        max_iterations = max_iterations,
                        eval_interval = eval_interval,
                        learning_rate = learning_rate,
                        eval_iters = eval_iters,
                        n_embeddings = n_embeddings,
                        n_heads = n_heads,
                        n_layers = n_layers,
                        dropout = dropout,
                        train_test_split = train_test_split,
                        model = model,
                        optimizer = optimizer,
                        device = device
                        )


step 0: train loss 4.3667, val loss 4.3769
step 500: train loss 2.0228, val loss 2.0944
step 1000: train loss 1.6118, val loss 1.7826
step 1500: train loss 1.4484, val loss 1.6460
step 2000: train loss 1.3521, val loss 1.5804
step 2500: train loss 1.2886, val loss 1.5329
step 3000: train loss 1.2356, val loss 1.5068
step 3500: train loss 1.1969, val loss 1.4936
step 4000: train loss 1.1571, val loss 1.4880
step 4500: train loss 1.1240, val loss 1.4755
step 4999: train loss 1.0880, val loss 1.4828


In [12]:
# Persist the trained weights (state_dict only). The target folder is created
# first because torch.save does not create missing parent directories and would
# fail on a fresh workspace. `os` is imported near the top of the notebook.
os.makedirs("./save_files", exist_ok=True)
torch.save(model.state_dict(), "./save_files/shakespeare_state.pt")

# Alternative (disabled): export the whole model as TorchScript.
# model_scripted = torch.jit.script(model)
# model_scripted.save('./save_files/shakespeare_model.pt')

### <b>Generate Text</b>

In [13]:
# Seed generation with a single token of id 0 (a 1x1 long tensor on the device).
context = torch.zeros((1, 1), dtype=torch.long, device=device)

# Generate 2000 new tokens, take the first (only) sequence and decode it to text.
# NOTE(review): sampling runs in whatever mode train_model left the model in; if
# dropout is still active, consider calling model.eval() first — confirm in utils.
output = decode(model.generate(context, max_new_tokens=2000, context_size=context_size)[0].tolist())
print(output)

# Save the sample to disk. The keyword must be `as` (the previous `a s` was a
# syntax error); UTF-8 is set explicitly to match how the input corpus is read.
with open('output_shakespeare.txt', 'w', encoding='utf-8') as f:
    f.write(output)


ANTIGONUS:
Is a vex to heaven to love it no dressengthing?
No, no. What flatter, for I chancel I play this?

SOMERSET:
Caius, go like to qua need a folling eyes?

TYBALT:
Gift to determining, for the fiery and myself,
Proceeds vengeance and enfaced intempts.

QUEEN MARGARET:
O heavens and starts lives under their chites!
Beseech your party, sweet grows me weep
The faithful graces before me my when I'lder
Have strength my reedy health to thee.
Why then, or peach on hour my faith,
And fair offsame me accept in her?
Take her before soldiers me ne'er be flair,
And from their fearful tormed and them
To perpetter, having the born; thou art were.
O God, I stood blame aboard's frowness,
Dispartise meteor shinipable would have done
Thus I seem wy.

FelloIthing Seesting have fruy to that hand.
O this safe? think and he number?

ROMEO:
I know not to: let the many good how you behild!

JULIET:
Nor Corioline that never it is a wupt forth--
As beggan an obscaved, as feed--he when
The sdanished thei