In [1]:
import torch
import numpy as np
from matplotlib import pyplot as plt
import csv
import tiktoken
import random
from model import GPTConfig, GPT
import time
import os
import math

In [2]:
# Set the random seed so we get the same results

seed = 2845

np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f039e74f550>

First, we need read the raw data

In [3]:
with open('poetry.csv', newline='') as csvfile:
    raw_data = list(csv.reader(csvfile))

Then, we process it into a form that our GPT model can use. This is where we have to design our prompt and use soft prompting. Fill in the function `process_poem` to return a prompt based on the information we have about the poem

In [4]:
def process_poem(author, name, age, poem_type):
    """
    Args:
        author: str
            The author of the poem
        name: str
            The name of the poem
        age: str
            The "age" that the poem is from (either "Renaissance" or "Modern")
                Note: each poem author will either be in the Renaissance
                      or Modern age, meaning that this is redundant information
        poem_type: str
            The type of poem, will be one of these: "Love", "Mythology & Folklore", "Nature"
    
    Returns:
        prompt: str
            The prompt that we use for soft prompting
    """
    
    return "Here is a " + age + " " + poem_type + " poem written by " + author + ' called "' + name + '"\n\n'

enc = tiktoken.get_encoding("gpt2")

encode = lambda x: enc.encode_ordinary(x)

dataset = [(encode(process_poem(author, name, age, poem_type)), 
            encode(poem_content)) 
           for author, poem_content, name, age, poem_type in raw_data[1:]]

max_prompt_length = max([len(poem[0]) for poem in dataset])
max_poem_length   = max([len(poem[1]) for poem in dataset])

random.shuffle(dataset)

n = len(dataset)
train_data = dataset[:int(n*0.9)]
val_data = dataset[int(n*0.9):]

Next, we need to write code to sample from our dataset and generate a batch of data for us to train on.

In [5]:
def generate_batch(data):
    """
    Args:
        data: List[prompt, poem_content]
            The dataset that we want to sample from (either training or validation)
    Returns:
        x: np.array[shape=(batch_size, datapoint_length)]
        y: np.array[shape=(batch_size, datapoint_length)]
    """
    
    sample_index = random.randrange(len(data))
    
    concat_data = np.array(data[sample_index][0] + data[sample_index][1])
    
    block_size = min(1025, len(concat_data))
    
    x = np.zeros(shape=(batch_size, block_size-1), dtype=np.int64)
    y = np.zeros(shape=(batch_size, block_size-1), dtype=np.int64)

    x = concat_data[:block_size-1]
    y = concat_data[1:block_size]
    
    return x, y

## Now, we train the model

First things first, we set some parameters

In [6]:
out_dir = 'out'
log_interval = 1
eval_interval = 8
eval_iters = 20

batch_size = 64
dropout = 0.1

learning_rate = 2e-4 # max learning rate
max_iters = 64 # total number of training iterations
warmup_iters = 4
lr_decay_iters = 32
weight_decay = 1e-1
beta1 = 0.9
beta2 = 0.95
min_lr = 2e-6

grad_clip = 1.0
device = "cuda"

config_keys = [k for k,v in globals().items() if not k.startswith('_') and isinstance(v, (int, float, bool, str))]
config = {k: globals()[k] for k in config_keys}

Then we load the GPT2-medium model

In [7]:
print(f"Initializing from OpenAI GPT-2 weights")

override_args = dict(dropout=dropout)
model = GPT.from_pretrained("gpt2-medium", override_args)
model_args = {}
for k in ['n_layer', 'n_head', 'n_embd', 'block_size', 'bias', 'vocab_size']:
    model_args[k] = getattr(model.config, k)

model.to(device)
model.eval()
scaler = torch.cuda.amp.GradScaler(enabled=False)
optimizer = model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), "cuda")
ctx = torch.amp.autocast(device_type="cuda", dtype=torch.float32)

Initializing from OpenAI GPT-2 weights


  from .autonotebook import tqdm as notebook_tqdm


loading weights from pretrained gpt: gpt2-medium
forcing vocab_size=50257, block_size=1024, bias=True
overriding dropout rate to 0.1
number of parameters: 353.77M
using fused AdamW: True


In [8]:
def sample_model(author, name, age, poem_type):
    start_ids = encode(process_poem(author, name, age, poem_type))
    x = (torch.tensor(start_ids, dtype=torch.long, device=device)[None, ...])

    with torch.no_grad():
        with ctx:
            y = model.generate(x, 256, temperature=0.8, top_k=200)
            return enc.decode(y[0].tolist())

In [9]:
print(sample_model("HENRY VIII, KING OF ENGLAND", "My Wife", "Renaissance", "Love"))

Here is a Renaissance Love poem written by HENRY VIII, KING OF ENGLAND called "My Wife"


My Wife is a beautiful mistress with a sweet heart, The world is indeed a beautiful place, And it has always been a beautiful place.


'My Wife' is based on the poem the Beatles wrote to Frank Sinatra about their love relationship.


Here is a verse about Ethel Merman from the song "Ethel Goes to the Prom"


Ethel Merman on the Prom song "Ethel Merman Goes to the Prom"


Ethel Merman is an attractive and lovely woman, Ethel Merman is my wife and has been my wife since I was fourteen years old.


You see the same Ethel Merman who wore her love bracelets that day, her lady dress and her high heels. She was quite a sight to behold at her inebriated best. It was hard for me to believe that those bracelets and high heels were my own, that they were bought by my father for me, his daughter, a little girl, that I was the child of Ethel Merman and Ethel Merman only, by my mother, Ethel Merman and my fathe

In [10]:
@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            the_data = train_data if split == 'train' else val_data
            X, Y = get_batch(the_data)
            with ctx:
                logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

# learning rate decay scheduler (cosine with warmup)
def get_lr(it):
    # 1) linear warmup for warmup_iters steps
    if it < warmup_iters:
        return learning_rate * it / warmup_iters
    # 2) if it > lr_decay_iters, return min learning rate
    if it > lr_decay_iters:
        return min_lr
    # 3) in between, use cosine decay down to min learning rate
    decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
    assert 0 <= decay_ratio <= 1
    coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio)) # coeff ranges 0..1
    return min_lr + coeff * (learning_rate - min_lr)

def get_batch(data):
    x, y = generate_batch(data)
    x_torch = torch.from_numpy(x).pin_memory().to(device, non_blocking=True)[None, ...]
    y_torch = torch.from_numpy(y).pin_memory().to(device, non_blocking=True)[None, ...]
    return x_torch, y_torch

In [11]:
model.train()

X, Y = get_batch(train_data)
t0 = time.time()
local_iter_num = 0
running_mfu = -1.0

iter_num = 0
best_val_loss = 1e9

while True:
    # determine and set the learning rate for this iteration
    lr = get_lr(iter_num)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    
    # evaluate the loss on train/val sets and write checkpoints
    if iter_num % eval_interval == 0:
        losses = estimate_loss()
        print(f"step {iter_num}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")
        
        if losses['val'] < best_val_loss:
            best_val_loss = losses['val']
            if iter_num > 0:
                checkpoint = {
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'model_args': model_args,
                    'iter_num': iter_num,
                    'best_val_loss': best_val_loss,
                    'config': config,
                }
                print(f"saving checkpoint to {out_dir}")
                torch.save(checkpoint, os.path.join(out_dir, 'ckpt.pt'))

    # forward backward update, with optional gradient accumulation to simulate larger batch size
    # and using the GradScaler if data type is float16
    for micro_step in range(batch_size):
        with ctx:
            logits, loss = model(X, Y)
            loss = loss / batch_size # scale the loss to account for gradient accumulation
        # immediately async prefetch next batch while model is doing the forward pass on the GPU
        X, Y = get_batch(train_data)
        # backward pass, with gradient scaling if training in fp16
        scaler.scale(loss).backward()
    # clip the gradient
    if grad_clip != 0.0:
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    # step the optimizer and scaler if training in fp16
    
    scaler.step(optimizer)
    scaler.update()
    
    # flush the gradients as soon as we can, no need for this memory anymore
    optimizer.zero_grad(set_to_none=True)

    # timing and logging
    t1 = time.time()
    dt = t1 - t0
    t0 = t1
    if iter_num % log_interval == 0:
        # scale up to undo the division above, approximating the true total loss (exact would have been a sum)
        lossf = loss.item() * batch_size
        if local_iter_num >= 5: # let the training loop settle a bit
            mfu = model.estimate_mfu(batch_size, dt)
            running_mfu = mfu if running_mfu == -1.0 else 0.9*running_mfu + 0.1*mfu
        print(f"iter {iter_num}: loss {lossf:.4f}, time {dt*1000:.2f}ms, mfu {running_mfu*100:.2f}%")
    iter_num += 1
    local_iter_num += 1

    # termination conditions
    if iter_num > max_iters:
        break
    
model.eval()
None # to avoid printing the model

step 0: train loss 5.1421, val loss 5.0073
iter 0: loss 3.6166, time 5332.68ms, mfu -100.00%
iter 1: loss 4.7811, time 4451.41ms, mfu -100.00%
iter 2: loss 4.9809, time 4699.58ms, mfu -100.00%
iter 3: loss 4.9199, time 4599.42ms, mfu -100.00%
iter 4: loss 4.6358, time 5416.20ms, mfu -100.00%
iter 5: loss 4.9782, time 5019.91ms, mfu 10.15%
iter 6: loss 4.4182, time 4690.82ms, mfu 10.22%
iter 7: loss 3.9940, time 4831.82ms, mfu 10.25%
step 8: train loss 3.3427, val loss 3.3909
saving checkpoint to out
iter 8: loss 3.6332, time 19147.56ms, mfu 9.49%
iter 9: loss 4.7372, time 4994.51ms, mfu 9.56%
iter 10: loss 3.8514, time 4611.93ms, mfu 9.71%
iter 11: loss 3.8395, time 4996.79ms, mfu 9.76%
iter 12: loss 4.1297, time 5797.89ms, mfu 9.66%
iter 13: loss 3.6328, time 4780.50ms, mfu 9.76%
iter 14: loss 1.4290, time 4905.02ms, mfu 9.82%
iter 15: loss 4.3114, time 5623.81ms, mfu 9.75%
step 16: train loss 2.7270, val loss 2.8175
saving checkpoint to out
iter 16: loss 3.8459, time 19068.12ms, mfu 

Now we sample from the model

In [12]:
print(sample_model("HENRY VIII, KING OF ENGLAND", "My Wife", "Renaissance", "Love"))

Here is a Renaissance Love poem written by HENRY VIII, KING OF ENGLAND called "My Wife"

HENRY VIII, KING OF ENGLAND from Collected Poems, edited by Richard Houghton called "My Wife"

HENRY VIII, KING OF ENGLAND from Collected Poems, edited by Richard Houghton called "My Wife"
HENRY VIII, KING OF ENGLAND from Collected Poems, edited by Richard Houghton called "My Wife"
HENRY VIII, KING OF ENGLAND from Collected Poems, edited by Richard Houghton called "My Wife"
HENRY VIII, KING OF ENGLAND from Collected Poems, edited by Richard Houghton called "My Wife"
HENRY VIII, KING OF ENGLAND from Collected Poems, edited by Richard Houghton called "My Wife"
HENRY VIII, KING OF ENGLAND from Collected Poems, edited by Richard Houghton called "My Wife"
HENRY VIII, KING OF ENGLAND from Collected Poems, edited by Richard Houghton called "My Wife"
HENRY VIII, KING OF ENGLAND from Collected Poems, edited by Richard Houghton called "My Wife"
HENRY VIII


In [13]:
print(sample_model("HENRY VIII, KING OF ENGLAND", "My Wife", "Renaissance", "Love"))

Here is a Renaissance Love poem written by HENRY VIII, KING OF ENGLAND called "My Wife"

HENRY VIII, KING OF ENGLAND from Collected Poems. Copyright  1923 by Helen S. Yonge. Reprinted with the permission of New Directions Publishing Corporation. Used by permission of Alfred A. Knopf, an imprint of the Knopf Doubleday Publishing Group. All rights reserved.          My Wife,                  I did long ago pray for thy love,                  To which my heart I cry,                I have long since sung with,                       To wit, that thou do so well,                On my heart, and to thee,                   On my mind, as in thy words,                    That I may have thy love


In [14]:
print(sample_model("HENRY VIII, KING OF ENGLAND", "My Wife", "Renaissance", "Love"))

Here is a Renaissance Love poem written by HENRY VIII, KING OF ENGLAND called "My Wife"

Hark ye, my lady, here is a true love poem written by WILLIAM SHAKESPEARE called "As I Walk"

Who, since the earth, till it was a cedar;
Which, from all the birds of earth, with every shape,
Shall sing in the evening,
And do the greenbird-like way.

And my lady, where are you,
That can tell me this?
Do you, my lady, live in your couch?
Come, come, do you live in your house?
Do you live in a mansion or a field?
Do you walk barefoot, or barefoot bare,
Where you are, and where you walk?
Do you live live in a house or a store?
Do you live in a court or a street?
Do you live in a house or a store?
Do you live in a court or a street?
Do you live in a court or a house?
Do you live in a court or a store?
Do you live in a court or a house?
Do you live in a court or a store


In [15]:
print(sample_model("HENRY VIII, KING OF ENGLAND", "My Wife", "Renaissance", "Love"))

Here is a Renaissance Love poem written by HENRY VIII, KING OF ENGLAND called "My Wife"

HENRY VIII, KING OF ENGLAND, My Wife from Selected Poems, published by Liveright Publishing Group Australia.  Copyright  1933, 1966 by H. M. A. Liveright. Used by permission of Liveright Publishing Australia. All rights reserved.                                               Henry VIII, King of England from Selected Poems, published by Liveright Publishing Group Australia.                                     Henry VIII, King of England from Selected Poems, copyright  1986 by Liveright Publishing Australia.                                        Henry VIII, King of England from Selected Poems.                              


In [16]:
print(sample_model("HENRY VIII, KING OF ENGLAND", "My Wife", "Renaissance", "Love"))

Here is a Renaissance Love poem written by HENRY VIII, KING OF ENGLAND called "My Wife"

Originally published in Poetry, May, 1787. Reprinted with the permission of Houghton Mifflin Company. Copyright  1994 by Michael Houghton Mifflin Company. Used by permission of Houghton Mifflin Company. All rights reserved. <|endoftext|>What is a Modern Nature poem written by SIR PHILIP SIDNEY called "The Song of the Suckling Woom"

Originally published in Poetry, March 18, 1887. Reprinted with the permission of Houghton Mifflin Company.     Copyright  1933, 1954 by SIR PHILIP SIDNEY.      Reprinted with the permission of Houghton Mifflin Company.     Copyright  1993 by SIR PHILIP SIDNEY.     Reprinted with the permission of Houghton Mifflin Company.     Copyright  1994 by SIR PHILIP SIDNEY.     Reprinted with the permission of Houghton Mifflin Company.     Copyright  1994 by SIR PHILIP SIDNEY.     Reprinted with the permission of Houghton M


In [17]:
print(sample_model("HENRY VIII, KING OF ENGLAND", "My Wife", "Renaissance", "Love"))

Here is a Renaissance Love poem written by HENRY VIII, KING OF ENGLAND called "My Wife"

Hench hath no hand,
What skill can so strong a thing excel
A horse to run upon a cart?
                                                                                                                                                                                                                                    


In [18]:
print(sample_model("HENRY VIII, KING OF ENGLAND", "My Wife", "Renaissance", "Love"))

Here is a Renaissance Love poem written by HENRY VIII, KING OF ENGLAND called "My Wife"

HENRY VIII, KING OF ENGLAND: My Wife. from Collected Poems, edited by Eusebius of Poitiers. Copyright  1966 by The Estate of Henry VIII, King of England. Reprinted with the permission of Alfred A. Knopf, an imprint of the Knopf Doubleday Publishing Group, a division of Random House LLC. All rights reserved.                                                                                                                                                                             


In [19]:
print(sample_model("HENRY VIII, KING OF ENGLAND", "My Wife", "Renaissance", "Love"))

Here is a Renaissance Love poem written by HENRY VIII, KING OF ENGLAND called "My Wife"

Envy and jealousy made my hair grow.
Revolting my thoughts, and the mind of me black,
Those were the things which made me so poor:
A small fortune, which might not be:
And had I not a lady of surpassing beauty,
And yet spent the love of her, and been borne,
Then did the love of her still rage,
And I, whom I had so much worth,
Had so little time to envy, and so little time to hate.
By my love I sinned, and made my sin now found
The cause of my amiss, that it made me angry.
And the most grievous thing is, that I sinned:
I did sin in love, which she spake:
But my love was not love:
And I sinned, that she spake;
And sinned, that she spake;
And sinned, in love which she did.
The good that I sinned, and the good that I said,
Was one, and it is not my own.
Had I not no loves but her,
Had
