## Contents
1. Sample from the pre-trained model
2. Sample from the from-scratch model

In [1]:
from gpt2 import GPT, GPTConfig # our GPT class
import tiktoken
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Tokenizer: the 'gpt2' encoding provided by tiktoken.
enc = tiktoken.get_encoding('gpt2')

# Pick the compute device at runtime: CUDA when available, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## Pre-trained Model

In [3]:
# Load the 'gpt2' pretrained weights through our GPT class, then move the
# resulting model onto the selected device.
# NOTE(review): GPTConfig is passed as the class itself rather than an
# instance, and if from_pretrained is a classmethod the GPT(...) instance built
# here is a throwaway — confirm against gpt2.py before simplifying.
pretrained_model = GPT(GPTConfig).from_pretrained('gpt2')
pretrained_model = pretrained_model.to(device)

loading weights from pretrained gpt: gpt2


In [4]:
# Top-k sampling from the pre-trained model: the prompt is repeated `batches`
# times and every row is extended by `num_generations` sampled tokens.
batches = 3          # number of independent continuations (B)
k = 10               # sample only among the k most probable next tokens
num_generations = 5  # tokens appended to each row

prompt = "Hello, my name is"
tokens = enc.encode(prompt)
# Build the prompt tensor directly on the target device (no extra host copy).
x = torch.tensor(tokens, dtype=torch.long, device=device) # (T)
x = x.unsqueeze(0).repeat(batches, 1) # (B, T)
print('Initial x:', x.shape)

# Seed the sampler so Restart & Run All reproduces the same continuations.
torch.manual_seed(42)

pretrained_model.eval()
with torch.no_grad(): # inference only; entered once for the whole loop
    for step in range(num_generations): # generate num_generations tokens
        logits, _ = pretrained_model(x) # (B, T, Vocab)
        logits = logits[:, -1, :] # keep the last position only -> (B, Vocab)
        probs = F.softmax(logits, dim=-1) # (B, Vocab)
        topk_probs, topk_x = torch.topk(probs, k) # (B, k), (B, k)
        # multinomial treats each row as unnormalised weights, so the top-k
        # probabilities do not need to be rescaled to sum to one first.
        ix = torch.multinomial(topk_probs, num_samples=1) # (B, 1)
        # ix indexes into the top-k list; map it back to vocabulary ids.
        next_x = topk_x.gather(dim=1, index=ix) # (B, 1)
        x = torch.cat((x, next_x), dim=-1) # (B, T+1)
print('Generated x:', x.shape)

Initial x: torch.Size([3, 5])
Generated x: torch.Size([3, 10])


In [5]:
# Decode each generated row back to text and print it with a 1-based label.
for row_idx in range(batches):
    token_ids = x[row_idx].tolist()
    decoded = enc.decode(token_ids)
    print(f'Batch {row_idx+1}:', decoded)

Batch 1: Hello, my name is Michael. I'm a
Batch 2: Hello, my name is Kiyoshi Kiy
Batch 3: Hello, my name is David and I am a


## From Scratch Model

In [6]:
# Build a GPT without loading any pretrained weights, for comparison against
# the pre-trained model above.
# Seed first so the weight initialisation is repeatable across kernel restarts.
# assumes GPT.__init__ draws its initial weights from torch's global RNG — TODO confirm
torch.manual_seed(42)
# NOTE(review): GPTConfig is passed as the class itself rather than an
# instance — confirm against gpt2.py that this is intended.
model = GPT(GPTConfig).to(device)

In [7]:
# Top-k sampling from the from-scratch model: the prompt is repeated `batches`
# times and every row is extended by `num_generations` sampled tokens.
batches = 3          # number of independent continuations (B)
k = 10               # sample only among the k most probable next tokens
num_generations = 5  # tokens appended to each row

prompt = "Hello, my name is"
tokens = enc.encode(prompt)
# Build the prompt tensor directly on the target device (no extra host copy).
x = torch.tensor(tokens, dtype=torch.long, device=device) # (T)
x = x.unsqueeze(0).repeat(batches, 1) # (B, T)
print('Initial x:', x.shape)

# Seed the sampler so the draws are repeatable for a given set of model weights.
torch.manual_seed(42)

model.eval()
with torch.no_grad(): # inference only; entered once for the whole loop
    for step in range(num_generations): # generate num_generations tokens
        logits, _ = model(x) # (B, T, Vocab)
        logits = logits[:, -1, :] # keep the last position only -> (B, Vocab)
        probs = F.softmax(logits, dim=-1) # (B, Vocab)
        topk_probs, topk_x = torch.topk(probs, k) # (B, k), (B, k)
        # multinomial treats each row as unnormalised weights, so the top-k
        # probabilities do not need to be rescaled to sum to one first.
        ix = torch.multinomial(topk_probs, num_samples=1) # (B, 1)
        # ix indexes into the top-k list; map it back to vocabulary ids.
        next_x = topk_x.gather(dim=1, index=ix) # (B, 1)
        x = torch.cat((x, next_x), dim=-1) # (B, T+1)
print('Generated x:', x.shape)

Initial x: torch.Size([3, 5])
Generated x: torch.Size([3, 10])


In [8]:
# Decode each generated row back to text and print it with a 1-based label.
# The from-scratch model is generating random tokens.
for row_idx in range(batches):
    token_ids = x[row_idx].tolist()
    decoded = enc.decode(token_ids)
    print(f'Batch {row_idx+1}:', decoded)

Batch 1: Hello, my name is HASgra keysweetweet
Batch 2: Hello, my name isaggressive 2022 desc descabul
Batch 3: Hello, my name isvantvant although althoughweet
