### Shows how one can generate text given a prompt and some hyperparameters, using minGPT

In [1]:
import torch

from model import GPT2
from utils import set_seed
from bpe import BPETokenizer

In [2]:
# Seed the run before anything is sampled (presumably seeds the RNGs so the
# generations are reproducible -- see utils.set_seed to confirm).
set_seed(3407)
# Name of the checkpoint passed to GPT2.load_weights in the next cell.
# Keep exactly one assignment active; uncomment another line to switch.
model_type = 'gpt2-xl'
#model_type = 'gpt2-large'
#model_type = 'gpt2-medium'
#model_type = 'gpt2'


In [3]:
# Alternative loader kept for reference. NOTE(review): `GPT` is not imported
# in this notebook (only `GPT2` is), so this line would fail if uncommented.
#model     = GPT.from_pretrained(model_type)
# Build the model for the checkpoint name selected above.
model     = GPT2.load_weights(model_type)
# Tokenizer used by generate() to encode the prompt and decode sampled ids.
tokenizer = BPETokenizer()



In [4]:
# Move the model to the GPU; tensors fed to it must live on the same device.
model.to("cuda")
# Switch to evaluation mode. As the last expression of the cell, its return
# value is what the notebook displays (the module summary shown below).
model.eval()

GPT2(
  (tEmbds): Embedding(50257, 1600)
  (pEmbds): Embedding(1024, 1600)
  (blocks): ModuleList(
    (0-47): 48 x Basic(
      (norm1): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
      (attn): Attention(
        (dense1): Linear(in_features=1600, out_features=4800, bias=True)
        (dense2): Linear(in_features=1600, out_features=1600, bias=True)
      )
      (norm2): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
      (mlp): MLP(
        (dense1): Linear(in_features=1600, out_features=6400, bias=True)
        (dense2): Linear(in_features=6400, out_features=1600, bias=True)
      )
    )
  )
  (norm): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
  (head): Linear(in_features=1600, out_features=50257, bias=False)
)

In [5]:
def generate(prompt="", pSamples=10, nNewTokens=20, topk=20):
    """Sample and print `pSamples` continuations of `prompt`.

    Args:
        prompt: Text to condition on. An empty string requests unconditional
            samples, which start from only the special <|endoftext|> token.
        pSamples: Number of samples to draw; the prompt is repeated this many
            times along the batch dimension and processed in one batch.
        nNewTokens: Forwarded to `model.generate` as `nNewTokens`.
        topk: Forwarded to `model.generate` as `topk`.

    Returns:
        None. Each decoded sample is printed, followed by a dashed rule of
        the same length as the sample.
    """
    # Put the prompt on whatever device the model's weights live on, so both
    # the conditional and the unconditional path match the model.
    device = next(model.parameters()).device

    # tokenize the input prompt into integer input sequence
    if prompt == '':
        # to create unconditional samples...
        # manually create a tensor with only the special <|endoftext|> token
        # similar to what openai's code does here https://github.com/openai/gpt-2/blob/master/src/generate_unconditional_samples.py
        x = torch.tensor([[tokenizer.encoder.encoder['<|endoftext|>']]], dtype=torch.long)
    else:
        x = tokenizer(prompt)
    x = x.to(device)

    # we'll process all `pSamples` samples in a batch, so expand out the batch dim
    prompts = x.expand(pSamples, -1)

    # let the model sample the continuations for the whole batch, honouring
    # the caller's `topk` rather than a hard-coded value
    y = model.generate(prompts, nNewTokens=nNewTokens, topk=topk)

    for i in range(pSamples):
        out = tokenizer.decode(y[i].cpu().squeeze())
        print(out)
        print('-'*len(out))

In [6]:
# Sample 10 continuations of the prompt 'The cold', requesting 20 new tokens each.
# NOTE(review): generate() itself only prints the decoded text and a dashed
# rule, so the tensor dumps in this cell's output come from somewhere else --
# presumably a leftover debug print inside model.generate; confirm and remove.
generate(prompt='The cold', pSamples=10, nNewTokens=20, topk=20)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')
tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.2170, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:0')
tensor([[0., 0., 0.,  ..., 0., 0., 0