## Text Generation

### Load model

In [1]:
import torch
from transformers import AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer
from trainer import DEFAULT_CONFIG as config
from transformer import Transformer


# Checkpoint file holding the custom model's saved state dict.
MODEL_PATH = "./model_output/checkpoint-120000/pytorch_model.bin"
# When True, the pretrained GPT-2 model is loaded as well for side-by-side output.
COMPARE_TO_GPT = True

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Device: {device}")

# The "gpt2" tokenizer is used for every model in this notebook; its EOS token
# doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Constructor arguments are positional; the config key on each line names the
# value being passed.
model_custom = Transformer(
    config['embedding_size'],
    tokenizer.vocab_size,
    config['context_length'],
    config['num_layers'],
    config['dropout'],
    config['mult'],
    config['num_heads'],
    device
)
# NOTE(review): torch.load unpickles the file, so only point MODEL_PATH at
# checkpoints you trust (or pass weights_only=True if the installed torch has it).
state_dict = torch.load(MODEL_PATH, map_location=device)
model_custom.load_state_dict(state_dict)
model_custom.to(device)
# A freshly constructed module starts in training mode, which keeps dropout
# active. This notebook only generates text, so switch to eval mode.
# (Assumes Transformer is a torch.nn.Module, as its load_state_dict/to/parameters
# usage suggests.)
model_custom.eval()

def num_parameters(m, name):
    """Print how many parameters ``m`` has, in millions, labelled with ``name``.

    ``m`` must expose ``parameters()`` yielding objects with ``numel()``.
    Nothing is returned; the count is only printed.
    """
    total_params = 0
    for tensor in m.parameters():
        total_params += tensor.numel()
    print(f"Total number of {name} parameters: {total_params/1e6:.1f} M")

# Report the size of the custom model first.
num_parameters(model_custom, "custom model")

# Optionally load the pretrained GPT-2 baseline, move it to `device`, and
# report its size for comparison.
if COMPARE_TO_GPT:
    model_gpt = GPT2LMHeadModel.from_pretrained('gpt2').to(device)
    num_parameters(model_gpt, "gpt2")

Device: cuda
Total number of custom model parameters: 60.1 M
Total number of gpt2 parameters: 124.4 M


In [None]:
# Scratch note rather than a runnable cell: it references `self` and `x`,
# which none of the visible cells define.
# Step 1: layer-norm the input, run attention on it, then add the input back.
y = x + self.attn(self.ln_1(x))

# Step 2: same pattern for the MLP branch, normalising `y` and adding `y` back.
y + self.mlpf(self.ln_2(y))

In [None]:
# Scratch note (uses `self`, so not runnable as a cell).
# Alternative ordering: each sub-layer output is added to its input first,
# and the layer norm is applied to that sum.
y = self.ln1(self.attn(x) + x)
x = self.ln2(self.mlp(y) + y)

In [None]:
# Scratch note (uses `self`, so not runnable as a cell): a one-line nesting of
# the attention and MLP branches.
# NOTE(review): the outer residual adds the original `x`, not
# `x + self.attn(self.ln_1(x))`, so this is not the same computation as doing
# the two residual steps one after the other. Confirm which was intended.
x = x + self.mlpf(self.ln_2(x + self.attn(self.ln_1(x))))

In [None]:

        # Scratch note (indented and using `self`, as if lifted from inside a method).
        # Two residual steps that rebind `x`: the normalised input goes through
        # attention and is added back, then the same is done with the feed-forward branch.
        x = x + self.attn(self.ln_attn(x))
        x = x + self.ff(self.ln_ff(x))


In [4]:
# Display the module tree of the GPT-2 model. `model_gpt` is only bound when
# COMPARE_TO_GPT is True in the loading cell.
model_gpt

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )


In [2]:
import time

def generate_text(prompt, max_tokens=30, temperature=1, model=None, show_speed=False):
    """
    Generate text given a prompt and return the decoded result.

    Args:
        prompt: Text encoded with the module-level ``tokenizer``.
        max_tokens: Passed positionally as the second argument of
            ``model.generate``. NOTE(review): what that position means depends
            on the model's ``generate`` signature, so confirm it for every
            model passed in.
        temperature: Forwarded to ``model.generate`` as ``temperature``.
        model: Object exposing ``generate(input_ids, max_tokens, temperature=...)``.
            Required, even though the signature keeps ``None`` as the default.
        show_speed: When True, print the throughput of the ``generate`` call.
            The figure counts every token in the returned sequence.

    Returns:
        The first returned sequence decoded to a string, special tokens skipped.

    Raises:
        ValueError: If ``model`` is None.
    """
    if model is None:
        raise ValueError("generate_text requires a model, e.g. model=model_custom")

    # Encode the initial text
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    started = time.perf_counter()
    # Generation is inference only; disabling autograd avoids building a graph
    # for models whose generate() does not already do so.
    with torch.no_grad():
        generated_ids = model.generate(input_ids, max_tokens, temperature=temperature)

    if show_speed:
        # Guard against a zero interval on very coarse clocks.
        elapsed = max(time.perf_counter() - started, 1e-9)
        print(f"Text generated at {generated_ids.shape[1]/elapsed:.1f} tokens/second")

    # Decode the generated tokens to text
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)



# Prompts fed to every model under comparison.
prompts = [
    "The president of the united states is",
    "Once upon a time",
    "The best thing about"
]

# (heading, model) pairs in print order; the GPT-2 entry is added only when
# that model was loaded.
contenders = [("\nCustom model: ", model_custom)]
if COMPARE_TO_GPT:
    contenders.append(("\nGPT model: ", model_gpt))

for prompt in prompts:
    print("="*50)
    for heading, contender in contenders:
        print(heading)
        print(generate_text(prompt, model=contender))



Custom model: 


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The president of the united states is a new way of making the country a better place for the country.

The president of the United States, who is the first president of the

GPT model: 
The president of the united states is not a member of the United States Senate. He is not a member of the United States House of Representatives. He

Custom model: 


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Once upon a time, I was a little kid. I was a kid. I was a kid. I was a kid. I was a kid. I was a

GPT model: 
Once upon a time, the world was a place of great beauty and great danger. The world was a place of great danger, and the world was

Custom model: 


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The best thing about the world is that it is the most important thing about the world.

The world is the most important thing about the world. It is the

GPT model: 
The best thing about this is that it's a very simple and easy way to get started.

The first thing you need to do is to


## Appendix

In [3]:
# Deliberate stop: a bare `raise` with no active exception throws RuntimeError.
# Presumably placed here so a top-to-bottom run halts before the appendix cells.
raise  # don't run

RuntimeError: No active exception to reraise

In [None]:
def generate(prompt='', num_samples=10, steps=20, do_sample=True, model_type='gpt2'):
    """
    Sample ``num_samples`` continuations of ``prompt`` and print each one.

    Uses the module-level ``model`` and ``device``; ``model`` must be bound
    before this function is called.

    Args:
        prompt: Text to continue. An empty string is replaced by the
            ``<|endoftext|>`` token text to produce unconditional samples.
        num_samples: Number of sequences generated in one batch.
        steps: Passed to ``model.generate`` as ``max_new_tokens``.
        do_sample: Passed to ``model.generate`` as ``do_sample``.
        model_type: Name handed to ``GPT2Tokenizer.from_pretrained``. It
            defaults to ``'gpt2'``, the checkpoint this notebook loads GPT-2
            from, and should match the checkpoint behind ``model``.

    Returns:
        None. Each decoded sample is printed after an 80-character dashed line.
    """
    # tokenize the input prompt into integer input sequence
    tokenizer = GPT2Tokenizer.from_pretrained(model_type)
    if prompt == '':
        # to create unconditional samples...
        # huggingface/transformers tokenizer special cases these strings
        prompt = '<|endoftext|>'
    encoded_input = tokenizer(prompt, return_tensors='pt').to(device)
    x = encoded_input['input_ids']

    # we'll process all desired num_samples in a batch, so expand out the batch dim
    x = x.expand(num_samples, -1)

    # forward the model `steps` times to get samples, in a batch
    y = model.generate(x, max_new_tokens=steps, do_sample=do_sample, top_k=40)

    for i in range(num_samples):
        out = tokenizer.decode(y[i].cpu().squeeze())
        print('-'*80)
        print(out)
        
# `generate` reads the module-level name `model` rather than taking the model
# as an argument, so bind it to the GPT-2 model before calling.
model = model_gpt
# Print 10 sampled continuations, each extended by 20 new tokens.
generate(prompt='Once upon a time', num_samples=10, steps=20)

In [None]:
model.