## Text Generation

### Load model

In [4]:
import torch
from transformers import AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer
from transformers import AutoModel
from trainer import DEFAULT_CONFIG as config
from model import Transformer


CHECKPOINT_PATH = "./model_output/checkpoint-40000"
COMPARE_TO_GPT = False

device = cuda' if torch.cuda.is_available() else 'cpu'
print(f"Device: {device}")

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model_custom = Transformer(
    config['embedding_size'],
    tokenizer.vocab_size,
    config['context_length'],
    config['num_layers'],
    config['dropout'],
    config['num_heads'],
    device
)

state_dict = torch.load(CHECKPOINT_PATH, map_location=device)
model_custom.load_state_dict(state_dict)
model = AutoModel.from_pretrained(CHECKPOINT_PATH)
model_custom.to(device)

def num_parameters(m, name):
    total_params = sum(p.numel() for p in m.parameters())
    print(f"Total number of {name} parameters: {total_params/1e6:.1f} M")

num_parameters(model_custom, "custom model")
    
if COMPARE_TO_GPT:
    model_gpt = GPT2LMHeadModel.from_pretrained('gpt2')
    model_gpt.to(device)
    num_parameters(model_gpt, "gpt2")

Device: cuda


OSError: ./model_output/checkpoint-40000 does not appear to have a file named config.json. Checkout 'https://huggingface.co/./model_output/checkpoint-40000/None' for available files.

In [9]:
COMPARE_TO_GPT = False


In [7]:
import torch
from transformers import AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer
from transformers import AutoModel
from trainer import DEFAULT_CONFIG as config
from model import Transformer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_custom = Transformer(
    config['embedding_size'],
    tokenizer.vocab_size,
    config['context_length'],
    config['num_layers'],
    config['dropout'],
    config['num_heads'],
    device
)

import torch
import safetensors.torch as st
CHECKPOINT_PATH = "/home/ubuntu/flight-mode/transformers/model_output/checkpoint-40000/model.safetensors"

# Read the file as a binary stream
with open(CHECKPOINT_PATH, 'rb') as f:
    byte_data = f.read()
    
# Load SafeTensors
data = st.load(byte_data)

# Convert SafeTensors to PyTorch tensors
# Assuming 'data' is a dictionary like the state_dict of a model
for k, v in data.items():
    data[k] = torch.tensor(v)

# Load the state dict
model_custom.load_state_dict(data)
model_custom.to(device)

  data[k] = torch.tensor(v)


Transformer(
  (token_embedding): Embedding(50257, 768)
  (pos_embedding): Embedding(512, 768)
  (blocks): Sequential(
    (0): Block(
      (ln1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (ln2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): MultiHeadMaskedAttention(
        (keys_linear): Linear(in_features=768, out_features=768, bias=False)
        (queries_linear): Linear(in_features=768, out_features=768, bias=False)
        (values_linear): Linear(in_features=768, out_features=768, bias=False)
        (output_linear): Linear(in_features=768, out_features=768, bias=True)
      )
      (mlp): Sequential(
        (0): Linear(in_features=768, out_features=3072, bias=True)
        (1): GELU(approximate='none')
        (2): Linear(in_features=3072, out_features=768, bias=True)
        (3): Dropout(p=0.1, inplace=False)
      )
    )
    (1): Block(
      (ln1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (ln2): LayerNorm((768,), e

In [10]:
import time

def generate_text(prompt, max_tokens=30, temperature=1, model=None):
    """
    Generate text given a prompt
    """
    # Encode the initial text
    input_ids = tokenizer.encode(
        prompt, return_tensors="pt").to(device)

    st = time.time()
    # Generate additional tokens
    generated_ids = model.generate(input_ids, max_tokens, temperature=temperature)
    # print(f"Text generated at {generated_ids.shape[1]/(time.time() - st):.1f} tokens/second")

    # Decode the generated tokens to text
    generated_text = tokenizer.decode(
        generated_ids[0], skip_special_tokens=True
    )
    return generated_text



prompts = [
    "The president of the united states is",
    "Once upon a time",
    "The best thing about"
]

for prompt in prompts:
    print("="*50)
    print("\nCustom model: ")
    print(generate_text(prompt, model=model_custom))
    
    if COMPARE_TO_GPT:
        print("\nGPT model: ")
        print(generate_text(prompt, model=model_gpt))



Custom model: 
The president of the united states is a man who has been a key ally of the Republican Party.

The Republican Party has been a key ally of the Republican Party since the election

Custom model: 
Once upon a time, I’ve been a little bit of a fan of the game. I’ve been a fan of the game since I was a

Custom model: 
The best thing about the best thing about the best thing about the best thing about the best thing about the best thing about the best thing about the best thing about the best


## Appendix

In [3]:
raise  # don't run

RuntimeError: No active exception to reraise

In [None]:
def generate(prompt='', num_samples=10, steps=20, do_sample=True):
        
    # tokenize the input prompt into integer input sequence
    if 0:
        tokenizer = BPETokenizer()
        if prompt == '':
            # to create unconditional samples...
            # manually create a tensor with only the special <|endoftext|> token
            # similar to what openai's code does here https://github.com/openai/gpt-2/blob/master/src/generate_unconditional_samples.py
            x = torch.tensor([[tokenizer.encoder.encoder['<|endoftext|>']]], dtype=torch.long)
        else:
            x = tokenizer(prompt).to(device)
    else:
        tokenizer = GPT2Tokenizer.from_pretrained(model_type)
        if prompt == '': 
            # to create unconditional samples...
            # huggingface/transformers tokenizer special cases these strings
            prompt = '<|endoftext|>'
        encoded_input = tokenizer(prompt, return_tensors='pt').to(device)
        x = encoded_input['input_ids']
    
    # we'll process all desired num_samples in a batch, so expand out the batch dim
    x = x.expand(num_samples, -1)

    # forward the model `steps` times to get samples, in a batch
    y = model.generate(x, max_new_tokens=steps, do_sample=do_sample, top_k=40)
    
    for i in range(num_samples):
        out = tokenizer.decode(y[i].cpu().squeeze())
        print('-'*80)
        print(out)
        
model = model_gpt
generate(prompt='Once upon a time', num_samples=10, steps=20)

In [None]:
model.