In [4]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

# Directory holding the fine-tuned model weights to load.
checkpoint_path = "checkpoint-9000"

# The tokenizer is loaded from the stock "gpt2" vocabulary rather than from
# the checkpoint.
# NOTE(review): presumably the checkpoint directory does not contain tokenizer
# files -- confirm; if it does, prefer
# `GPT2Tokenizer.from_pretrained(checkpoint_path)` so both stay in sync.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# Reuse the end-of-text token for padding so tokenizer calls made with
# `padding=True` have a pad token to work with.
tokenizer.pad_token = tokenizer.eos_token

# Load the fine-tuned language model and switch it to evaluation mode
# (inference only; no training happens in this notebook).
model = GPT2LMHeadModel.from_pretrained(checkpoint_path)
model.eval()

# Pick the best available device. Each accelerator is selected only when its
# own availability check passes: the previous code chose "mps" when *CUDA* was
# available, which fails on CUDA machines and never used MPS on Apple Silicon.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
model.to(device)

# Function to generate text based on a prompt
def generate_text(prompt_text, max_length=1000, temperature=1.1, repetition_penalty=1.1, top_k=50, top_p=0.95):
    """Sample a continuation of ``prompt_text`` from the loaded model.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        prompt_text: Text the model should continue.
        max_length: Token budget used twice: the prompt is truncated to this
            many tokens, and it is also passed to ``model.generate`` as the
            cap on the output sequence length.
        temperature: Sampling temperature forwarded to ``model.generate``.
        repetition_penalty: Repetition penalty forwarded to ``model.generate``.
        top_k: Top-k sampling cutoff forwarded to ``model.generate``.
        top_p: Nucleus (top-p) sampling cutoff forwarded to ``model.generate``.

    Returns:
        The first returned sequence decoded to a string with special tokens
        removed. Sampling is enabled, so repeated calls can differ.
    """
    encoding = tokenizer(
        prompt_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    ).to(device)

    with torch.no_grad():
        output = model.generate(
            encoding["input_ids"],
            attention_mask=encoding["attention_mask"],
            max_length=max_length,
            num_return_sequences=1,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            top_k=top_k,
            top_p=top_p,
            # Pass the pad id explicitly; otherwise generate() falls back to
            # the EOS id on its own and logs a warning on every call.
            pad_token_id=tokenizer.pad_token_id,
        )

    return tokenizer.decode(output[0], skip_special_tokens=True)


In [8]:
# Demo: sample one continuation for a short everyday prompt and show it.
generated_text = generate_text(
    "Today's weather is so good and sunny, lets go out",
    max_length=200,  # shorter than the default budget, for a more focused output
)
print(generated_text)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Today's weather is so good and sunny, lets go out to the beach, get some water, then we stay on our feet for a while, then come back to the beach. We can see what's happening in your head, you know what's going through your head? You look up at it, and you don =t think of that but it's true. " 

