In [1]:
# 🧠 Step 1: Load Model and Tokenizer
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import torch

# Locations of the custom-trained artifacts on disk
tokenizer_dir = "tokenizer_gpt2_custom"
model_dir = "gpt2_custom_model"

# Restore the weights and switch straight to evaluation mode
# (.eval() returns the module itself, so the call can be chained).
model = GPT2LMHeadModel.from_pretrained(model_dir).eval()

# The tokenizer is loaded with "<pad>" registered as its padding token
tokenizer = GPT2TokenizerFast.from_pretrained(tokenizer_dir, pad_token="<pad>")

# Leave the model as the cell's last expression so its architecture is displayed
model


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(30000, 256)
    (wpe): Embedding(512, 256)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-3): 4 x GPT2Block(
        (ln_1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=768, nx=256)
          (c_proj): Conv1D(nf=256, nx=256)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=1024, nx=256)
          (c_proj): Conv1D(nf=256, nx=1024)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=256, out_features=30000, bias=False)
)

In [2]:
# 📝 Step 2: Define Prompt and Generate Text

prompt = "Once upon a time"

# Sampling is stochastic: seed torch's global RNG so that re-running the
# notebook from a fresh kernel (same setup) yields the same continuation.
SEED = 42
torch.manual_seed(SEED)

# Calling the tokenizer (instead of .encode) also returns the attention mask,
# which is handed to generate() explicitly rather than left to be inferred.
encoded = tokenizer(prompt, return_tensors="pt")
input_ids = encoded["input_ids"]

# Generate continuation
output = model.generate(
    input_ids,
    attention_mask=encoded["attention_mask"],
    max_length=100,           # Cap on total tokens, prompt included
    num_return_sequences=1,
    do_sample=True,           # For creative/random outputs
    top_k=50,
    top_p=0.95,
    temperature=0.9,
    pad_token_id=tokenizer.pad_token_id
)

# Decode and print result
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print("📝 Generated Text:\n")
print(generated_text)


📝 Generated Text:

Once upon a time of all, please go on the time and go to the party here.The Washington PostThe House House bill is running on March 30, 2017.The bill will receive. With the Federal House and bill a number. But the Senate will be allowed to make it clear if Congress is on the states that they would have been, according to the Senate Senate.The bill has been under the House, and Senate bill says Congress will not be an likely to be


In [4]:
def generate_text(prompt, max_len=100, top_k=40, top_p=0.9, temperature=0.8, seed=None):
    """Sample a continuation of ``prompt`` from the notebook's global ``model``.

    Args:
        prompt: Text to continue. It must encode to at least one token.
        max_len: Value passed to ``generate`` as ``max_length`` (a cap on the
            total sequence length in tokens, prompt included).
        top_k: Top-k sampling cutoff forwarded to ``generate``.
        top_p: Nucleus (top-p) sampling threshold forwarded to ``generate``.
        temperature: Softmax temperature forwarded to ``generate``.
        seed: Optional integer. When given, torch's *global* RNG is seeded
            before sampling so the call is repeatable; ``None`` (default)
            leaves the RNG state untouched.

    Returns:
        The decoded first generated sequence (prompt included), with special
        tokens stripped.

    Raises:
        ValueError: If ``prompt`` encodes to zero tokens.
    """
    # Calling the tokenizer (instead of .encode) also yields the attention mask.
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded["input_ids"]

    # Fail early with a clear message instead of an obscure error inside generate().
    if input_ids.shape[-1] == 0:
        raise ValueError("prompt must contain at least one token")

    if seed is not None:
        torch.manual_seed(seed)

    output = model.generate(
        input_ids,
        attention_mask=encoded["attention_mask"],
        max_length=max_len,
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        pad_token_id=tokenizer.pad_token_id
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Quick demo: sample one continuation for an example prompt and print it
print(generate_text(prompt="The best Tv show is ", max_len=100))


The best Tv show is it to get a new version.The second episode of the most popular song are the first two.The story is the name of the song. The song is the second and is also the first and its the music film is the music.The film is made the best album.The film is the first film that is on the movie. The film has shown that The film has been released in the book.The song has become the first
