In [30]:
from datasets import load_dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, TextDataset, DataCollatorForLanguageModeling
# Step 1: Install libraries
!pip install --upgrade transformers datasets fsspec


Collecting fsspec
  Using cached fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)


In [67]:
sample_text = """
Q: What does Lucio do at night?
A: Lucio enjoys deep learning and studies AI at night.

Q: Where does Lucio live?
A: He lives in Charlotte, North Carolina.

Q: Who is Lilly?
A: Lilly is Lucio's dog.

Q: What does Lucio dream about?
A: He dreams about Florida and building AI projects by the ocean.
"""


with open("train.txt", "w", encoding="utf-8") as f:
    for _ in range(200):
        f.write(sample_text + "\n")


In [68]:
# Step 3: Load tokenizer and model
from transformers import GPT2Tokenizer, GPT2LMHeadModel

model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

In [69]:
# Step 4: Prepare dataset
from transformers import TextDataset, DataCollatorForLanguageModeling

train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="train.txt",
    block_size=128
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False,
)



In [70]:
# Step 5: Training arguments and Trainer
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./gpt2-finetuned",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    save_steps=500,
    save_total_limit=2,
    logging_steps=100,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)

In [71]:
# Step 6: Train the model
trainer.train()

Step,Training Loss
100,0.1443


TrainOutput(global_step=102, training_loss=0.141518107834546, metrics={'train_runtime': 48.5054, 'train_samples_per_second': 4.206, 'train_steps_per_second': 2.103, 'total_flos': 13325893632000.0, 'train_loss': 0.141518107834546, 'epoch': 3.0})

In [72]:
# ♻️ Reload model from fine-tuned directory
from transformers import GPT2LMHeadModel
import torch  # ← add this line

# 🔐 Save your fine-tuned model
trainer.save_model("./gpt2-finetuned")

# ♻️ Reload model from fine-tuned directory
model = GPT2LMHeadModel.from_pretrained("./gpt2-finetuned")
model.to("cuda" if torch.cuda.is_available() else "cpu")

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [73]:
input_text = "Q: Who is Lilly?\nA:"
inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
attention_mask = torch.ones_like(inputs)

outputs = model.generate(
    inputs,
    attention_mask=attention_mask,
    max_length=60,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=0.9,
    num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id  # ✅ suppress warning
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Q: Who is Lilly?
A: She's a curious dog named Bongo.
He barked at everything, especially mail trucks and squirrels.
Every afternoon, he would wait for the mailman like clockwork.


Once upon a time there was a curious dog named B
