In [None]:
# Step 1: Install required libraries
!pip install transformers datasets


In [2]:
# Step 2: Import dependencies
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, TextDataset, DataCollatorForLanguageModeling
from datasets import load_dataset

# Step 3: Load tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Set pad token to eos token
tokenizer.pad_token = tokenizer.eos_token

# Step 4: Load and tokenize dataset
dataset_path = "/content/stories.txt"  # Updated path
dataset = load_dataset("text", data_files={"train": dataset_path})

def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=512,
        return_tensors="pt"
    )

tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=["text"])

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # GPT-2 is not trained with masked language modeling
)

# Step 5: Define training arguments
training_args = TrainingArguments(
    output_dir="/content/gpt2-story-model",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=500,
    save_total_limit=2,
    logging_dir="/content/logs",
    logging_steps=100,
    prediction_loss_only=True,
    fp16=torch.cuda.is_available()  # Use mixed precision if GPU is available
)

# Step 6: Train the model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    data_collator=data_collator
)

trainer.train()

# Step 7: Save the fine-tuned model
model.save_pretrained("/content/gpt2-story-model")
tokenizer.save_pretrained("/content/gpt2-story-model")

# Step 8: Generate text
def generate_text(prompt, max_length=200, temperature=0.7, top_p=0.9):
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    attention_mask = input_ids != tokenizer.pad_token_id  # Create attention mask
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id  # Explicitly set pad token id
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example prompt
prompt = input("Enter your prompt for story generation:")
print(generate_text(prompt))

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mnaisargee-22210870[0m ([33mtrials_[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss


The king
The king of the great kingdom, Lord Henry, was a young boy who lived in the shadows, and was often the subject of a violent dispute. He was a friend of the king's, and was a willing and loyal servant. He was a good man, and a true friend to the king. He was a great friend of the king, and was a true friend to the king. He was a true friend to the king, and he was a true friend to the king. He was a true friend to the king, and he was a true friend to the king. He was a true friend to the king, and he was a true friend to the king. He was a true friend to the king, and he was a true friend to the king. He was a true friend to the king, and he was a true friend to the king, and he was a true friend to the king. He was a true friend to the king, and he was a true friend to
