In [None]:
!pip install torch transformers datasets

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, TextDataset, DataCollatorForLanguageModeling

# Base checkpoint: load the pretrained "gpt2" model and its matching tokenizer
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Function to load dataset
def load_dataset(file_path, tokenizer, block_size=512):
    """Build a block-wise language-modeling dataset from a plain-text file.

    Args:
        file_path: Path of the text file; forwarded to ``TextDataset``.
        tokenizer: Tokenizer used to encode the file; forwarded to ``TextDataset``.
        block_size: Number of tokens per training example (default 512).

    Returns:
        The ``TextDataset`` built from ``file_path``.

    Raises:
        ValueError: If ``block_size`` is not positive, or if the file produced
            no training examples at all.
    """
    # Reject nonsensical block sizes up front instead of failing (or silently
    # producing nothing) deep inside the dataset construction.
    if block_size <= 0:
        raise ValueError(f"block_size must be a positive integer, got {block_size!r}")

    # NOTE(review): TextDataset is deprecated in recent transformers releases in
    # favour of the `datasets` library -- confirm against the installed version.
    dataset = TextDataset(
        tokenizer=tokenizer,
        file_path=file_path,
        block_size=block_size,
    )

    # TextDataset keeps only complete blocks, so a corpus shorter than one block
    # yields zero examples and training would "finish" without learning anything.
    if len(dataset) == 0:
        raise ValueError(
            f"No training examples were produced from {file_path!r}: the text is "
            f"shorter than one block of {block_size} tokens. Use a larger corpus "
            f"or a smaller block_size."
        )
    return dataset

# Load and tokenize dataset
# NOTE(review): this is an absolute Colab path; adjust it when running elsewhere.
train_dataset = load_dataset("/content/story_dataset.txt", tokenizer)

# Data collator for batching (mlm=False: plain language-modeling batches, no token masking)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# Training arguments
training_args = TrainingArguments(
    output_dir="./gpt2-finetuned-stories",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=500,
    save_total_limit=2,
    logging_dir="./logs",
    # Log the training loss often enough that short runs still show progress;
    # with the default interval the loss table of this run stayed empty.
    logging_steps=50,
    # Disable external experiment trackers: otherwise an installed wandb prompts
    # for an API key interactively, which blocks an unattended "Run All".
    report_to="none",
)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator
)

# Train the model
trainer.train()

# Save the fine-tuned model and tokenizer side by side so they can be reloaded together
model.save_pretrained("./gpt2-finetuned-stories")
tokenizer.save_pretrained("./gpt2-finetuned-stories")

print("Fine-tuning complete! Model saved.")

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mritikagadapa[0m ([33mtrials_[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Step,Training Loss


Fine-tuning complete! Model saved.


In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Reload the model and tokenizer from the directory written by the fine-tuning cell
model_name = "./gpt2-finetuned-stories"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Generate a story
def generate_story(prompt, max_length=200):
    """Generate a story continuation for ``prompt`` with the loaded model.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        prompt: Text the story should start with.
        max_length: Value forwarded to ``model.generate`` as ``max_length``
            (the total length in tokens, prompt included).

    Returns:
        The decoded text of the first generated sequence, special tokens removed.
    """
    # Keep the input tensors on the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        # Without do_sample=True the sampling settings below are ignored and
        # decoding is greedy, which makes the text loop on itself.
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        # GPT-2 has no dedicated pad token; setting it explicitly avoids the
        # "Setting `pad_token_id` to `eos_token_id`" warning on every call.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Demo: continue a fairy-tale style opening line and print the result
story_prompt = "Once upon a time in a mysterious forest,"
print(generate_story(prompt=story_prompt))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Once upon a time in a mysterious forest, a young girl named Lina was kidnapped by a mysterious man. She was forced to join a group of young men who were trying to save her from a mysterious man who had kidnapped her.

The story of the story begins in the year 2066, when a young girl named Lina was kidnapped by a mysterious man who had kidnapped her. She was forced to join a group of young men who were trying to save her from a mysterious man who had kidnapped her.

The story of the story begins in the year 2066, when a young girl named Lina was kidnapped by a mysterious man who had kidnapped her. She was forced to join a group of young men who were trying to save her from a mysterious man who had kidnapped her.

The story of the story begins in the year 2066, when a young girl named Lina was kidnapped by a mysterious man who had kidnapped her. She was forced to join a group
