In [1]:
pip install transformers torch datasets


Collecting transformers
  Downloading transformers-4.51.0-py3-none-any.whl.metadata (38 kB)
Collecting torch
  Downloading torch-2.6.0-cp312-none-macosx_11_0_arm64.whl.metadata (28 kB)
Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting filelock (from transformers)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.30.0 (from transformers)
  Downloading huggingface_hub-0.30.1-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl.metadata (40 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl.metadata (6.8 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl.metadata (3.8 kB)
Collecting networkx (from torch)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
C

In [3]:
pip install tf-keras


Collecting tf-keras
  Downloading tf_keras-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting tensorflow<2.20,>=2.19 (from tf-keras)
  Downloading tensorflow-2.19.0-cp312-cp312-macosx_12_0_arm64.whl.metadata (4.0 kB)
Collecting tensorboard~=2.19.0 (from tensorflow<2.20,>=2.19->tf-keras)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting ml-dtypes<1.0.0,>=0.5.1 (from tensorflow<2.20,>=2.19->tf-keras)
  Downloading ml_dtypes-0.5.1-cp312-cp312-macosx_10_9_universal2.whl.metadata (21 kB)
Downloading tf_keras-2.19.0-py3-none-any.whl (1.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 907.0 kB/s eta 0:00:00
Downloading tensorflow-2.19.0-cp312-cp312-macosx_12_0_arm64.whl (252.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 252.7/252.7 MB 5.1 MB/s eta 0:00:00
Downloading ml_dtypes-0.5.1-cp312-cp312-macosx_10_9_universal2.whl

In [None]:
pip uninstall keras


In [None]:
pip install keras==2.15.0


In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from datasets import load_dataset

# 1. Load Pre-trained Model and Tokenizer
# The checkpoint must be a GPT-2 family model, because it is loaded through
# GPT2LMHeadModel / GPT2Tokenizer (e.g. 'gpt2', 'gpt2-medium', 'distilgpt2').
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# GPT-2 ships without a padding token, so any tokenizer call that asks for
# padding raises until one is set. Reusing EOS as PAD adds no new token, so the
# model's embedding matrix does not need resizing.
tokenizer.pad_token = tokenizer.eos_token

# 2. Load Dataset
dataset = load_dataset("wikitext", "wikitext-103-raw-v1")  # You can use your own dataset

# 3. Tokenize the Dataset
def tokenize_function(examples):
    """Tokenize a batch of raw text rows and attach causal-LM labels.

    Args:
        examples: Batch dict handed over by ``map(..., batched=True)``; only
            its ``"text"`` column is read.

    Returns:
        The tokenizer output, padded/truncated to the tokenizer's maximum
        length, with an extra ``"labels"`` entry: a copy of ``input_ids`` in
        which padded positions are replaced by -100 so the loss ignores them.
    """
    # No return_tensors here: the mapped dataset stores plain lists, and the
    # tensors are built per batch at training time.
    encoded = tokenizer(examples["text"], padding="max_length", truncation=True)

    # GPT2LMHeadModel only returns a loss when it receives labels. For causal
    # LM the labels are the inputs themselves (the model shifts them
    # internally); -100 marks padded positions as "do not score".
    encoded["labels"] = [
        [token_id if is_real else -100 for token_id, is_real in zip(ids, mask)]
        for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded


# A row with no text would become an all-padding sequence with every label
# masked, and a batch made only of such rows has an undefined loss. Drop those
# rows before tokenizing.
non_empty_dataset = dataset.filter(lambda example: example["text"].strip() != "")

tokenized_datasets = non_empty_dataset.map(tokenize_function, batched=True, remove_columns=["text"])

# 4. Define Training Arguments
training_args = TrainingArguments(
    output_dir="./results",  # Directory to save model checkpoints
    # The older spelling `evaluation_strategy` is rejected by the transformers
    # release this notebook installs (4.51.0); `eval_strategy` is the current name.
    eval_strategy="epoch",  # Evaluate after each epoch
    per_device_train_batch_size=4,  # Adjust batch size based on your GPU memory
    per_device_eval_batch_size=4,
    num_train_epochs=3,  # Set the number of epochs
    logging_dir="./logs",  # Directory to save logs
    logging_steps=200,  # Log training metrics every 200 steps
    save_steps=1000,  # Save model after every 1000 steps
    save_total_limit=2,  # Keep only 2 checkpoints
)

# 5. Initialize Trainer
# Gather the Trainer inputs in one place: the model, the run configuration,
# and the train / validation splits of the tokenized dataset.
trainer_inputs = {
    "model": model,
    "args": training_args,
    "train_dataset": tokenized_datasets["train"],
    "eval_dataset": tokenized_datasets["validation"],
}
trainer = Trainer(**trainer_inputs)

# 6. Train the Model
trainer.train()

# 7. Save the Fine-Tuned Model
fine_tuned_dir = "./fine_tuned_gpt2"
trainer.save_model(fine_tuned_dir)

# 8. Load and Use the Fine-Tuned Model
# The weights come from the directory written by trainer.save_model(); the
# tokenizer is reloaded from the base "gpt2" checkpoint.
model = GPT2LMHeadModel.from_pretrained("./fine_tuned_gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Generate Text with the Fine-Tuned Model
input_text = "Once upon a time, "
inputs = tokenizer(input_text, return_tensors="pt")

# Pass the attention mask together with the ids, and name the pad token
# explicitly (GPT-2 has none of its own, so EOS is used). Otherwise generate()
# has to guess both and warns about it.
outputs = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=50,
    num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id,
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

