# In [None]:
# Simple & Fast Transformer Fine-tuning - No BS Version
# Just the essentials to get it working quickly!
import numpy as np
import torch
from datasets import load_dataset
from sklearn.metrics import accuracy_score
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)
print("■ Quick Fine-tuning Setup - Let's go fast!")

# Speed-oriented settings shared by the rest of the script.
MODEL_NAME = "distilbert-base-uncased"  # DistilBERT: picked because it is much faster than BERT
MAX_LENGTH = 128  # token cap per example; short sequences = faster training
BATCH_SIZE = 32  # per-device batch size; bigger batches = faster on GPU
EPOCHS = 1  # a single epoch, just to see if the pipeline works

print(f"Using model: {MODEL_NAME}")

# Report whether torch can see a CUDA device.
if torch.cuda.is_available():
    device_label = "GPU"
else:
    device_label = "CPU"
print(f"Device: {device_label}")
# Fetch AG News and keep only a small prefix of each split so the run stays short.
print("■ Loading AG News dataset (small subset)...")
dataset = load_dataset("ag_news")
train_split = dataset["train"]
test_split = dataset["test"]
small_train = train_split.select(range(1000))  # first 1,000 training rows
small_test = test_split.select(range(200))  # first 200 test rows
print(f"Train samples: {len(small_train)}")
print(f"Test samples: {len(small_test)}")

# Tokenizer belonging to the MODEL_NAME checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def tokenize_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Args:
        examples: Mapping with a "text" entry; it is passed straight to the
            module-level ``tokenizer``.

    Returns:
        The tokenizer's output, with every sequence truncated and padded to
        exactly ``MAX_LENGTH`` tokens.
    """
    texts = examples["text"]
    encoded = tokenizer(
        texts,
        max_length=MAX_LENGTH,
        padding="max_length",  # fixed-width rows, not per-batch dynamic padding
        truncation=True,
    )
    return encoded
# Run tokenize_function over both subsets; batched=True hands it many rows per call.
print("■ Quick tokenization...")
train_dataset, eval_dataset = (
    subset.map(tokenize_function, batched=True)
    for subset in (small_train, small_test)
)

# Sequence-classification model for the MODEL_NAME checkpoint with 4 output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=4,
)
# Simple metrics function
def compute_metrics(eval_pred):
    """Compute accuracy from a (predictions, labels) pair.

    Args:
        eval_pred: Unpacks into two items: per-class scores (the predicted
            class is the argmax over axis 1) and the reference labels.

    Returns:
        A dict with the single key "accuracy".
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    accuracy = accuracy_score(references, predicted_classes)
    return {"accuracy": accuracy}
# Minimal training arguments - optimized for speed
training_args = TrainingArguments(
    output_dir="./quick_results",
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    # With 1,000 samples at batch size 32 one epoch is only ~32 steps on a
    # single device, so log every 10 steps; an interval of 50 would never fire.
    logging_steps=10,
    eval_strategy="epoch",  # Evaluate once at the end of each epoch
    save_strategy="no",  # Don't save checkpoints to disk - saves time
    # The string "none" disables every reporting integration. Python's None is
    # NOT equivalent: on transformers 4.x it falls back to "all".
    report_to="none",
    dataloader_num_workers=0,  # Avoid multiprocessing overhead
)
# Wire the model, both tokenized splits, and the accuracy callback into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=eval_dataset,
    train_dataset=train_dataset,
)
print("■ Starting quick training...")
print("=" * 50)

# Train the model
trainer.train()

# Quick evaluation
# (this banner had been fused onto the comment line above, so it never printed)
print("\n■ Quick evaluation...")
results = trainer.evaluate()
print(f"Final Accuracy: {results['eval_accuracy']:.4f}")
# Test some predictions
print("\n■ Testing predictions...")
test_texts = [
    "Apple stock rises after earnings report",
    "Football team wins championship",
    "New AI breakthrough announced",
    "Political debate continues",
]
# Entry i names class id i.
# NOTE(review): presumably this matches the dataset's label order - verify
# against dataset["train"].features["label"].names.
label_names = ["World", "Sports", "Business", "Technology"]

# Get device that model is on
device = next(model.parameters()).device
print(f"Model is on: {device}")

# Make sure dropout is disabled for inference, regardless of what ran before.
model.eval()

for text in test_texts:
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=MAX_LENGTH,
    )
    # Move inputs to same device as model
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():  # no gradients needed for prediction
        outputs = model(**inputs)
    # Single-row batch, so .item() yields the one predicted class id.
    prediction = torch.argmax(outputs.logits, dim=-1).item()
    print(f"Text: '{text[:40]}...'")
    print(f"Predicted: {label_names[prediction]}\n")
print("■ Done! That was quick!")
print("\nTo make it even faster next time:")
# One tip per output line.
print(
    "- Use fewer samples: dataset.select(range(500))",
    "- Shorter sequences: MAX_LENGTH = 64",
    "- Bigger batches: BATCH_SIZE = 64 (if GPU memory allows)",
    "- Use 'distilbert-base-uncased' (smallest model)",
    sep="\n",
)
# Optional: Save just the model weights if needed
# model.save_pretrained("./quick_model")
# tokenizer.save_pretrained("./quick_model")