Fine-tunes a Hugging Face Transformers + PyTorch sentiment model on a small set of Amazon-style reviews.

Shows live training logs and plots the training loss curve 📉.

Tests predictions before and after fine-tuning so we can see the impact of training in real time.

In [1]:
# ============================
# 📦 STEP 1: Install libraries
# ============================
!pip install transformers datasets evaluate accelerate matplotlib tf-keras -q

In [None]:
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    pipeline,
    TrainerCallback
)
import evaluate
import numpy as np
import matplotlib.pyplot as plt

# Small dataset, written as (review text, label) pairs.
# Label 1 marks the positive-sounding reviews, 0 the negative ones.
labeled_reviews = [
    ("Amazing quality! I love this.", 1),
    ("Not good, I expected better.", 0),
    ("Terrible product, waste of money.", 0),
    ("Awesome product and fast delivery!", 1),
    ("Worst purchase ever.", 0),
    ("I absolutely love it!", 1),
    ("Not worth the price.", 0),
    ("Fantastic quality and support.", 1),
]

# Column-oriented view: one list per field, rows in the same order as above.
data = {
    "text": [text for text, _ in labeled_reviews],
    "label": [label for _, label in labeled_reviews],
}

# The first `n_train` rows are used for training; the rest are held out
# for evaluation.
n_train = 6
train_dataset = Dataset.from_dict(
    {column: rows[:n_train] for column, rows in data.items()}
)
test_dataset = Dataset.from_dict(
    {column: rows[n_train:] for column, rows in data.items()}
)

# Tokenizer + model
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)


def preprocess_function(examples):
    """Tokenize the ``"text"`` field of a batch of examples.

    Sequences are truncated to at most 64 tokens and padded up to that
    same length, so every encoded row has a fixed size.

    Args:
        examples: A batch mapping that contains a ``"text"`` entry.

    Returns:
        Whatever the module-level ``tokenizer`` returns for that text.
    """
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 64,
    }
    return tokenizer(examples["text"], **tokenizer_options)

def _tokenize_for_trainer(dataset):
    """Tokenize ``dataset`` and return it formatted as torch tensors."""
    tokenized = dataset.map(preprocess_function, batched=True)
    # Expose the targets under the column name "labels".
    tokenized = tokenized.rename_column("label", "labels")
    # set_format works in place, so the same object is returned.
    tokenized.set_format("torch")
    return tokenized


encoded_train = _tokenize_for_trainer(train_dataset)
encoded_test = _tokenize_for_trainer(test_dataset)

# Two output classes, matching the 0/1 labels in the dataset.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Metrics
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score a batch of evaluation predictions with the accuracy metric.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        The result of ``accuracy.compute`` for the arg-max class of each
        row of logits against the reference labels.
    """
    scores, references = eval_pred
    # The predicted class is the index of the largest logit in each row.
    predicted_classes = np.argmax(scores, axis=-1)
    return accuracy.compute(predictions=predicted_classes, references=references)

# Training args
import inspect

# Newer Transformers releases renamed `evaluation_strategy` to `eval_strategy`
# and eventually dropped the old keyword. Pick whichever name the installed
# version accepts so this cell runs on both old and new releases.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    logging_strategy="steps",  # show live logs
    logging_steps=1,  # log the loss after every optimizer step
    save_strategy="no",  # demo only: do not write checkpoints
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,  # reduced for demo
    weight_decay=0.01,
    report_to="none",  # no external experiment trackers
    **{_eval_strategy_key: "epoch"},  # evaluate at the end of every epoch
)

# Loss-recording callback
class LossHistoryCallback(TrainerCallback):
    """Collect every logged ``"loss"`` value so it can be plotted later."""

    def __init__(self):
        # Loss values in the order they were logged.
        self.losses = []

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Append ``logs["loss"]`` to ``self.losses`` when it is present.

        Log payloads that are empty/``None`` or that carry no ``"loss"``
        key are ignored.
        """
        if not logs or "loss" not in logs:
            return
        self.losses.append(logs["loss"])

# Callback instance kept at module level so the recorded losses can be
# read back after training.
loss_history = LossHistoryCallback()

trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": encoded_train,
    "eval_dataset": encoded_test,
    "compute_metrics": compute_metrics,
    "callbacks": [loss_history],
}
trainer = Trainer(**trainer_kwargs)

# Train
trainer.train()

# Plot loss (one point per value recorded by the loss callback)
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(loss_history.losses, label="Training Loss")
ax.set_xlabel("Steps")
ax.set_ylabel("Loss")
ax.set_title("Training Loss Curve")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show(block=False)  # non-blocking so execution continues to inference

# Inference
# Give the two classes readable names so the pipeline reports
# NEGATIVE/POSITIVE instead of the generic LABEL_0/LABEL_1. The mapping
# follows the training data, where 1 marks positive reviews and 0 negative.
model.config.id2label = {0: "NEGATIVE", 1: "POSITIVE"}
model.config.label2id = {name: idx for idx, name in model.config.id2label.items()}

sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
test_reviews = [
    "Not good, I expected better.",
    "Amazing quality!",
    "Worst purchase ever.",
    "I love this product so much!",
    "Terrible quality and very slow shipping.",
    "Absolutely fantastic experience!",
]

print("\n🧪 Test predictions:")
# One batched call returns one result dict per input review, in order.
for review, res in zip(test_reviews, sentiment_pipeline(test_reviews)):
    print(f"{review} ➝ {res['label']} ({res['score']:.2f})")




Before fine-tuning: The classification head is still untrained, so some sentences like “Not good, I expected better” might be misclassified as positive.

After fine-tuning: Predictions should become more accurate, although with only six training examples and three epochs the improvement may be modest.

📈 Loss curve clearly shows how the model is learning over steps.