In [11]:
pip install transformers datasets torch tqdm scikit-learn

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [5]:
import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import get_scheduler
from datasets import load_dataset
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score

# Helpers used by fine_tune_transformer (defined below)
def _run_training_epoch(model, data_loader, optimizer, lr_scheduler, device, description):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Each batch is expected to be a dict of tensors that already contains a
    ``labels`` entry, so the model computes the loss itself.

    Args:
        model: The sequence-classification model being trained.
        data_loader: Iterable of training batches (dicts of tensors).
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Learning-rate scheduler stepped once per batch.
        device: ``torch.device`` every batch tensor is moved to.
        description: Text shown next to the progress bar.

    Returns:
        The sum of per-batch losses divided by the number of batches.
    """
    model.train()
    total_loss = 0.0

    for batch in tqdm(data_loader, desc=description):
        batch = {key: value.to(device) for key, value in batch.items()}

        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches.
    return total_loss / max(len(data_loader), 1)


def _evaluate_accuracy(model, data_loader, device):
    """Return the accuracy of ``model`` over every batch in ``data_loader``.

    The ``labels`` entry is removed from each batch before the forward pass,
    so only the logits are computed (no loss), and the arg-max prediction is
    compared with the reference labels using ``accuracy_score``.
    """
    model.eval()
    predictions = []
    references = []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            labels = batch.pop("labels")
            batch = {key: value.to(device) for key, value in batch.items()}

            logits = model(**batch).logits
            predictions.extend(torch.argmax(logits, dim=-1).cpu().tolist())
            references.extend(labels.tolist())

    return accuracy_score(references, predictions)


# Simple function to fine-tune a transformer model on a small dataset
def fine_tune_transformer(
    train_size=2000,
    num_epochs=3,
    batch_size=16,
    learning_rate=2e-5,
    max_length=64,
    seed=42,
    device="cpu",
):
    """Fine-tune DistilBERT on the ``tweet_eval``/``emotion`` dataset and run a demo.

    The function loads the dataset, tokenizes a training subset and the full
    validation split, trains with AdamW and a linear learning-rate schedule,
    prints the average training loss and validation accuracy after every
    epoch, and finally prints the predicted emotion for a few sample texts.

    Calling it with no arguments uses the same hyper-parameters that were
    previously hard-coded (2000 training rows, 3 epochs, batch size 16,
    learning rate 2e-5, 64-token sequences, CPU).

    Args:
        train_size: Number of leading training examples to use; clamped to
            the size of the training split.
        num_epochs: Number of passes over the training subset.
        batch_size: Batch size for both the training and validation loaders.
        learning_rate: Learning rate passed to AdamW.
        max_length: Length every text is padded/truncated to, in tokens.
        seed: Seed passed to ``torch.manual_seed`` so shuffling and weight
            initialisation are repeatable; ``None`` leaves the RNG untouched.
        device: Anything accepted by ``torch.device`` (defaults to the CPU).

    Returns:
        A ``(model, tokenizer)`` tuple holding the fine-tuned model and the
        tokenizer that was used to encode its inputs.
    """
    print("Starting the fine-tuning process...")

    if seed is not None:
        torch.manual_seed(seed)

    # Step 1: Load a small dataset for demonstration
    print("Loading dataset...")
    dataset = load_dataset("tweet_eval", "emotion")

    train_split = dataset["train"]
    # Clamp so that asking for more rows than exist does not raise.
    train_dataset = train_split.select(range(min(train_size, len(train_split))))
    eval_dataset = dataset["validation"]  # Use all available validation samples

    # Prefer the label names declared in the dataset schema (when the label
    # column exposes them): counting the distinct labels seen in a subset
    # would undercount if a class happened to be missing from that slice.
    label_names = getattr(train_dataset.features["label"], "names", None)
    if label_names:
        emotion_labels = dict(enumerate(label_names))
    else:
        # Fallback mapping (based on tweet_eval emotion dataset)
        emotion_labels = {0: "anger", 1: "joy", 2: "optimism", 3: "sadness"}
    num_labels = len(emotion_labels)

    # Step 2: Load a small pre-trained model
    print("Loading pre-trained model and tokenizer...")
    model_name = "distilbert-base-uncased"  # Small model, good for CPU
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Step 3: Prepare the data in a simple way
    def preprocess_data(examples):
        # Convert text to tokens, padded/truncated to a fixed length
        return tokenizer(
            examples["text"],
            padding="max_length",
            truncation=True,
            max_length=max_length,
        )

    def encode_split(split):
        # Tokenize, drop the raw text, and rename 'label' to 'labels', which
        # is the keyword the model expects, so batches can be passed as-is.
        encoded = split.map(preprocess_data, batched=True)
        encoded = encoded.remove_columns(["text"]).rename_column("label", "labels")
        encoded.set_format("torch")
        return encoded

    print("Preprocessing data...")
    train_encoded = encode_split(train_dataset)
    eval_encoded = encode_split(eval_dataset)

    # Step 4: Set up data loaders
    train_loader = DataLoader(train_encoded, shuffle=True, batch_size=batch_size)
    eval_loader = DataLoader(eval_encoded, batch_size=batch_size)

    # Step 5: Initialize the model, passing the label names along with the
    # number of classes
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=num_labels,
        id2label=emotion_labels,
        label2id={name: index for index, name in emotion_labels.items()},
    )

    # Step 6: Set up the optimizer and a simple linear learning rate scheduler
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    num_training_steps = num_epochs * len(train_loader)
    lr_scheduler = get_scheduler(
        "linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps,
    )

    # Step 7: Simple training loop
    device = torch.device(device)
    model.to(device)

    print(f"Training for {num_epochs} epochs...")

    for epoch in range(num_epochs):
        avg_train_loss = _run_training_epoch(
            model,
            train_loader,
            optimizer,
            lr_scheduler,
            device,
            description=f"Epoch {epoch+1}/{num_epochs}",
        )
        print(f"Average training loss: {avg_train_loss:.4f}")

        # Evaluation phase after each epoch
        accuracy = _evaluate_accuracy(model, eval_loader, device)
        print(f"Validation accuracy: {accuracy:.4f}")

    # Step 8: Inference with the fine-tuned model
    print("Performing inference with the fine-tuned model...")

    # Switch to eval mode explicitly instead of relying on the last
    # evaluation pass having done so (it never runs when num_epochs is 0).
    model.eval()

    # Function to predict emotion for a given text
    def predict_emotion(text):
        # Tokenize the input text the same way as the training data
        inputs = tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=max_length,
            return_tensors="pt",  # Return PyTorch tensors
        )

        # Move inputs to the device
        inputs = {key: value.to(device) for key, value in inputs.items()}

        # Make prediction
        with torch.no_grad():
            logits = model(**inputs).logits
        predicted_class = torch.argmax(logits, dim=-1).item()

        # Return the predicted emotion
        return emotion_labels[predicted_class]

    # Test with example inputs
    test_texts = [
        "I am so happy today, life is great!",
        "This is the worst day ever, everything went wrong.",
        "I'm feeling hopeful about the future.",
        "Why does everything always go wrong for me?"
    ]

    for text in test_texts:
        predicted_emotion = predict_emotion(text)
        print(f"Text: {text}")
        print(f"Predicted Emotion: {predicted_emotion}\n")

    print("Fine-tuning and inference completed!")
    return model, tokenizer

# Run the fine-tuning process.
# Bind the returned model and tokenizer to names so they stay available after
# this cell has run instead of being discarded.
if __name__ == "__main__":
    model, tokenizer = fine_tune_transformer()

Starting the fine-tuning process...
Loading dataset...
Loading pre-trained model and tokenizer...
Preprocessing data...


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/374 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training for 3 epochs...


Epoch 1/3:   0%|          | 0/125 [00:00<?, ?it/s]

Average training loss: 1.0559


Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Validation accuracy: 0.7406


Epoch 2/3:   0%|          | 0/125 [00:00<?, ?it/s]

Average training loss: 0.5695


Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Validation accuracy: 0.7701


Epoch 3/3:   0%|          | 0/125 [00:00<?, ?it/s]

Average training loss: 0.3819


Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Validation accuracy: 0.7674
Performing inference with the fine-tuned model...
Text: I am so happy today, life is great!
Predicted Emotion: joy

Text: This is the worst day ever, everything went wrong.
Predicted Emotion: sadness

Text: I'm feeling hopeful about the future.
Predicted Emotion: sadness

Text: Why does everything always go wrong for me?
Predicted Emotion: sadness

Fine-tuning and inference completed!
