In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, classification_report

2025-06-27 00:00:08.360283: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-27 00:00:08.372651: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750982408.387080 1005914 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750982408.391478 1005914 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1750982408.403102 1005914 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Load dataset
training = pd.read_csv("train_dataset.csv")
test = pd.read_csv("test_dataset.csv")

In [4]:
# Define features and target
X = training["Sentence"].astype(str)
y = training["Emotion"]

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [5]:
# Split into train and validation (80% train, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Load test data
X_test = test['Sentence'].astype(str)
y_test = test['Emotion']

y_test_encoded = label_encoder.transform(y_test)

In [6]:
# Load BERTje tokenizer
model_name = "wietsedv/bert-base-dutch-cased"
tokenizer = BertTokenizer.from_pretrained(model_name)

In [7]:
# Tokenize text
class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "label": torch.tensor(label, dtype=torch.long),
        }

In [8]:
# Create datasets
train_dataset = EmotionDataset(X_train.tolist(), y_train, tokenizer)
val_dataset = EmotionDataset(X_val.tolist(), y_val, tokenizer)
test_dataset = EmotionDataset(X_test.tolist(), y_test_encoded, tokenizer)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [9]:
# Load BERTje model
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(label_encoder.classes_))
model.to(device)

# Training setup
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

def train(model, train_loader):
    model.train()
    total_loss, total_correct = 0, 0
    predictions, true_labels = [], []
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids, attention_mask, labels = batch['input_ids'].to(device), batch['attention_mask'].to(device), batch['label'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask)
        loss = criterion(outputs.logits, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        preds = torch.argmax(outputs.logits, dim=1)
        predictions.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())
    return total_loss / len(train_loader), accuracy_score(true_labels, predictions)

def evaluate(model, train_loader):
    model.eval()
    predictions, true_labels = [], []
    with torch.no_grad():
        for batch in train_loader:
            input_ids, attention_mask, labels = batch['input_ids'].to(device), batch['attention_mask'].to(device), batch['label'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs.logits, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    acc = accuracy_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions, average='weighted')

    # Convert numerical predictions back to emotion names for the classification report
    pred_emotions = label_encoder.inverse_transform(predictions)
    true_emotions = label_encoder.inverse_transform(true_labels)
    
    return acc, f1, classification_report(true_emotions, pred_emotions)

for epoch in range(3):
    train_loss, train_acc = train(model, train_loader)  # Train on train_loader
    val_acc, val_f1, val_report = evaluate(model, val_loader)  # Evaluate on validation set

    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.4f}")
    print(f"Validation Acc = {val_acc:.4f}, Validation F1 = {val_f1:.4f}")
    print(val_report)

test_acc, test_f1, test_report = evaluate(model, test_loader)
print(f"Final Test Acc = {test_acc:.4f}, Test F1 = {test_f1:.4f}")
print(test_report)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at wietsedv/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1: Train Loss = 1.3364, Train Acc = 0.5032
Validation Acc = 0.6964, Validation F1 = 0.6959
              precision    recall  f1-score   support

       anger       1.00      0.71      0.83        49
     disgust       0.88      0.68      0.77        75
        fear       0.67      0.93      0.78       198
   happiness       0.52      0.73      0.61       190
     neutral       0.72      0.53      0.61       242
     sadness       0.79      0.68      0.73       209
    surprise       0.75      0.65      0.70       216

    accuracy                           0.70      1179
   macro avg       0.76      0.70      0.72      1179
weighted avg       0.72      0.70      0.70      1179

Epoch 2: Train Loss = 0.5301, Train Acc = 0.8242
Validation Acc = 0.8134, Validation F1 = 0.8120
              precision    recall  f1-score   support

       anger       1.00      0.96      0.98        49
     disgust       0.95      0.93      0.94        75
        fear       0.89      0.95      0.92   

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import numpy as np
# Get predictions on test set
y_true = []
y_pred = []

model.eval()
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)
        
        outputs = model(input_ids, attention_mask=attention_mask)
        preds = torch.argmax(outputs.logits, dim=1)
        
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

# Convert numerical labels to emotion names
y_true_emotions = label_encoder.inverse_transform(y_true)
y_pred_emotions = label_encoder.inverse_transform(y_pred)

# Get unique emotions that appear in predictions
unique_emotions = np.unique(np.concatenate([y_true_emotions, y_pred_emotions]))

# Compute confusion matrix with emotion names
cm = confusion_matrix(y_true_emotions, y_pred_emotions, labels=pred_emotions)

# Convert numerical labels to categorical labels using label_encoder
filtered_labels = label_encoder.inverse_transform(unique_labels)  # Convert numbers to category names

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=filtered_labels, yticklabels=filtered_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Print emotion mapping for reference
print("\nEmotion Label Mapping:")
for i, emotion in enumerate(label_encoder.classes_):
    print(f"{i}: {emotion}")

In [12]:
# Save the fine-tuned model and tokenizer
model_save_path = "./finetuned-bertje_emotion_classifier"

# Create directory if it doesn't exist
import os
os.makedirs(model_save_path, exist_ok=True)

# Save the model and tokenizer
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)

# Also save the label encoder for future use
import joblib
joblib.dump(label_encoder, os.path.join(model_save_path, "label_encoder.pkl"))

print(f"Model saved to: {model_save_path}")
print("Files saved:")
print("- pytorch_model.bin (model weights)")
print("- config.json (model configuration)")
print("- tokenizer.json (tokenizer)")
print("- vocab.txt (vocabulary)")
print("- label_encoder.pkl (label encoder)")

Model saved to: ./finetuned-bertje_emotion_classifier
Files saved:
- pytorch_model.bin (model weights)
- config.json (model configuration)
- tokenizer.json (tokenizer)
- vocab.txt (vocabulary)
- label_encoder.pkl (label encoder)
