In [None]:
import torch
import numpy as np
import pandas as pd
import time
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


# Load the pre-split ISEAR train/test CSV files. Only the "text" and
# "emotion" columns are read below.
train_df = pd.read_csv("ISEAR_train.csv")
test_df = pd.read_csv("ISEAR_test.csv")


# Build the emotion <-> integer id mappings from the training set. Sorting the
# emotion names makes the id assignment deterministic across runs.
unique_emotions = sorted(train_df["emotion"].unique())
emotion2label = {emotion: idx for idx, emotion in enumerate(unique_emotions)}
label2emotion = {idx: emotion for emotion, idx in emotion2label.items()}

train_df["label"] = train_df["emotion"].map(emotion2label)

# Series.map returns NaN for values missing from the mapping, which would
# silently turn the label column into floats. Fail loudly instead.
test_labels = test_df["emotion"].map(emotion2label)
unmapped = test_labels.isna()
if unmapped.any():
    unknown_emotions = sorted(test_df.loc[unmapped, "emotion"].astype(str).unique())
    raise ValueError(
        f"Test set contains emotions not present in the training set: {unknown_emotions}"
    )
test_df["label"] = test_labels.astype("int64")

# preserve_index=False guarantees the pandas index never ends up as an extra
# dataset column.
train_dataset = Dataset.from_pandas(train_df[["text", "label"]], preserve_index=False)
test_dataset = Dataset.from_pandas(test_df[["text", "label"]], preserve_index=False)


# Checkpoint identifier used to load the tokenizer below.
model_name = "distilroberta-base"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

def tokenize_function(examples):
    """Tokenize the "text" entry of ``examples`` with the module-level tokenizer.

    Every sequence is truncated and padded to exactly 128 tokens.

    Args:
        examples: Mapping that provides the texts under the "text" key.

    Returns:
        Whatever the tokenizer returns for the given texts.
    """
    encoding_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    texts = examples["text"]
    return tokenizer(texts, **encoding_options)

# Columns exposed as torch tensors; all other columns are hidden from the
# formatted output.
_model_columns = ("input_ids", "attention_mask", "label")

# Tokenize in batches, then switch each dataset to torch-formatted output.
train_dataset = train_dataset.map(tokenize_function, batched=True)
train_dataset.set_format(type="torch", columns=list(_model_columns))

test_dataset = test_dataset.map(tokenize_function, batched=True)
test_dataset.set_format(type="torch", columns=list(_model_columns))


# One output class per distinct emotion seen in the training data.
num_labels = len(emotion2label)

# Store the human-readable label names in the model config so the saved
# checkpoint is self-describing instead of exposing generic "LABEL_i" names.
# str() keeps the mapping JSON-serializable whatever dtype pandas produced.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label={idx: str(emotion) for idx, emotion in label2emotion.items()},
    label2id={str(emotion): idx for emotion, idx in emotion2label.items()},
    hidden_dropout_prob=0.3,  # Dropout for the hidden layers
    attention_probs_dropout_prob=0.3,  # Dropout for the attention probabilities
)


# Hyperparameters collected in one place and unpacked into TrainingArguments.
_training_options = {
    # Output and logging locations.
    "output_dir": "./distilroberta_finetuned",
    "logging_dir": "./logs",
    "logging_steps": 10,
    # Evaluate and checkpoint once per epoch.
    "eval_strategy": "epoch",
    "save_strategy": "epoch",
    # Optimisation settings.
    "learning_rate": 2e-5,
    "per_device_train_batch_size": 16,
    "per_device_eval_batch_size": 16,
    "num_train_epochs": 10,
    "weight_decay": 0.1,  # Increased weight decay for stronger regularization
    # Best-model selection (also used by early stopping).
    "load_best_model_at_end": True,  # Reload the best model when training ends
    "metric_for_best_model": "eval_loss",  # Metric used for early stopping
    "greater_is_better": False,  # A lower validation loss is better
}

training_args = TrainingArguments(**_training_options)


def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 from predictions.

    Args:
        eval_pred: Pair ``(logits, labels)``. ``logits`` may also be a tuple
            of outputs, in which case its first element is used as the logits.

    Returns:
        Dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    logits, labels = eval_pred
    # Unwrap tuple-shaped predictions; the first element is taken as logits.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_score(labels, predictions)
    # zero_division=0 keeps the scores identical to the default behaviour but
    # avoids an undefined-metric warning whenever a class is never predicted.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average="weighted", zero_division=0
    )
    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}


# Hold out part of the training data for validation. Early stopping and
# best-checkpoint selection must not look at the test set, otherwise the final
# test metrics are optimistically biased.
_train_val_split = train_dataset.train_test_split(test_size=0.1, seed=42)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=_train_val_split["train"],
    eval_dataset=_train_val_split["test"],  # validation split, not the test set
    compute_metrics=compute_metrics,
    # Stop when the monitored metric has not improved for 3 evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)


# Measure the wall-clock duration of the fine-tuning run.
start_time = time.time()

trainer.train()

end_time = time.time()
training_time = end_time - start_time

print(f" Training completed!  Training time: {training_time / 60:.2f} minutes")


# Persist the fine-tuned weights together with the tokenizer files.
model.save_pretrained("./distilroberta_finetuned")
tokenizer.save_pretrained("./distilroberta_finetuned")


# Final, one-off evaluation on the untouched test set.
results = trainer.evaluate(eval_dataset=test_dataset)
print(" DistilRoBERTa Evaluation Results:")
print(results)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/6124 [00:00<?, ? examples/s]

Map:   0%|          | 0/1532 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/331M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


⏳ Starting DistilRoBERTa fine-tuning on ISEAR dataset...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.377,1.225775,0.545692,0.562995,0.545692,0.530622
2,1.1106,1.016539,0.644256,0.66277,0.644256,0.639168
3,1.0579,0.956602,0.672324,0.678959,0.672324,0.66941
4,0.9378,0.916468,0.693211,0.690744,0.693211,0.689836
5,1.1008,0.917372,0.693211,0.704171,0.693211,0.695739
6,0.8069,0.928236,0.700392,0.700058,0.700392,0.698299
7,0.8199,0.898415,0.70953,0.711381,0.70953,0.709603
8,0.6245,0.913059,0.705614,0.709407,0.705614,0.705723
9,0.7862,0.905372,0.710183,0.711392,0.710183,0.710045
10,0.7536,0.90591,0.710183,0.711856,0.710183,0.710221


✅ Training completed! 🕒 Training time: 4.28 minutes


📊 DistilRoBERTa Evaluation Results:
{'eval_loss': 0.8984150290489197, 'eval_accuracy': 0.7095300261096605, 'eval_precision': 0.711381142454905, 'eval_recall': 0.7095300261096605, 'eval_f1': 0.7096031638875056, 'eval_runtime': 1.371, 'eval_samples_per_second': 1117.44, 'eval_steps_per_second': 70.022, 'epoch': 10.0}
