In [59]:
!pip install -q transformers datasets

In [60]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [61]:
import pandas as pd
train_df = pd.read_csv("/content/drive/MyDrive/data/semeval2025-11/train/track_a/eng.csv")

In [62]:
train_df.head()

Unnamed: 0,id,text,Anger,Fear,Joy,Sadness,Surprise
0,eng_train_track_a_00001,But not very happy.,0,0,1,1,0
1,eng_train_track_a_00002,Well she's not gon na last the whole song like...,0,0,1,0,0
2,eng_train_track_a_00003,She sat at her Papa's recliner sofa only to mo...,0,0,0,0,0
3,eng_train_track_a_00004,"Yes, the Oklahoma city bombing.",1,1,0,1,1
4,eng_train_track_a_00005,They were dancing to Bolero.,0,0,1,0,0


In [63]:
labels = [label for label in train_df.keys() if label not in ['id', 'text']]
id2label = {idx:label for idx, label in enumerate(labels)}
label2id = {label:idx for idx, label in enumerate(labels)}
labels

['Anger', 'Fear', 'Joy', 'Sadness', 'Surprise']

In [64]:
from transformers import AutoTokenizer
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base")

def preprocess_data(examples):
  # take a batch of texts
  text = examples["text"]
  # encode them
  encoding = tokenizer(text, padding="max_length", truncation=True, max_length=128)
  # add labels
  labels_batch = {k: examples[k] for k in examples.keys() if k in labels}
  # create numpy array of shape (batch_size, num_labels)
  labels_matrix = np.zeros((len(text), len(labels)))
  # fill numpy array
  for idx, label in enumerate(labels):
    labels_matrix[:, idx] = labels_batch[label]

  encoding["labels"] = labels_matrix.tolist()

  return encoding

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [65]:
import datasets
dataset = datasets.Dataset.from_pandas(train_df)
dataset=dataset.train_test_split(test_size=0.3)
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'text', 'Anger', 'Fear', 'Joy', 'Sadness', 'Surprise'],
        num_rows: 1937
    })
    test: Dataset({
        features: ['id', 'text', 'Anger', 'Fear', 'Joy', 'Sadness', 'Surprise'],
        num_rows: 831
    })
})

In [66]:
encoded_dataset = dataset.map(preprocess_data, batched=True, remove_columns=dataset['train'].column_names)

Map:   0%|          | 0/1937 [00:00<?, ? examples/s]

Map:   0%|          | 0/831 [00:00<?, ? examples/s]

In [67]:
example = encoded_dataset['train'][0]
print(example.keys())

dict_keys(['input_ids', 'attention_mask', 'labels'])


In [68]:
tokenizer.decode(example['input_ids'])

'<s>watching her leave with dudes drove me crazy.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>'

In [69]:
example['labels']

[1.0, 1.0, 0.0, 1.0, 0.0]

In [70]:
[id2label[idx] for idx, label in enumerate(example['labels']) if label == 1.0]

['Anger', 'Fear', 'Sadness']

In [71]:
encoded_dataset.set_format("torch")

In [72]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("FacebookAI/roberta-base",
                                                           problem_type="multi_label_classification",
                                                           num_labels=len(labels),
                                                           id2label=id2label,
                                                           label2id=label2id)

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [73]:
batch_size = 8
metric_name = "f1_micro"

In [74]:
from transformers import TrainingArguments, Trainer

args = TrainingArguments(
    f"bert-large-finetuned-sem_eval-english",
    eval_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=8,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    report_to="none"
)

In [75]:
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.metrics import jaccard_score
from transformers import EvalPrediction
import torch

def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    f1_macro_average = f1_score(y_true=y_true, y_pred=y_pred, average='macro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    jaccard = jaccard_score(y_true, y_pred, average = 'samples')
    # return as dictionary
    metrics = {'f1_micro': f1_micro_average,
               'f1_macro': f1_macro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy,
               'jaccard': jaccard}
    return metrics

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions,
            tuple) else p.predictions
    result = multi_label_metrics(
        predictions=preds,
        labels=p.label_ids)
    return result

In [76]:
encoded_dataset['train'][0]['labels'].type()

'torch.FloatTensor'

In [77]:
encoded_dataset['train']['input_ids'][0]

tensor([    0, 34464,    69,   989,    19, 37108,  4024,   162,  5373,     4,
            2,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1])

In [78]:
#forward pass
outputs = model(input_ids=encoded_dataset['train']['input_ids'][0].unsqueeze(0), labels=encoded_dataset['train'][0]['labels'].unsqueeze(0))
outputs

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


SequenceClassifierOutput(loss=tensor(0.6973, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), logits=tensor([[-0.0601,  0.0423,  0.2030,  0.0223, -0.1763]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [79]:
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["test"],
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    # report_to="none"
)


In [80]:
trainer.train()

Epoch,Training Loss,Validation Loss,F1 Micro,F1 Macro,Roc Auc,Accuracy,Jaccard
1,No log,0.404221,0.69629,0.574464,0.775073,0.393502,0.552848
2,No log,0.35854,0.734513,0.66208,0.802146,0.434416,0.578018
3,0.424000,0.372219,0.765625,0.711091,0.831267,0.474128,0.630666
4,0.424000,0.351189,0.765558,0.717272,0.826233,0.477738,0.622122
5,0.230700,0.373374,0.77041,0.723654,0.831783,0.487365,0.629342
6,0.230700,0.377319,0.776789,0.73202,0.835445,0.492178,0.62846
7,0.130800,0.384881,0.776899,0.73373,0.833208,0.501805,0.630967
8,0.130800,0.396084,0.775982,0.730167,0.835389,0.495788,0.63197


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


TrainOutput(global_step=1944, training_loss=0.2202391094631619, metrics={'train_runtime': 541.5923, 'train_samples_per_second': 28.612, 'train_steps_per_second': 3.589, 'total_flos': 1019319683905536.0, 'train_loss': 0.2202391094631619, 'epoch': 8.0})

In [81]:
trainer.evaluate()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 0.3848811686038971,
 'eval_f1_micro': 0.7768987341772152,
 'eval_f1_macro': 0.7337304350186697,
 'eval_roc_auc': 0.8332082154570062,
 'eval_accuracy': 0.5018050541516246,
 'eval_jaccard': 0.630966706778981,
 'eval_runtime': 5.36,
 'eval_samples_per_second': 155.036,
 'eval_steps_per_second': 19.403,
 'epoch': 8.0}

In [82]:
# save model
trainer.save_model("/content/drive/MyDrive/data/semeval2025-11/roberta-base/")

In [83]:
dev_df = pd.read_csv("/content/drive/MyDrive/data/semeval2025-11/dev/track_a/eng_a.csv")

In [84]:
def predict_labels(text):
  encoding = tokenizer(text, return_tensors="pt")
  encoding = {k: v.to(trainer.model.device) for k,v in encoding.items()}

  outputs = trainer.model(**encoding)
  logits = outputs.logits
  # apply sigmoid + threshold
  sigmoid = torch.nn.Sigmoid()
  probs = sigmoid(logits.squeeze().cpu())
  predictions = np.zeros(probs.shape)
  predictions[np.where(probs >=0.5)] = 1
  if sum(predictions)==0:
    predictions[probs.tolist().index(max(probs.tolist()))]=1
  predictions = [int(item) for item in predictions]
  # turn predicted id's into actual label names
  predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1]
  print(predicted_labels, predictions)
  return predicted_labels, predictions

In [85]:
results = []
texts = dev_df['text'].tolist()
ids = dev_df['id'].tolist()
for text,id in zip(texts,ids):
  _,predictions = predict_labels(text)
  result = dict(zip(labels, predictions))
  result['id'] = id
  results.append(result)

['Fear', 'Surprise'] [0, 1, 0, 0, 1]
['Fear', 'Sadness'] [0, 1, 0, 1, 0]
['Fear'] [0, 1, 0, 0, 0]
['Fear', 'Sadness'] [0, 1, 0, 1, 0]
['Fear', 'Surprise'] [0, 1, 0, 0, 1]
['Fear'] [0, 1, 0, 0, 0]
['Anger'] [1, 0, 0, 0, 0]
['Sadness'] [0, 0, 0, 1, 0]
['Joy'] [0, 0, 1, 0, 0]
['Fear', 'Sadness'] [0, 1, 0, 1, 0]
['Joy'] [0, 0, 1, 0, 0]
['Fear', 'Sadness'] [0, 1, 0, 1, 0]
['Anger', 'Sadness'] [1, 0, 0, 1, 0]
['Fear', 'Surprise'] [0, 1, 0, 0, 1]
['Joy'] [0, 0, 1, 0, 0]
['Fear', 'Joy'] [0, 1, 1, 0, 0]
['Anger', 'Fear'] [1, 1, 0, 0, 0]
['Fear', 'Surprise'] [0, 1, 0, 0, 1]
['Fear', 'Surprise'] [0, 1, 0, 0, 1]
['Anger', 'Fear'] [1, 1, 0, 0, 0]
['Joy'] [0, 0, 1, 0, 0]
['Fear'] [0, 1, 0, 0, 0]
['Sadness', 'Surprise'] [0, 0, 0, 1, 1]
['Anger', 'Fear', 'Surprise'] [1, 1, 0, 0, 1]
['Fear'] [0, 1, 0, 0, 0]
['Joy'] [0, 0, 1, 0, 0]
['Fear', 'Surprise'] [0, 1, 0, 0, 1]
['Joy'] [0, 0, 1, 0, 0]
['Joy'] [0, 0, 1, 0, 0]
['Fear', 'Surprise'] [0, 1, 0, 0, 1]
['Fear'] [0, 1, 0, 0, 0]
['Fear'] [0, 1, 0, 0, 0]
['

In [86]:
result_df = pd.DataFrame(results)

In [87]:
#reconstruct to id,Joy,Anger,Sadness,Surprise,Fear
result_df = result_df[['id','Anger', 'Fear', 'Joy', 'Sadness', 'Surprise']]
result_df.to_csv('/content/drive/MyDrive/data/semeval2025-11/pred_eng_a.csv',index=False)