## Monolingual Bulgarian

In [None]:
# Mount Google Drive so the data and model folders under MyDrive are reachable.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset, concatenate_datasets
from transformers import AutoTokenizer
from collections import Counter
import random
import torch
from transformers import DataCollatorWithPadding, EarlyStoppingCallback
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import DataLoader, WeightedRandomSampler
from pathlib import Path

## Load Data

In [None]:
# Folder on the mounted Drive that holds the Bulgarian splits.
base_dir = '/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/data/bulgarian'

# One TSV file per split, all located directly under base_dir.
train_path = base_dir + '/train_bg.tsv'
dev_path = base_dir + '/dev_bg.tsv'
dev_test_path = base_dir + '/dev_test_bg.tsv'


In [None]:
# Read the three tab-separated splits; the path order matches the names on the left.
train_df, dev_df, dev_test_df = (
    pd.read_csv(split_path, sep='\t')
    for split_path in (train_path, dev_path, dev_test_path)
)

## Preprocessing

In [None]:
# Encode the string labels as integer class ids. The encoder is fitted on the
# training split only and then applied unchanged to the other two splits.
le = LabelEncoder()
le.fit(train_df['label'])

# `assign` returns a new frame and keeps the source 'label' column, so this
# cell is idempotent: it can be re-run on the same kernel without a KeyError.
# Later cells select the columns they need explicitly, so the extra column is
# harmless.
train_df    = train_df.assign(labels=le.transform(train_df['label']))
dev_df      = dev_df.assign(labels=le.transform(dev_df['label']))
dev_test_df = dev_test_df.assign(labels=le.transform(dev_test_df['label']))

print("Mapped classes:", dict(enumerate(le.classes_)))


Mapped classes: {0: 'OBJ', 1: 'SUBJ'}


In [None]:
# Only the text and the integer target are carried over into the datasets.
model_columns = ['sentence', 'labels']
train_ds, dev_ds, dev_test_ds = (
    Dataset.from_pandas(frame[model_columns])
    for frame in (train_df, dev_df, dev_test_df)
)

In [None]:
# Class frequencies in the training split; ids follow the encoder mapping
# printed above (0 = OBJ, 1 = SUBJ).
counts = Counter(train_ds['labels'])
n_obj = counts[0]
n_subj = counts[1]
print(f"Original counts → OBJ: {n_obj}, SUBJ: {n_subj}")

Original counts → OBJ: 379, SUBJ: 312


## Tokenize

To tokenize the data, we use the tokenizer of the **mDeBERTa-v3-base** model (`microsoft/mdeberta-v3-base`).

In [None]:
# Maximum number of tokens kept per sentence; longer inputs are truncated.
max_len = 100

# Hub checkpoint the tokenizer is loaded from.
model_name = "microsoft/mdeberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    """Tokenize the ``'sentence'`` field of a batch.

    Sequences longer than ``max_len`` tokens are truncated. No padding is
    applied here: the notebook batches examples with
    ``DataCollatorWithPadding``, which pads each batch to its longest
    sequence. Padding every example to ``max_len`` up front would make that
    dynamic padding a no-op and spend compute on pad tokens.

    Args:
        batch: Mapping with a ``'sentence'`` entry (a list of strings when
            called through ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output for the batch (unpadded, truncated encodings).
    """
    return tokenizer(batch['sentence'],
                     truncation=True,
                     max_length=max_len)

# Columns the model consumes; everything else is dropped after tokenization.
cols = ['input_ids','attention_mask','labels']


def _encode_split(split):
    """Tokenize one split and keep only the columns listed in ``cols``."""
    encoded = split.map(tokenize, batched=True)
    unused = [name for name in encoded.column_names if name not in cols]
    return encoded.remove_columns(unused)


train_ds    = _encode_split(train_ds)
dev_ds      = _encode_split(dev_ds)
dev_test_ds = _encode_split(dev_test_ds)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/691 [00:00<?, ? examples/s]

Map:   0%|          | 0/306 [00:00<?, ? examples/s]

Map:   0%|          | 0/250 [00:00<?, ? examples/s]

## Model

 Define a data collator for dynamic padding and a metrics function to compute per-class precision, recall, F1, and macro F1 score.


In [None]:
# Collator that pads the examples of a batch to a common length using the tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

def compute_metrics(eval_pred):
    """Compute accuracy, per-class precision/recall/F1 and macro F1.

    Args:
        eval_pred: Pair ``(logits, labels)``; the predicted class is the
            argmax of ``logits`` over the last axis.

    Returns:
        Dict with ``accuracy``, ``precision_*``/``recall_*``/``f1_*`` for the
        OBJ (id 0) and SUBJ (id 1) classes, and ``macro_f1`` (mean of the two
        per-class F1 scores).
    """
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)

    # Scores are requested for class ids 0 and 1 in that order; a class with
    # no predictions scores 0 instead of raising a warning.
    precision, recall, f1, _support = precision_recall_fscore_support(
        labels, preds, labels=[0, 1], zero_division=0
    )
    p_obj, p_subj = precision
    r_obj, r_subj = recall
    f_obj, f_subj = f1

    scores = {
        'accuracy': accuracy_score(labels, preds),
        'precision_OBJ': p_obj,
        'recall_OBJ': r_obj,
        'f1_OBJ': f_obj,
        'precision_SUBJ': p_subj,
        'recall_SUBJ': r_subj,
        'f1_SUBJ': f_subj,
        'macro_f1': f1.mean(),
    }
    return scores


 Use WeightedRandomSampler to balance class sampling in each batch, and customize Trainer to use this sampler during training.


In [None]:
# Labels (0 or 1) of the training examples, in dataset order.
train_labels = train_ds["labels"]

counts = Counter(train_labels)
total  = counts[0] + counts[1]

# Inverse-frequency weight per class (total/counts[0] for OBJ, total/counts[1]
# for SUBJ), then one weight per training example.
class_weight = {label: total / n_examples for label, n_examples in counts.items()}
weights = [class_weight[label] for label in train_labels]

# Draws len(train) indices per pass, with replacement, proportionally to the weights.
sampler = WeightedRandomSampler(
    weights=weights,
    num_samples=len(weights),
    replacement=True,
)



class SamplerTrainer(Trainer):
    """Trainer whose training batches are drawn by the module-level ``sampler``.

    Only the training dataloader is overridden; evaluation and prediction use
    the stock ``Trainer`` dataloaders. The class reads the global ``sampler``
    defined in this notebook, so that object must exist (and match
    ``train_dataset`` in length) before ``train()`` is called.
    """

    def get_train_dataloader(self):
        """Build the training DataLoader around the weighted sampler.

        Returns:
            A ``DataLoader`` over ``self.train_dataset`` that draws indices
            from ``sampler`` and collates with ``self.data_collator``.

        Raises:
            ValueError: If the trainer was created without a training dataset.
        """
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")

        return DataLoader(
            self.train_dataset,
            sampler=sampler,
            # Total batch size for this process (per-device size scaled by the
            # number of GPUs), matching what the stock Trainer uses.
            batch_size=self.args.train_batch_size,
            collate_fn=self.data_collator,
            num_workers=self.args.dataloader_num_workers,
            # Honour the TrainingArguments switch instead of forcing pinning.
            pin_memory=self.args.dataloader_pin_memory,
        )

 Initialize model **(mDeBERTaV3_base)** and training configuration with gradient checkpointing and early stopping.

 Uses a custom SamplerTrainer to address class imbalance, and selects the best model based on macro F1 score.


In [None]:
# Release cached GPU memory before loading a new model.
torch.cuda.empty_cache()

# Sequence-classification model with two output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

# Gradient checkpointing, as described in the section text above.
model.gradient_checkpointing_enable()

training_args = TrainingArguments(
    # Evaluate and checkpoint once per epoch so the best epoch (by macro F1)
    # can be restored at the end of training.
    output_dir="/content/results_bg",
    eval_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='macro_f1',
    greater_is_better=True,
    # Optimisation.
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,   # 16 x 4 = 64 examples per optimizer step
    per_device_eval_batch_size=64,
    num_train_epochs=6,
    weight_decay=0.1,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
    label_smoothing_factor=0.1,
    max_grad_norm=1.0,
    fp16=True,
    # Logging happens once per epoch. `logging_steps` only applies to
    # logging_strategy='steps', so it is intentionally not set.
    logging_dir='./logs_bg',
    logging_strategy='epoch',
    seed=42,
)

# Train on the train split and evaluate on dev after every epoch. Training
# stops early once macro F1 fails to improve for one evaluation.
trainer = SamplerTrainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=dev_ds,
    # `processing_class` replaces the deprecated `tokenizer` argument; the
    # tokenizer is still saved together with the model.
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)],
)




Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = SamplerTrainer(


Fine-tune the model and save the best checkpoint.

In [None]:
# Run fine-tuning.
trainer.train()

# Persist the resulting model to Drive, creating the target folder if needed.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_bulgarian"
save_dir = Path(output_dir)
save_dir.mkdir(parents=True, exist_ok=True)
trainer.save_model(output_dir)

print(f"Final model saved to {output_dir}")


Epoch,Training Loss,Validation Loss,Accuracy,Precision Obj,Recall Obj,F1 Obj,Precision Subj,Recall Subj,F1 Subj,Macro F1
1,0.6935,0.69751,0.454248,0.0,0.0,0.0,0.454248,1.0,0.624719,0.31236
2,0.6931,0.688713,0.545752,0.545752,1.0,0.706131,0.0,0.0,0.0,0.353066
3,0.6838,0.667678,0.571895,0.562069,0.976048,0.713348,0.75,0.086331,0.154839,0.434093
4,0.6234,0.604552,0.696078,0.666667,0.886228,0.760925,0.77381,0.467626,0.58296,0.671943
5,0.56,0.564294,0.751634,0.842105,0.670659,0.746667,0.682081,0.848921,0.75641,0.751538
6,0.5059,0.551483,0.761438,0.776471,0.790419,0.783383,0.742647,0.726619,0.734545,0.758964


Final model saved to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_bulgarian


Print the training and evaluation loss logs for each epoch, then evaluate and display the final macro F1 scores on the train and validation sets.


In [None]:
# Print only the log entries that carry a training loss or an evaluation loss.
loss_keys = ('eval_loss', 'loss')
for record in trainer.state.log_history:
    if any(key in record for key in loss_keys):
        print(record)

# Score the current model on the train and validation splits to compare fit.
train_metrics = trainer.evaluate(train_ds)
val_metrics   = trainer.evaluate(dev_ds)
print("Train macro-F1:", train_metrics['eval_macro_f1'])
print("Val   macro-F1:", val_metrics['eval_macro_f1'])


{'loss': 0.6935, 'grad_norm': 3.131653070449829, 'learning_rate': 1.898305084745763e-05, 'epoch': 1.0, 'step': 11}
{'eval_loss': 0.6975103616714478, 'eval_accuracy': 0.4542483660130719, 'eval_precision_OBJ': 0.0, 'eval_recall_OBJ': 0.0, 'eval_f1_OBJ': 0.0, 'eval_precision_SUBJ': 0.4542483660130719, 'eval_recall_SUBJ': 1.0, 'eval_f1_SUBJ': 0.6247191011235955, 'eval_macro_f1': 0.31235955056179776, 'eval_runtime': 0.6487, 'eval_samples_per_second': 471.723, 'eval_steps_per_second': 7.708, 'epoch': 1.0, 'step': 11}
{'loss': 0.6931, 'grad_norm': 0.9698556065559387, 'learning_rate': 1.5254237288135594e-05, 'epoch': 2.0, 'step': 22}
{'eval_loss': 0.6887134909629822, 'eval_accuracy': 0.545751633986928, 'eval_precision_OBJ': 0.545751633986928, 'eval_recall_OBJ': 1.0, 'eval_f1_OBJ': 0.7061310782241015, 'eval_precision_SUBJ': 0.0, 'eval_recall_SUBJ': 0.0, 'eval_f1_SUBJ': 0.0, 'eval_macro_f1': 0.35306553911205074, 'eval_runtime': 0.6646, 'eval_samples_per_second': 460.452, 'eval_steps_per_second':

Train macro-F1: 0.8685912064506314
Val   macro-F1: 0.7589641219314809


Load the saved model and tokenizer from Drive.

In [None]:
# Reload the fine-tuned model and its tokenizer from the folder they were saved to.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_bulgarian"
model     = AutoModelForSequenceClassification.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

data_collator = DataCollatorWithPadding(tokenizer)

# Evaluation-only trainer (default TrainingArguments, no training data).
trainer = Trainer(
    model=model,
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


# Result

Macro F1 on the dev-test data: **0.73918**

In [None]:
# Score the reloaded model on the held-out dev-test split.
metrics = trainer.evaluate(dev_test_ds)

# Assemble the report first, then emit it in one call (one line per entry).
report_lines = [
    "Result of dev_test data",
    f"OBJ – Precision: {metrics['eval_precision_OBJ']:.5f}, Recall: {metrics['eval_recall_OBJ']:.5f}, F1: {metrics['eval_f1_OBJ']:.5f}",
    f"SUBJ – Precision: {metrics['eval_precision_SUBJ']:.5f}, Recall: {metrics['eval_recall_SUBJ']:.5f}, F1: {metrics['eval_f1_SUBJ']:.5f}",
    f"Macro‐F1: {metrics['eval_macro_f1']:.5f}",
]
print("\n".join(report_lines))


Result of dev_test data
OBJ – Precision: 0.78014, Recall: 0.76923, F1: 0.77465
SUBJ – Precision: 0.69725, Recall: 0.71028, F1: 0.70370
Macro‐F1: 0.73918
