## Monolingual_Italian

In [None]:
# Mount Google Drive inside the Colab VM; the data and model paths used later
# in this notebook all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset, concatenate_datasets
from transformers import AutoTokenizer
from collections import Counter
import random
import torch
from transformers import DataCollatorWithPadding, EarlyStoppingCallback
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import DataLoader, WeightedRandomSampler
from pathlib import Path

## Load Data

In [None]:
# Folder on Drive that holds every Italian split of the task-1 data.
base_dir = '/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/data/italian'

# Labeled splits: train, dev and dev-test.
train_path, dev_path, dev_test_path = (
    f'{base_dir}/train_it.tsv',
    f'{base_dir}/dev_it.tsv',
    f'{base_dir}/dev_test_it.tsv',
)

# The test split exists in a labeled and an unlabeled version.
test_path, test_unlabeled_path = (
    f'{base_dir}/test_it_labeled.tsv',
    f'{base_dir}/test_it_unlabeled.tsv',
)

In [None]:
# Read every tab-separated split into its own DataFrame, in the same order
# as the path variables were declared.
train_df, dev_df, dev_test_df, test_df, test_unlabeled_df = (
    pd.read_csv(tsv_file, sep='\t')
    for tsv_file in (
        train_path,
        dev_path,
        dev_test_path,
        test_path,
        test_unlabeled_path,
    )
)

## Preprocessing

In [None]:
# Encode the string labels as integer ids. The encoder is fitted on the
# training split only and then reused, unchanged, for every other labeled split.
le = LabelEncoder()
labeled_frames = (train_df, dev_df, dev_test_df, test_df)

train_df['label_id'] = le.fit_transform(train_df['label'])
for frame in labeled_frames[1:]:
    frame['label_id'] = le.transform(frame['label'])

# Replace the textual 'label' column by the integer ids, stored under 'labels'.
for frame in labeled_frames:
    frame.drop(columns=['label'], inplace=True)
    frame.rename(columns={'label_id':'labels'}, inplace=True)

print("Mapped classes:", dict(enumerate(le.classes_)))


Mapped classes: {0: 'OBJ', 1: 'SUBJ'}


In [None]:
# Wrap the DataFrames as `datasets.Dataset` objects, keeping only the text
# and (where available) the integer label.
labeled_columns = ['sentence','labels']
train_ds, dev_ds, dev_test_ds, test_ds = (
    Dataset.from_pandas(frame[labeled_columns])
    for frame in (train_df, dev_df, dev_test_df, test_df)
)

# The unlabeled test split only carries the sentence text.
test_unlabeled_ds = Dataset.from_pandas(test_unlabeled_df[['sentence']])

In [None]:
# Class distribution of the training split (label id 0 and label id 1).
counts = Counter(train_ds['labels'])
n_obj, n_subj = (counts[class_id] for class_id in (0, 1))
print(f"Original counts → OBJ: {n_obj}, SUBJ: {n_subj}")

Original counts → OBJ: 1231, SUBJ: 382


# First Model

To tokenize the data, we use the tokenizer of the **musixmatch/umberto-commoncrawl-cased-v1** model.

In [None]:
model_name = "musixmatch/umberto-commoncrawl-cased-v1"
tokenizer  = AutoTokenizer.from_pretrained(model_name)

# Maximum number of tokens kept per sentence.
max_len = 100

def tokenize(batch):
    """Tokenize the 'sentence' field of a batch with the notebook-level tokenizer.

    Every sentence is truncated and padded to exactly ``max_len`` tokens.
    NOTE: because of padding='max_length', all examples already share the same
    length after this step.
    """
    encoding_options = dict(padding='max_length', truncation=True, max_length=max_len)
    return tokenizer(batch['sentence'], **encoding_options)


def _keep_only(dataset, wanted):
    """Return ``dataset`` without any column whose name is not in ``wanted``."""
    surplus = [name for name in dataset.column_names if name not in wanted]
    return dataset.remove_columns(surplus)


# Tokenize every split in batched mode.
train_ds          = train_ds.map(tokenize, batched=True)
dev_ds            = dev_ds.map(tokenize, batched=True)
dev_test_ds       = dev_test_ds.map(tokenize, batched=True)
test_ds           = test_ds.map(tokenize, batched=True)
test_unlabeled_ds = test_unlabeled_ds.map(tokenize, batched=True)

# Keep only the columns listed here; the unlabeled split has no 'labels'.
cols = ['input_ids','attention_mask','labels']
train_ds          = _keep_only(train_ds, cols)
dev_ds            = _keep_only(dev_ds, cols)
dev_test_ds       = _keep_only(dev_test_ds, cols)
test_ds           = _keep_only(test_ds, cols)
test_unlabeled_ds = _keep_only(test_unlabeled_ds, ['input_ids','attention_mask'])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/508 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/794k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Map:   0%|          | 0/1613 [00:00<?, ? examples/s]

Map:   0%|          | 0/667 [00:00<?, ? examples/s]

Map:   0%|          | 0/462 [00:00<?, ? examples/s]

Map:   0%|          | 0/299 [00:00<?, ? examples/s]

Map:   0%|          | 0/299 [00:00<?, ? examples/s]

 Define a data collator for dynamic padding and a metrics function to compute per-class precision, recall, F1, and macro F1 score.


In [None]:
# Collator that pads each batch with the current tokenizer.
data_collator = DataCollatorWithPadding(tokenizer)

def compute_metrics(eval_pred):
    """Compute accuracy, per-class precision/recall/F1 and macro-F1.

    Args:
        eval_pred: pair ``(logits, labels)``; the predicted class is the
            argmax of the logits over their last axis.

    Returns:
        dict with 'accuracy', precision/recall/F1 for label 0 (OBJ) and
        label 1 (SUBJ), and 'macro_f1' (mean of the two per-class F1 scores).
        Undefined ratios are reported as 0 (``zero_division=0``).
    """
    scores, gold = eval_pred
    predicted = scores.argmax(axis=-1)

    per_class = precision_recall_fscore_support(
        gold, predicted, labels=[0,1], zero_division=0
    )
    prec, rec, fscore = per_class[0], per_class[1], per_class[2]

    report = {'accuracy': accuracy_score(gold, predicted)}
    # Index 0 is OBJ, index 1 is SUBJ (order fixed by labels=[0,1]).
    report.update({
        'precision_OBJ': prec[0],
        'recall_OBJ':    rec[0],
        'f1_OBJ':        fscore[0],
    })
    report.update({
        'precision_SUBJ': prec[1],
        'recall_SUBJ':    rec[1],
        'f1_SUBJ':        fscore[1],
    })
    report['macro_f1'] = fscore.mean()
    return report


 Use WeightedRandomSampler to balance class sampling in each batch, and customize Trainer to use this sampler during training.


In [None]:
# Extract train labels (0 or 1)
# Integer class ids (0 or 1) of the training examples.
train_labels = train_ds["labels"]

counts = Counter(train_labels)
total  = counts[0] + counts[1]

# Inverse-frequency weight per class: total / (number of examples of that class).
inverse_frequency = {class_id: total / counts[class_id] for class_id in counts}

# One weight per training example, taken from its class.
weights = [inverse_frequency[label] for label in train_labels]

# Draw len(train) indices per epoch, with replacement, proportionally to the weights.
sampler = WeightedRandomSampler(
    weights=weights,
    num_samples=len(weights),
    replacement=True,
)



class SamplerTrainer(Trainer):
    """Trainer that draws its training batches through the notebook-level ``sampler``.

    Only the training dataloader is overridden; everything else is inherited
    from ``Trainer``.
    """

    def get_train_dataloader(self):
        """Build the training DataLoader around the module-level ``sampler``.

        Returns:
            A ``DataLoader`` over ``self.train_dataset`` that uses the
            per-device train batch size, the trainer's data collator and the
            dataloader settings from ``self.args``.
        """
        return DataLoader(
            self.train_dataset,
            sampler      = sampler,
            batch_size   = self.args.per_device_train_batch_size,
            collate_fn   = self.data_collator,
            num_workers  = self.args.dataloader_num_workers,
            # Follow TrainingArguments instead of always forcing pinned memory.
            pin_memory   = self.args.dataloader_pin_memory,
        )

 Initialize model **(musixmatch/umberto-commoncrawl-cased-v1)** and training configuration with gradient checkpointing and early stopping.

 Uses a custom SamplerTrainer to address class imbalance, and selects the best model based on macro F1 score.


In [None]:
# Free cached GPU memory before loading a fresh model.
torch.cuda.empty_cache()
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

model.gradient_checkpointing_enable()

training_args = TrainingArguments(
    output_dir= "/content/results_it",
    # Evaluate and checkpoint once per epoch so the best epoch can be restored.
    eval_strategy = 'epoch',
    save_strategy       = 'epoch',
    learning_rate       = 5e-5,
    # 32 examples per step x 2 accumulation steps per optimizer update.
    per_device_train_batch_size = 32,
    gradient_accumulation_steps   = 2,
    per_device_eval_batch_size  = 64,
    num_train_epochs          = 6,
    weight_decay              = 0.1,
    warmup_ratio              = 0.15,
    lr_scheduler_type         = "linear",
    label_smoothing_factor    = 0.1,
    max_grad_norm             = 1.0,
    fp16                          = True,
    # Reload the checkpoint with the highest dev macro-F1 once training ends.
    load_best_model_at_end=True,
    metric_for_best_model = 'macro_f1',
    greater_is_better   = True,
    logging_dir         = './logs_it',
    # NOTE(review): logging_strategy is 'epoch', so logging_steps presumably has no effect.
    logging_steps       = 50,
    logging_strategy = 'epoch' ,
    seed = 42,
)

trainer = SamplerTrainer(
    model            = model,
    args             = training_args,
    train_dataset    = train_ds,
    eval_dataset     = dev_ds,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its replacement.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics,
    # Stop as soon as one evaluation fails to improve macro-F1.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
)


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at musixmatch/umberto-commoncrawl-cased-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = SamplerTrainer(


Fine-tune the model and save the best model.

In [None]:
# Drive folder that receives the fine-tuned model.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_italian"

trainer.train()

# Make sure the destination exists before writing the model files.
model_dir = Path(output_dir)
model_dir.mkdir(parents=True, exist_ok=True)
trainer.save_model(output_dir)

print(f"Final model saved to {output_dir}")


Epoch,Training Loss,Validation Loss,Accuracy,Precision Obj,Recall Obj,F1 Obj,Precision Subj,Recall Subj,F1 Subj,Macro F1
1,0.6674,0.661962,0.590705,0.883392,0.510204,0.646831,0.375,0.813559,0.513369,0.5801
2,0.5421,0.710614,0.595202,0.943548,0.477551,0.634146,0.389021,0.920904,0.54698,0.590563
3,0.4143,0.615872,0.754123,0.897561,0.75102,0.817778,0.525292,0.762712,0.62212,0.719949
4,0.3622,0.544527,0.778111,0.866953,0.82449,0.845188,0.572139,0.649718,0.608466,0.726827
5,0.2935,0.611802,0.766117,0.858369,0.816327,0.83682,0.552239,0.627119,0.587302,0.712061


Final model saved to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_italian


Print the training and evaluation loss logs for each epoch, then evaluate and display the final macro F1 scores on the train and validation sets.


In [None]:
# Show only the log entries that carry a training loss or an evaluation loss.
loss_entries = [
    entry for entry in trainer.state.log_history
    if 'loss' in entry or 'eval_loss' in entry
]
for entry in loss_entries:
    print(entry)

# Score the current model on the training split, then on the dev split.
train_metrics = trainer.evaluate(train_ds)
val_metrics   = trainer.evaluate(dev_ds)

print("Train macro-F1:", train_metrics['eval_macro_f1'])
print("Val   macro-F1:", val_metrics['eval_macro_f1'])


{'loss': 0.6674, 'grad_norm': 1.0850462913513184, 'learning_rate': 4.962121212121213e-05, 'epoch': 1.0, 'step': 26}
{'eval_loss': 0.6619623899459839, 'eval_accuracy': 0.5907046476761619, 'eval_precision_OBJ': 0.8833922261484098, 'eval_recall_OBJ': 0.5102040816326531, 'eval_f1_OBJ': 0.6468305304010349, 'eval_precision_SUBJ': 0.375, 'eval_recall_SUBJ': 0.8135593220338984, 'eval_f1_SUBJ': 0.5133689839572193, 'eval_macro_f1': 0.5800997571791271, 'eval_runtime': 0.8418, 'eval_samples_per_second': 792.378, 'eval_steps_per_second': 13.068, 'epoch': 1.0, 'step': 26}
{'loss': 0.5421, 'grad_norm': 2.3427186012268066, 'learning_rate': 3.9772727272727275e-05, 'epoch': 2.0, 'step': 52}
{'eval_loss': 0.710614025592804, 'eval_accuracy': 0.5952023988005997, 'eval_precision_OBJ': 0.9435483870967742, 'eval_recall_OBJ': 0.4775510204081633, 'eval_f1_OBJ': 0.6341463414634146, 'eval_precision_SUBJ': 0.38902147971360385, 'eval_recall_SUBJ': 0.9209039548022598, 'eval_f1_SUBJ': 0.5469798657718121, 'eval_macro_

Train macro-F1: 0.9228856134502668
Val   macro-F1: 0.7268269464922185


In [None]:
# Reload the saved model and tokenizer from Drive for evaluation and inference.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_italian"
model     = AutoModelForSequenceClassification.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

data_collator = DataCollatorWithPadding(tokenizer)

# Evaluation-only trainer: no TrainingArguments are passed, so defaults apply.
trainer = Trainer(
    model            = model,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its replacement.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics
)

  trainer = Trainer(


## Result

Result for test data (labeled): **Macro F1: 0.73341**



In [None]:

# Evaluate on the labeled test split and print per-class and macro scores.
metrics = trainer.evaluate(test_ds)

report_lines = [
    "Result of test data",
    f"OBJ – Precision: {metrics['eval_precision_OBJ']:.5f}, Recall: {metrics['eval_recall_OBJ']:.5f}, F1: {metrics['eval_f1_OBJ']:.5f}",
    f"SUBJ – Precision: {metrics['eval_precision_SUBJ']:.5f}, Recall: {metrics['eval_recall_SUBJ']:.5f}, F1: {metrics['eval_f1_SUBJ']:.5f}",
    f"Macro‐F1: {metrics['eval_macro_f1']:.5f}",
]
for line in report_lines:
    print(line)


Result of test data
OBJ – Precision: 0.80000, Recall: 0.83333, F1: 0.81633
SUBJ – Precision: 0.67677, Recall: 0.62617, F1: 0.65049
Macro‐F1: 0.73341


Result for dev test data

In [None]:
# Evaluate on the dev-test split and print per-class and macro scores.
metrics = trainer.evaluate(dev_test_ds)

report_lines = [
    "Result of dev_test data",
    f"OBJ – Precision: {metrics['eval_precision_OBJ']:.5f}, Recall: {metrics['eval_recall_OBJ']:.5f}, F1: {metrics['eval_f1_OBJ']:.5f}",
    f"SUBJ – Precision: {metrics['eval_precision_SUBJ']:.5f}, Recall: {metrics['eval_recall_SUBJ']:.5f}, F1: {metrics['eval_f1_SUBJ']:.5f}",
    f"Macro‐F1: {metrics['eval_macro_f1']:.5f}",
]
for line in report_lines:
    print(line)


Result of dev_test data
OBJ – Precision: 0.87179, Recall: 0.91617, F1: 0.89343
SUBJ – Precision: 0.74775, Recall: 0.64844, F1: 0.69456
Macro‐F1: 0.79400


Predict labels for the unlabeled test data and save them.

In [None]:
# Run inference on the unlabeled test split and take the argmax class per sentence.
pred_out = trainer.predict(test_unlabeled_ds)
logits   = pred_out.predictions
pred_ids = logits.argmax(axis=-1)

# Map the integer ids back to the original string labels with the fitted encoder.
pred_labels = le.inverse_transform(pred_ids)

df = pd.DataFrame({
    'sentence': test_unlabeled_df['sentence'],
    'prediction': pred_labels
})
save_path = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/italian/italian_predictions.tsv"
# to_csv does not create missing folders, so make sure the target directory exists.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(save_path, sep='\t', index=False)

print(f"Saved predictions to {save_path}")


Saved predictions to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/italian/italian_predictions.tsv


# Second Model

To tokenize the data, we use the tokenizer of the **mdeberta-v3-base** model.

In [None]:
model_name = "microsoft/mdeberta-v3-base"
tokenizer  = AutoTokenizer.from_pretrained(model_name)

# Maximum number of tokens kept per sentence.
max_len = 100

def tokenize(batch):
    """Tokenize the 'sentence' field of a batch with the notebook-level tokenizer.

    Every sentence is truncated and padded to exactly ``max_len`` tokens.
    """
    return tokenizer(batch['sentence'],
                     padding='max_length',
                     truncation=True,
                     max_length=max_len)

# Rebuild the raw datasets from the DataFrames. The first-model section removed
# the 'sentence' column from every dataset, so tokenizing those again would
# fail with a KeyError on a fresh Restart & Run All.
train_ds    = Dataset.from_pandas(train_df[['sentence','labels']])
dev_ds      = Dataset.from_pandas(dev_df[['sentence','labels']])
dev_test_ds = Dataset.from_pandas(dev_test_df[['sentence','labels']])
test_ds     = Dataset.from_pandas(test_df[['sentence','labels']])
test_unlabeled_ds = Dataset.from_pandas(test_unlabeled_df[['sentence']])

train_ds    = train_ds.map(tokenize, batched=True)
dev_ds      = dev_ds.map(tokenize, batched=True)
dev_test_ds = dev_test_ds.map(tokenize, batched=True)
test_ds     = test_ds.map(tokenize, batched=True)
test_unlabeled_ds = test_unlabeled_ds.map(tokenize, batched=True)

# Keep only the columns listed here; the unlabeled split has no 'labels'.
cols = ['input_ids','attention_mask','labels']
train_ds    = train_ds.remove_columns([c for c in train_ds.column_names if c not in cols])
dev_ds      = dev_ds.remove_columns([c for c in dev_ds.column_names if c not in cols])
dev_test_ds = dev_test_ds.remove_columns([c for c in dev_test_ds.column_names if c not in cols])
test_ds     = test_ds.remove_columns([c for c in test_ds.column_names if c not in cols])
test_unlabeled_ds = test_unlabeled_ds.remove_columns(
    [c for c in test_unlabeled_ds.column_names if c not in ['input_ids','attention_mask']])

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]



Map:   0%|          | 0/1613 [00:00<?, ? examples/s]

Map:   0%|          | 0/667 [00:00<?, ? examples/s]

Map:   0%|          | 0/462 [00:00<?, ? examples/s]

Map:   0%|          | 0/299 [00:00<?, ? examples/s]

Map:   0%|          | 0/299 [00:00<?, ? examples/s]

In [None]:
# Collator that pads each batch with the current tokenizer.
data_collator = DataCollatorWithPadding(tokenizer)

def compute_metrics(eval_pred):
    """Compute accuracy, per-class precision/recall/F1 and macro-F1.

    Args:
        eval_pred: pair ``(logits, labels)``; the predicted class is the
            argmax of the logits over their last axis.

    Returns:
        dict with 'accuracy', precision/recall/F1 for label 0 (OBJ) and
        label 1 (SUBJ), and 'macro_f1' (mean of the two per-class F1 scores).
        Undefined ratios are reported as 0 (``zero_division=0``).
    """
    scores, gold = eval_pred
    predicted = scores.argmax(axis=-1)

    per_class = precision_recall_fscore_support(
        gold, predicted, labels=[0,1], zero_division=0
    )
    prec, rec, fscore = per_class[0], per_class[1], per_class[2]

    report = {'accuracy': accuracy_score(gold, predicted)}
    # Index 0 is OBJ, index 1 is SUBJ (order fixed by labels=[0,1]).
    report.update({
        'precision_OBJ': prec[0],
        'recall_OBJ':    rec[0],
        'f1_OBJ':        fscore[0],
    })
    report.update({
        'precision_SUBJ': prec[1],
        'recall_SUBJ':    rec[1],
        'f1_SUBJ':        fscore[1],
    })
    report['macro_f1'] = fscore.mean()
    return report


In [None]:
#  Extract train labels (0 or 1)
# Integer class ids (0 or 1) of the training examples.
train_labels = train_ds["labels"]

counts = Counter(train_labels)
total  = counts[0] + counts[1]

# Inverse-frequency weight per class: total / (number of examples of that class).
inverse_frequency = {class_id: total / counts[class_id] for class_id in counts}

# One weight per training example, taken from its class.
weights = [inverse_frequency[label] for label in train_labels]

# Draw len(train) indices per epoch, with replacement, proportionally to the weights.
sampler = WeightedRandomSampler(
    weights=weights,
    num_samples=len(weights),
    replacement=True,
)



class SamplerTrainer(Trainer):
    """Trainer that draws its training batches through the notebook-level ``sampler``.

    Only the training dataloader is overridden; everything else is inherited
    from ``Trainer``.
    """

    def get_train_dataloader(self):
        """Build the training DataLoader around the module-level ``sampler``.

        Returns:
            A ``DataLoader`` over ``self.train_dataset`` that uses the
            per-device train batch size, the trainer's data collator and the
            dataloader settings from ``self.args``.
        """
        return DataLoader(
            self.train_dataset,
            sampler      = sampler,
            batch_size   = self.args.per_device_train_batch_size,
            collate_fn   = self.data_collator,
            num_workers  = self.args.dataloader_num_workers,
            # Follow TrainingArguments instead of always forcing pinned memory.
            pin_memory   = self.args.dataloader_pin_memory,
        )

In [None]:
# Free cached GPU memory before loading a fresh model.
torch.cuda.empty_cache()
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

model.gradient_checkpointing_enable()

training_args = TrainingArguments(
    output_dir= "/content/results1_it",
    # Evaluate and checkpoint once per epoch so the best epoch can be restored.
    eval_strategy = 'epoch',
    save_strategy       = 'epoch',
    learning_rate       = 5e-5,
    # 32 examples per step x 2 accumulation steps per optimizer update.
    per_device_train_batch_size = 32,
    gradient_accumulation_steps   = 2,
    per_device_eval_batch_size  = 64,
    num_train_epochs          = 6,
    weight_decay              = 0.1,
    warmup_ratio              = 0.15,
    lr_scheduler_type         = "linear",
    label_smoothing_factor    = 0.1,
    max_grad_norm             = 1.0,
    fp16                          = True,
    # Reload the checkpoint with the highest dev macro-F1 once training ends.
    load_best_model_at_end=True,
    metric_for_best_model = 'macro_f1',
    greater_is_better   = True,
    logging_dir         = './logs_it',
    # NOTE(review): logging_strategy is 'epoch', so logging_steps presumably has no effect.
    logging_steps       = 50,
    logging_strategy = 'epoch' ,
    seed = 42,
)

trainer = SamplerTrainer(
    model            = model,
    args             = training_args,
    train_dataset    = train_ds,
    eval_dataset     = dev_ds,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its replacement.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics,
    # Stop as soon as one evaluation fails to improve macro-F1.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
)




Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = SamplerTrainer(


In [None]:
# Drive folder that receives the fine-tuned model.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_italian1"

trainer.train()

# Make sure the destination exists before writing the model files.
model_dir = Path(output_dir)
model_dir.mkdir(parents=True, exist_ok=True)
trainer.save_model(output_dir)

print(f"Final model saved to {output_dir}")


Epoch,Training Loss,Validation Loss,Accuracy,Precision Obj,Recall Obj,F1 Obj,Precision Subj,Recall Subj,F1 Subj,Macro F1
1,0.6694,0.688871,0.587706,0.946058,0.465306,0.623803,0.384977,0.926554,0.543947,0.583875
2,0.5218,0.640166,0.710645,0.943284,0.644898,0.766061,0.475904,0.892655,0.620825,0.693443
3,0.3803,0.52615,0.814093,0.899563,0.840816,0.869198,0.626794,0.740113,0.678756,0.773977
4,0.3206,0.540069,0.796102,0.895089,0.818367,0.855011,0.593607,0.734463,0.656566,0.755788


Final model saved to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_italian1


In [None]:
# Show only the log entries that carry a training loss or an evaluation loss.
loss_entries = [
    entry for entry in trainer.state.log_history
    if 'loss' in entry or 'eval_loss' in entry
]
for entry in loss_entries:
    print(entry)

# Score the current model on the training split, then on the dev split.
train_metrics = trainer.evaluate(train_ds)
val_metrics   = trainer.evaluate(dev_ds)

print("Train macro-F1:", train_metrics['eval_macro_f1'])
print("Val   macro-F1:", val_metrics['eval_macro_f1'])


{'loss': 0.6694, 'grad_norm': 5.452830791473389, 'learning_rate': 4.962121212121213e-05, 'epoch': 1.0, 'step': 26}
{'eval_loss': 0.688871443271637, 'eval_accuracy': 0.5877061469265368, 'eval_precision_OBJ': 0.946058091286307, 'eval_recall_OBJ': 0.46530612244897956, 'eval_f1_OBJ': 0.6238030095759234, 'eval_precision_SUBJ': 0.38497652582159625, 'eval_recall_SUBJ': 0.9265536723163842, 'eval_f1_SUBJ': 0.5439469320066335, 'eval_macro_f1': 0.5838749707912785, 'eval_runtime': 1.4405, 'eval_samples_per_second': 463.018, 'eval_steps_per_second': 7.636, 'epoch': 1.0, 'step': 26}
{'loss': 0.5218, 'grad_norm': 5.016152381896973, 'learning_rate': 3.9772727272727275e-05, 'epoch': 2.0, 'step': 52}
{'eval_loss': 0.6401657462120056, 'eval_accuracy': 0.7106446776611695, 'eval_precision_OBJ': 0.9432835820895522, 'eval_recall_OBJ': 0.6448979591836734, 'eval_f1_OBJ': 0.7660606060606061, 'eval_precision_SUBJ': 0.4759036144578313, 'eval_recall_SUBJ': 0.8926553672316384, 'eval_f1_SUBJ': 0.6208251473477406, 'e

Train macro-F1: 0.9177898760569907
Val   macro-F1: 0.7739773944601124


In [None]:

# Reload the saved model and tokenizer from Drive for evaluation and inference.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_italian1"
model     = AutoModelForSequenceClassification.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)


data_collator = DataCollatorWithPadding(tokenizer)


# Evaluation-only trainer: no TrainingArguments are passed, so defaults apply.
trainer = Trainer(
    model            = model,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its replacement.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics
)

  trainer = Trainer(


## Result

Result for test data (labeled): **Macro F1: 0.77075**



In [None]:

# Evaluate on the labeled test split and print per-class and macro scores.
metrics = trainer.evaluate(test_ds)

report_lines = [
    "Result of test data",
    f"OBJ – Precision: {metrics['eval_precision_OBJ']:.5f}, Recall: {metrics['eval_recall_OBJ']:.5f}, F1: {metrics['eval_f1_OBJ']:.5f}",
    f"SUBJ – Precision: {metrics['eval_precision_SUBJ']:.5f}, Recall: {metrics['eval_recall_SUBJ']:.5f}, F1: {metrics['eval_f1_SUBJ']:.5f}",
    f"Macro‐F1: {metrics['eval_macro_f1']:.5f}",
]
for line in report_lines:
    print(line)


Result of test data
OBJ – Precision: 0.84783, Recall: 0.81250, F1: 0.82979
SUBJ – Precision: 0.68696, Recall: 0.73832, F1: 0.71171
Macro‐F1: 0.77075


Result for dev test data

In [None]:
# Evaluate on the dev-test split and print per-class and macro scores.
metrics = trainer.evaluate(dev_test_ds)

report_lines = [
    "Result of dev_test data",
    f"OBJ – Precision: {metrics['eval_precision_OBJ']:.5f}, Recall: {metrics['eval_recall_OBJ']:.5f}, F1: {metrics['eval_f1_OBJ']:.5f}",
    f"SUBJ – Precision: {metrics['eval_precision_SUBJ']:.5f}, Recall: {metrics['eval_recall_SUBJ']:.5f}, F1: {metrics['eval_f1_SUBJ']:.5f}",
    f"Macro‐F1: {metrics['eval_macro_f1']:.5f}",
]
for line in report_lines:
    print(line)


Result of dev_test data
OBJ – Precision: 0.88663, Recall: 0.91317, F1: 0.89971
SUBJ – Precision: 0.75424, Recall: 0.69531, F1: 0.72358
Macro‐F1: 0.81164


Predict labels for the unlabeled test data and save them.

In [None]:
# Run inference on the unlabeled test split and take the argmax class per sentence.
pred_out = trainer.predict(test_unlabeled_ds)
logits   = pred_out.predictions
pred_ids = logits.argmax(axis=-1)


# Map the integer ids back to the original string labels with the fitted encoder.
pred_labels = le.inverse_transform(pred_ids)


df = pd.DataFrame({
    'sentence': test_unlabeled_df['sentence'],
    'prediction': pred_labels
})
save_path = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/italian/italian_predictions1.tsv"
# to_csv does not create missing folders, so make sure the target directory exists.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(save_path, sep='\t', index=False)

print(f"Saved predictions to {save_path}")


Saved predictions to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/italian/italian_predictions1.tsv
