## Monolingual_German

In [None]:
# Mount Google Drive at /content/drive; the data, model and prediction paths
# used in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset, concatenate_datasets
from transformers import AutoTokenizer
from collections import Counter
import random
import torch
from transformers import DataCollatorWithPadding, EarlyStoppingCallback
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import DataLoader, WeightedRandomSampler
from pathlib import Path

## Load Data

In [None]:
# Root folder of the German data on the mounted Drive.
base_dir = '/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/data/german'

# One TSV file per split, each located directly under `base_dir`.
train_path          = base_dir + '/train_de.tsv'
dev_path            = base_dir + '/dev_de.tsv'
dev_test_path       = base_dir + '/dev_test_de.tsv'
test_path           = base_dir + '/test_de_labeled.tsv'
test_unlabeled_path = base_dir + '/test_de_unlabeled.tsv'

In [None]:
# Read every split from its tab-separated file, in the same order as the paths
# were declared.
train_df, dev_df, dev_test_df, test_df, test_unlabeled_df = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in (train_path, dev_path, dev_test_path, test_path, test_unlabeled_path)
)

## Preprocessing

In [None]:
# Learn the string -> integer label mapping from the training split only.
le = LabelEncoder().fit(train_df['label'])

labelled_frames = (train_df, dev_df, dev_test_df, test_df)

# Encode every split first, so an unseen label fails before any frame is modified.
encoded_labels = [le.transform(frame['label']) for frame in labelled_frames]

# Replace the string column `label` by the integer column `labels`
# (appended as the last column of each frame).
for frame, label_ids in zip(labelled_frames, encoded_labels):
    frame.drop(columns=['label'], inplace=True)
    frame['labels'] = label_ids

print("Mapped classes:", dict(enumerate(le.classes_)))


Mapped classes: {0: 'OBJ', 1: 'SUBJ'}


In [None]:
# Wrap each split in a Hugging Face Dataset, keeping only the text and,
# for the labeled splits, the integer label.
labelled_columns = ['sentence', 'labels']
train_ds, dev_ds, dev_test_ds, test_ds = (
    Dataset.from_pandas(frame[labelled_columns])
    for frame in (train_df, dev_df, dev_test_df, test_df)
)
test_unlabeled_ds = Dataset.from_pandas(test_unlabeled_df[['sentence']])

In [None]:
# Class balance of the training split (label id 0 is OBJ, 1 is SUBJ).
counts = Counter(train_ds['labels'])
n_obj = counts[0]
n_subj = counts[1]
print(f"Original counts → OBJ: {n_obj}, SUBJ: {n_subj}")

Original counts → OBJ: 492, SUBJ: 308


# First Model

To tokenize the data, we use the tokenizer of the **dbmdz/bert-base-german-cased** model.

In [None]:
model_name = "dbmdz/bert-base-german-cased"
tokenizer  = AutoTokenizer.from_pretrained(model_name)

# Upper bound on the number of tokens per sentence; longer inputs are truncated.
max_len = 100

def tokenize(batch):
    """Tokenize the ``sentence`` column of a batch, truncating to ``max_len`` tokens.

    No padding is applied here on purpose: the ``DataCollatorWithPadding`` given
    to the trainer pads each batch to its longest sequence, so padding every
    example to ``max_len`` up front would only add wasted computation.

    Args:
        batch: mapping with a ``'sentence'`` entry holding a list of strings.

    Returns:
        The tokenizer output for the batch (``input_ids``, ``attention_mask``, ...).
    """
    return tokenizer(batch['sentence'],
                     truncation=True,
                     max_length=max_len)

train_ds    = train_ds.map(tokenize, batched=True)
dev_ds      = dev_ds.map(tokenize, batched=True)
dev_test_ds = dev_test_ds.map(tokenize, batched=True)
test_ds     = test_ds.map(tokenize, batched=True)
test_unlabeled_ds = test_unlabeled_ds.map(tokenize, batched=True)

# Keep only the columns the model consumes; this also drops the raw 'sentence'
# text, so these datasets cannot be re-tokenized afterwards.
cols = ['input_ids','attention_mask','labels']
train_ds    = train_ds.remove_columns([c for c in train_ds.column_names if c not in cols])
dev_ds      = dev_ds.remove_columns([c for c in dev_ds.column_names if c not in cols])
dev_test_ds = dev_test_ds.remove_columns([c for c in dev_test_ds.column_names if c not in cols])
test_ds     = test_ds.remove_columns([c for c in test_ds.column_names if c not in cols])
# The unlabeled split has no 'labels' column to keep.
test_unlabeled_ds = test_unlabeled_ds.remove_columns(
    [c for c in test_unlabeled_ds.column_names if c not in ['input_ids','attention_mask']])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/491 [00:00<?, ? examples/s]

Map:   0%|          | 0/224 [00:00<?, ? examples/s]

Map:   0%|          | 0/347 [00:00<?, ? examples/s]

Map:   0%|          | 0/347 [00:00<?, ? examples/s]

 Define a data collator for dynamic padding and a metrics function to compute per-class precision, recall, F1, and macro F1 score.


In [None]:
# Collator that pads the examples of a batch with the current tokenizer.
data_collator = DataCollatorWithPadding(tokenizer)

def compute_metrics(eval_pred):
    """Compute accuracy, per-class precision/recall/F1 and macro F1.

    Args:
        eval_pred: pair ``(logits, labels)``; the predicted class is the
            argmax of ``logits`` over the last axis.

    Returns:
        dict with ``accuracy``, ``precision_*``/``recall_*``/``f1_*`` for the
        OBJ (id 0) and SUBJ (id 1) classes, and ``macro_f1`` (mean of the two
        per-class F1 scores).
    """
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)

    # Per-class scores, indexed by label id; undefined ratios count as 0.
    per_class = precision_recall_fscore_support(
        labels, preds, labels=[0,1], zero_division=0
    )
    precision, recall, f1 = per_class[:3]

    scores = {'accuracy': accuracy_score(labels, preds)}
    for class_id, class_name in enumerate(('OBJ', 'SUBJ')):
        scores[f'precision_{class_name}'] = precision[class_id]
        scores[f'recall_{class_name}'] = recall[class_id]
        scores[f'f1_{class_name}'] = f1[class_id]
    scores['macro_f1'] = f1.mean()
    return scores


 Use WeightedRandomSampler to balance class sampling in each batch, and customize Trainer to use this sampler during training.


In [None]:
# Extract train labels (0 or 1)
train_labels = train_ds["labels"]  # a list or array of 0/1


counts = Counter(train_labels)
total  = counts[0] + counts[1]
# weight for OBJ = total/counts[0], for SUBJ = total/counts[1]
weights = [ total / counts[label] for label in train_labels ]

# sampler that samples N = len(train) items with replacement
sampler = WeightedRandomSampler(
    weights      = weights,
    num_samples  = len(weights),
    replacement  = True
)



class SamplerTrainer(Trainer):
    def get_train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            sampler      = sampler,
            batch_size   = self.args.per_device_train_batch_size,
            collate_fn   = self.data_collator,
            num_workers  = self.args.dataloader_num_workers,
            pin_memory   = True,
        )

 Initialize model **(dbmdz/bert-base-german-cased)** and training configuration with gradient checkpointing and early stopping.

 Uses a custom SamplerTrainer to address class imbalance, and selects the best model based on macro F1 score.


In [None]:
# Release cached GPU memory before loading a new model.
torch.cuda.empty_cache()
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
model.gradient_checkpointing_enable()

training_args = TrainingArguments(
    output_dir                  = "/content/results_de",
    # Evaluate and checkpoint once per epoch so the best epoch can be restored.
    eval_strategy               = 'epoch',
    save_strategy               = 'epoch',
    learning_rate               = 2e-5,
    per_device_train_batch_size = 16,
    gradient_accumulation_steps = 4,      # 16 x 4 = 64 examples per optimizer step on one device
    per_device_eval_batch_size  = 64,
    num_train_epochs            = 6,
    weight_decay                = 0.1,
    warmup_ratio                = 0.1,
    lr_scheduler_type           = "linear",
    label_smoothing_factor      = 0.1,
    max_grad_norm               = 1.0,
    fp16                        = True,
    # Restore the checkpoint with the highest dev macro F1 when training ends.
    load_best_model_at_end      = True,
    metric_for_best_model       = 'macro_f1',
    greater_is_better           = True,
    logging_dir                 = './logs_de',
    # Logging is per epoch; `logging_steps` only applies to the 'steps'
    # strategy, so it is not set here.
    logging_strategy            = 'epoch',
    seed                        = 42,
)

trainer = SamplerTrainer(
    model            = model,
    args             = training_args,
    train_dataset    = train_ds,
    eval_dataset     = dev_ds,
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics,
    # Stop as soon as one evaluation fails to improve macro F1.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
)




Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-german-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = SamplerTrainer(


Train the model and save the best model.

In [None]:
# Fine-tune the model with the trainer configured above.
trainer.train()

# Save the trainer's current model to Drive, creating the target folder if needed.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_german"
save_dir = Path(output_dir)
save_dir.mkdir(parents=True, exist_ok=True)
trainer.save_model(output_dir)

print(f"Final model saved to {output_dir}")


Epoch,Training Loss,Validation Loss,Accuracy,Precision Obj,Recall Obj,F1 Obj,Precision Subj,Recall Subj,F1 Subj,Macro F1
1,0.6438,0.653741,0.631365,0.806306,0.564669,0.664193,0.486989,0.752874,0.591422,0.627808
2,0.5236,0.563025,0.733198,0.894068,0.665615,0.76311,0.584314,0.856322,0.694639,0.728875
3,0.4382,0.543764,0.775967,0.893536,0.741325,0.810345,0.640351,0.83908,0.726368,0.768356
4,0.3483,0.523679,0.796334,0.851133,0.829653,0.840256,0.703297,0.735632,0.719101,0.779678
5,0.3155,0.553903,0.792261,0.879859,0.785489,0.83,0.673077,0.804598,0.732984,0.781492
6,0.2981,0.54444,0.800407,0.859016,0.826498,0.842444,0.704301,0.752874,0.727778,0.785111


Final model saved to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_german


Print the training and evaluation loss logs for each epoch, then evaluate and display the final macro F1 scores on the train and validation sets.


In [None]:
# Print every logged record that carries a training loss or an evaluation loss.
loss_keys = ('eval_loss', 'loss')
for record in trainer.state.log_history:
    if any(key in record for key in loss_keys):
        print(record)

# Compare macro F1 on the training split and on the dev split.
train_metrics = trainer.evaluate(train_ds)
val_metrics   = trainer.evaluate(dev_ds)
train_macro_f1 = train_metrics['eval_macro_f1']
val_macro_f1   = val_metrics['eval_macro_f1']
print("Train macro-F1:", train_macro_f1)
print("Val   macro-F1:", val_macro_f1)


{'loss': 0.6438, 'grad_norm': 1.2293331623077393, 'learning_rate': 1.885714285714286e-05, 'epoch': 1.0, 'step': 13}
{'eval_loss': 0.6537414193153381, 'eval_accuracy': 0.6313645621181263, 'eval_precision_OBJ': 0.8063063063063063, 'eval_recall_OBJ': 0.5646687697160884, 'eval_f1_OBJ': 0.6641929499072357, 'eval_precision_SUBJ': 0.48698884758364314, 'eval_recall_SUBJ': 0.7528735632183908, 'eval_f1_SUBJ': 0.5914221218961625, 'eval_macro_f1': 0.627807535901699, 'eval_runtime': 0.6472, 'eval_samples_per_second': 758.659, 'eval_steps_per_second': 12.361, 'epoch': 1.0, 'step': 13}
{'loss': 0.5236, 'grad_norm': 1.687044382095337, 'learning_rate': 1.5142857142857144e-05, 'epoch': 2.0, 'step': 26}
{'eval_loss': 0.5630254149436951, 'eval_accuracy': 0.7331975560081466, 'eval_precision_OBJ': 0.8940677966101694, 'eval_recall_OBJ': 0.6656151419558359, 'eval_f1_OBJ': 0.7631103074141049, 'eval_precision_SUBJ': 0.5843137254901961, 'eval_recall_SUBJ': 0.8563218390804598, 'eval_f1_SUBJ': 0.6946386946386947, 

Train macro-F1: 0.9395051256254973
Val   macro-F1: 0.7851107538406574


# Load Model

In [None]:
# Reload the saved model and its tokenizer from Drive.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_german"
model     = AutoModelForSequenceClassification.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

data_collator = DataCollatorWithPadding(tokenizer)

# Trainer with default arguments, used below only for evaluation and prediction.
trainer = Trainer(
    model            = model,
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics
)

  trainer = Trainer(


# Result

Result for test data (labeled): **Macro F1: 0.78347**



In [None]:
# Evaluate on the labeled test split and report per-class and macro scores.
metrics = trainer.evaluate(test_ds)

print("Result of test data")
for class_name in ("OBJ", "SUBJ"):
    precision = metrics[f"eval_precision_{class_name}"]
    recall = metrics[f"eval_recall_{class_name}"]
    f1 = metrics[f"eval_f1_{class_name}"]
    print(f"{class_name} – Precision: {precision:.5f}, Recall: {recall:.5f}, F1: {f1:.5f}")
print(f"Macro‐F1: {metrics['eval_macro_f1']:.5f}")


[34m[1mwandb[0m: Currently logged in as: [33mmehreganmohseni[0m ([33mmehreganmohseni-universit-di-bologna[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Result of test data
OBJ – Precision: 0.84100, Recall: 0.87773, F1: 0.85897
SUBJ – Precision: 0.74074, Recall: 0.67797, F1: 0.70796
Macro‐F1: 0.78347


Result for dev test data

In [None]:
# Evaluate on the dev_test split and report per-class and macro scores.
metrics = trainer.evaluate(dev_test_ds)

print("Result of dev_test data")
for class_name in ("OBJ", "SUBJ"):
    precision = metrics[f"eval_precision_{class_name}"]
    recall = metrics[f"eval_recall_{class_name}"]
    f1 = metrics[f"eval_f1_{class_name}"]
    print(f"{class_name} – Precision: {precision:.5f}, Recall: {recall:.5f}, F1: {f1:.5f}")
print(f"Macro‐F1: {metrics['eval_macro_f1']:.5f}")


Result of dev_test data
OBJ – Precision: 0.91667, Recall: 0.86275, F1: 0.88889
SUBJ – Precision: 0.73750, Recall: 0.83099, F1: 0.78146
Macro‐F1: 0.83517


Predict labels for the unlabeled test data and save them.

In [None]:
# Predict a class id for every unlabeled sentence (argmax over the logits).
pred_out = trainer.predict(test_unlabeled_ds)
logits   = pred_out.predictions
pred_ids = logits.argmax(axis=-1)

# Map the integer ids back to the original string labels.
pred_labels = le.inverse_transform(pred_ids)

df = pd.DataFrame({
    'sentence': test_unlabeled_df['sentence'],
    'prediction': pred_labels
})
save_path = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/german/german_predictions.tsv"
# `to_csv` does not create missing folders, so make sure the target folder exists.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(save_path, sep='\t', index=False)

print(f"Saved predictions to {save_path}")


Saved predictions to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/german/german_predictions.tsv


# Second Model

To tokenize the data, we use the tokenizer of the **mdeberta-v3-base** model.

In [None]:
model_name = "microsoft/mdeberta-v3-base"
tokenizer  = AutoTokenizer.from_pretrained(model_name)

# Upper bound on the number of tokens per sentence; longer inputs are truncated.
max_len = 100

def tokenize(batch):
    """Tokenize the ``sentence`` column of a batch, truncating to ``max_len`` tokens.

    No padding is applied here on purpose: the ``DataCollatorWithPadding`` given
    to the trainer pads each batch to its longest sequence, so padding every
    example to ``max_len`` up front would only add wasted computation.

    Args:
        batch: mapping with a ``'sentence'`` entry holding a list of strings.

    Returns:
        The tokenizer output for the batch (``input_ids``, ``attention_mask``, ...).
    """
    return tokenizer(batch['sentence'],
                     truncation=True,
                     max_length=max_len)

# The *_ds datasets were already tokenized for the first model, which removed
# their raw 'sentence' column. Rebuild them from the DataFrames so this cell
# works on a fresh top-to-bottom run and uses this model's own token ids.
labelled_columns = ['sentence', 'labels']
train_ds    = Dataset.from_pandas(train_df[labelled_columns]).map(tokenize, batched=True)
dev_ds      = Dataset.from_pandas(dev_df[labelled_columns]).map(tokenize, batched=True)
dev_test_ds = Dataset.from_pandas(dev_test_df[labelled_columns]).map(tokenize, batched=True)
test_ds     = Dataset.from_pandas(test_df[labelled_columns]).map(tokenize, batched=True)
test_unlabeled_ds = Dataset.from_pandas(test_unlabeled_df[['sentence']]).map(tokenize, batched=True)

# Keep only the columns the model consumes.
cols = ['input_ids','attention_mask','labels']
train_ds    = train_ds.remove_columns([c for c in train_ds.column_names if c not in cols])
dev_ds      = dev_ds.remove_columns([c for c in dev_ds.column_names if c not in cols])
dev_test_ds = dev_test_ds.remove_columns([c for c in dev_test_ds.column_names if c not in cols])
test_ds     = test_ds.remove_columns([c for c in test_ds.column_names if c not in cols])
# The unlabeled split has no 'labels' column to keep.
test_unlabeled_ds = test_unlabeled_ds.remove_columns(
    [c for c in test_unlabeled_ds.column_names if c not in ['input_ids','attention_mask']])



Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/491 [00:00<?, ? examples/s]

Map:   0%|          | 0/224 [00:00<?, ? examples/s]

Map:   0%|          | 0/347 [00:00<?, ? examples/s]

Map:   0%|          | 0/347 [00:00<?, ? examples/s]

In [None]:
# Collator that pads the examples of a batch with the second model's tokenizer.
data_collator = DataCollatorWithPadding(tokenizer)

def compute_metrics(eval_pred):
    """Compute accuracy, per-class precision/recall/F1 and macro F1.

    Reports the same metrics, under the same keys, as for the first model.

    Args:
        eval_pred: pair ``(logits, labels)``; the predicted class is the
            argmax of ``logits`` over the last axis.

    Returns:
        dict with ``accuracy``, ``precision_*``/``recall_*``/``f1_*`` for the
        OBJ (id 0) and SUBJ (id 1) classes, and ``macro_f1`` (mean of the two
        per-class F1 scores).
    """
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)

    # Per-class scores, indexed by label id; undefined ratios count as 0.
    per_class = precision_recall_fscore_support(
        labels, preds, labels=[0,1], zero_division=0
    )
    precision, recall, f1 = per_class[:3]

    scores = {'accuracy': accuracy_score(labels, preds)}
    for class_id, class_name in enumerate(('OBJ', 'SUBJ')):
        scores[f'precision_{class_name}'] = precision[class_id]
        scores[f'recall_{class_name}'] = recall[class_id]
        scores[f'f1_{class_name}'] = f1[class_id]
    scores['macro_f1'] = f1.mean()
    return scores


In [None]:
# Train labels (0 = OBJ, 1 = SUBJ)
train_labels = train_ds["labels"]

counts = Counter(train_labels)
total  = counts[0] + counts[1]
# Inverse-frequency weight per example: total/counts[0] for OBJ and
# total/counts[1] for SUBJ, so both classes carry the same total weight.
weights = [ total / counts[label] for label in train_labels ]

# Sampler that draws N = len(train) items with replacement, proportionally to `weights`.
sampler = WeightedRandomSampler(
    weights      = weights,
    num_samples  = len(weights),
    replacement  = True
)



class SamplerTrainer(Trainer):
    """Trainer whose training batches are drawn through the module-level ``sampler``."""

    def get_train_dataloader(self):
        """Build the training DataLoader using ``sampler`` instead of the default sampling.

        Batch size, worker count and memory pinning are all taken from
        ``self.args`` so that the ``TrainingArguments`` remain the single
        source of configuration.

        Returns:
            A ``DataLoader`` over ``self.train_dataset`` collated with
            ``self.data_collator``.
        """
        return DataLoader(
            self.train_dataset,
            sampler      = sampler,
            batch_size   = self.args.per_device_train_batch_size,
            collate_fn   = self.data_collator,
            num_workers  = self.args.dataloader_num_workers,
            # Honor the configured value rather than forcing pinning on.
            pin_memory   = self.args.dataloader_pin_memory,
        )

In [None]:
# Release cached GPU memory before loading a new model.
torch.cuda.empty_cache()
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

model.gradient_checkpointing_enable()

training_args = TrainingArguments(
    output_dir                  = "/content/results1_de",
    # Evaluate and checkpoint once per epoch so the best epoch can be restored.
    eval_strategy               = 'epoch',
    save_strategy               = 'epoch',
    learning_rate               = 2e-5,
    per_device_train_batch_size = 16,
    gradient_accumulation_steps = 4,      # 16 x 4 = 64 examples per optimizer step on one device
    per_device_eval_batch_size  = 64,
    num_train_epochs            = 6,
    weight_decay                = 0.1,
    warmup_ratio                = 0.1,
    lr_scheduler_type           = "linear",
    label_smoothing_factor      = 0.1,
    max_grad_norm               = 1.0,
    fp16                        = True,
    # Restore the checkpoint with the highest dev macro F1 when training ends.
    load_best_model_at_end      = True,
    metric_for_best_model       = 'macro_f1',
    greater_is_better           = True,
    # Separate log folder (matching `results1_de`) so this run's logs do not
    # mix with those written to './logs_de' by the first model.
    logging_dir                 = './logs1_de',
    # Logging is per epoch; `logging_steps` only applies to the 'steps'
    # strategy, so it is not set here.
    logging_strategy            = 'epoch',
    seed                        = 42,
)

trainer = SamplerTrainer(
    model            = model,
    args             = training_args,
    train_dataset    = train_ds,
    eval_dataset     = dev_ds,
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics,
    # Stop as soon as one evaluation fails to improve macro F1.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
)




Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = SamplerTrainer(


In [None]:
# Fine-tune the second model with the trainer configured above.
trainer.train()

# Save the trainer's current model to Drive, creating the target folder if needed.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_german1"
save_dir = Path(output_dir)
save_dir.mkdir(parents=True, exist_ok=True)
trainer.save_model(output_dir)

print(f"Final model saved to {output_dir}")


Epoch,Training Loss,Validation Loss,Accuracy,Precision Obj,Recall Obj,F1 Obj,Precision Subj,Recall Subj,F1 Subj,Macro F1
1,0.6647,0.7033,0.354379,0.0,0.0,0.0,0.354379,1.0,0.523308,0.261654
2,0.6558,0.644441,0.657841,0.654244,0.996845,0.79,0.875,0.04023,0.076923,0.433462
3,0.5897,0.564104,0.749491,0.814935,0.791798,0.8032,0.639344,0.672414,0.655462,0.729331
4,0.4885,0.520804,0.802444,0.831325,0.870662,0.850539,0.742138,0.678161,0.708709,0.779624
5,0.432,0.522441,0.790224,0.851974,0.817035,0.834138,0.68984,0.741379,0.714681,0.77441


Final model saved to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_german1


In [None]:
# Print every logged record that carries a training loss or an evaluation loss.
loss_keys = ('eval_loss', 'loss')
for record in trainer.state.log_history:
    if any(key in record for key in loss_keys):
        print(record)

# Compare macro F1 on the training split and on the dev split.
train_metrics = trainer.evaluate(train_ds)
val_metrics   = trainer.evaluate(dev_ds)
train_macro_f1 = train_metrics['eval_macro_f1']
val_macro_f1   = val_metrics['eval_macro_f1']
print("Train macro-F1:", train_macro_f1)
print("Val   macro-F1:", val_macro_f1)


{'loss': 0.6647, 'grad_norm': 0.47663822770118713, 'learning_rate': 1.885714285714286e-05, 'epoch': 1.0, 'step': 13}
{'eval_loss': 0.7032999992370605, 'eval_accuracy': 0.3543788187372709, 'eval_precision_OBJ': 0.0, 'eval_recall_OBJ': 0.0, 'eval_f1_OBJ': 0.0, 'eval_precision_SUBJ': 0.3543788187372709, 'eval_recall_SUBJ': 1.0, 'eval_f1_SUBJ': 0.5233082706766917, 'eval_macro_f1': 0.26165413533834586, 'eval_runtime': 1.2899, 'eval_samples_per_second': 380.655, 'eval_steps_per_second': 6.202, 'epoch': 1.0, 'step': 13}
{'loss': 0.6558, 'grad_norm': 0.9440619945526123, 'learning_rate': 1.5142857142857144e-05, 'epoch': 2.0, 'step': 26}
{'eval_loss': 0.6444412469863892, 'eval_accuracy': 0.6578411405295316, 'eval_precision_OBJ': 0.6542443064182195, 'eval_recall_OBJ': 0.9968454258675079, 'eval_f1_OBJ': 0.79, 'eval_precision_SUBJ': 0.875, 'eval_recall_SUBJ': 0.040229885057471264, 'eval_f1_SUBJ': 0.07692307692307693, 'eval_macro_f1': 0.43346153846153845, 'eval_runtime': 1.0826, 'eval_samples_per_se

Train macro-F1: 0.8368940345998557
Val   macro-F1: 0.779623999962983


In [None]:
# Reload the saved second model and its tokenizer from Drive.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_german1"
model     = AutoModelForSequenceClassification.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

data_collator = DataCollatorWithPadding(tokenizer)

# Trainer with default arguments, used below only for evaluation and prediction.
trainer = Trainer(
    model            = model,
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics
)

  trainer = Trainer(


# Result

Result for test data (labeled): **Macro F1: 0.75289**



In [None]:
# Evaluate the second model on the labeled test split.
metrics = trainer.evaluate(test_ds)

print("Result of test data")
for class_name in ("OBJ", "SUBJ"):
    precision = metrics[f"eval_precision_{class_name}"]
    recall = metrics[f"eval_recall_{class_name}"]
    f1 = metrics[f"eval_f1_{class_name}"]
    print(f"{class_name} – Precision: {precision:.5f}, Recall: {recall:.5f}, F1: {f1:.5f}")
print(f"Macro‐F1: {metrics['eval_macro_f1']:.5f}")


Result of test data
OBJ – Precision: 0.82553, Recall: 0.84716, F1: 0.83621
SUBJ – Precision: 0.68750, Recall: 0.65254, F1: 0.66957
Macro‐F1: 0.75289


Result for dev test data

In [None]:
# Evaluate the second model on the dev_test split.
metrics = trainer.evaluate(dev_test_ds)

print("Result of dev_test data")
for class_name in ("OBJ", "SUBJ"):
    precision = metrics[f"eval_precision_{class_name}"]
    recall = metrics[f"eval_recall_{class_name}"]
    f1 = metrics[f"eval_f1_{class_name}"]
    print(f"{class_name} – Precision: {precision:.5f}, Recall: {recall:.5f}, F1: {f1:.5f}")
print(f"Macro‐F1: {metrics['eval_macro_f1']:.5f}")


Result of dev_test data
OBJ – Precision: 0.88652, Recall: 0.81699, F1: 0.85034
SUBJ – Precision: 0.66265, Recall: 0.77465, F1: 0.71429
Macro‐F1: 0.78231


Predict labels for the unlabeled test data and save them.

In [None]:
# Predict a class id for every unlabeled sentence (argmax over the logits).
pred_out = trainer.predict(test_unlabeled_ds)
logits   = pred_out.predictions
pred_ids = logits.argmax(axis=-1)

# Map the integer ids back to the original string labels.
pred_labels = le.inverse_transform(pred_ids)

df = pd.DataFrame({
    'sentence': test_unlabeled_df['sentence'],
    'prediction': pred_labels
})
save_path = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/german/german_predictions1.tsv"
# `to_csv` does not create missing folders, so make sure the target folder exists.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(save_path, sep='\t', index=False)

print(f"Saved predictions to {save_path}")


Saved predictions to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/german/german_predictions1.tsv
