##Monolingual_Arabic

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the data and model paths used below live under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset, concatenate_datasets
from transformers import AutoTokenizer
from collections import Counter
import random
import torch
from transformers import DataCollatorWithPadding, EarlyStoppingCallback
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import DataLoader, WeightedRandomSampler
from pathlib import Path

##Load Data

In [None]:
# Folder on the mounted Drive that holds the Arabic TSV splits.
base_dir = '/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/data/arabic'

# One TSV file per split, all stored directly under base_dir.
train_path          = base_dir + '/train_ar.tsv'
dev_path            = base_dir + '/dev_ar.tsv'
dev_test_path       = base_dir + '/dev_test_ar.tsv'
test_path           = base_dir + '/test_ar_labeled.tsv'
test_unlabeled_path = base_dir + '/test_ar_unlabeled.tsv'

In [None]:
# Read every split from its tab-separated file, in the same order as the paths were declared.
train_df, dev_df, dev_test_df, test_df, test_unlabeled_df = (
    pd.read_csv(split_path, sep='\t')
    for split_path in (train_path, dev_path, dev_test_path, test_path, test_unlabeled_path)
)

##Pre Processing

In [None]:
# Fit the encoder on the training labels only, then reuse the fitted mapping
# for the remaining labeled splits.
le = LabelEncoder()
train_df['label_id'] = le.fit_transform(train_df['label'])
for frame in (dev_df, dev_test_df, test_df):
    frame['label_id'] = le.transform(frame['label'])

# Replace the string 'label' column by the integer ids, stored as 'labels'.
for frame in (train_df, dev_df, dev_test_df, test_df):
    frame.drop(columns=['label'], inplace=True)
    frame.rename(columns={'label_id': 'labels'}, inplace=True)

print("Mapped classes:", dict(enumerate(le.classes_)))


Mapped classes: {0: 'OBJ', 1: 'SUBJ'}


In [None]:
# Labeled splits carry the text and its integer label; the unlabeled split has text only.
labeled_columns = ['sentence', 'labels']

train_ds          = Dataset.from_pandas(train_df[labeled_columns])
dev_ds            = Dataset.from_pandas(dev_df[labeled_columns])
dev_test_ds       = Dataset.from_pandas(dev_test_df[labeled_columns])
test_ds           = Dataset.from_pandas(test_df[labeled_columns])
test_unlabeled_ds = Dataset.from_pandas(test_unlabeled_df[['sentence']])

In [None]:
# Class balance of the training split, keyed by encoded label id (0 and 1).
counts = Counter(train_ds['labels'])
n_obj  = counts[0]
n_subj = counts[1]
print(f"Original counts → OBJ: {n_obj}, SUBJ: {n_subj}")

Original counts → OBJ: 1391, SUBJ: 1055


##First Model

To tokenize the data, we use the tokenizer of the **aubmindlab/araelectra-base-discriminator** model.

In [None]:
model_name = "aubmindlab/araelectra-base-discriminator"
tokenizer  = AutoTokenizer.from_pretrained(model_name)

# Every sentence is padded or truncated to exactly this many tokens.
max_len = 100


def tokenize(batch):
    """Encode the 'sentence' entries of a batch with the module-level tokenizer.

    Sequences are truncated and padded to max_len tokens; the tokenizer's
    output is returned unchanged.
    """
    encode_options = dict(padding='max_length', truncation=True, max_length=max_len)
    return tokenizer(batch['sentence'], **encode_options)

def _keep_only(dataset, wanted):
    """Return dataset with every column whose name is not in wanted removed."""
    surplus = [name for name in dataset.column_names if name not in wanted]
    return dataset.remove_columns(surplus)


# Phase 1: tokenize every split in batches.
train_ds          = train_ds.map(tokenize, batched=True)
dev_ds            = dev_ds.map(tokenize, batched=True)
dev_test_ds       = dev_test_ds.map(tokenize, batched=True)
test_ds           = test_ds.map(tokenize, batched=True)
test_unlabeled_ds = test_unlabeled_ds.map(tokenize, batched=True)

# Phase 2: keep only the listed columns (the unlabeled split has no 'labels').
cols = ['input_ids','attention_mask','labels']
train_ds          = _keep_only(train_ds, cols)
dev_ds            = _keep_only(dev_ds, cols)
dev_test_ds       = _keep_only(dev_test_ds, cols)
test_ds           = _keep_only(test_ds, cols)
test_unlabeled_ds = _keep_only(test_unlabeled_ds, ['input_ids','attention_mask'])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/392 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/503 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Map:   0%|          | 0/2446 [00:00<?, ? examples/s]

Map:   0%|          | 0/467 [00:00<?, ? examples/s]

Map:   0%|          | 0/748 [00:00<?, ? examples/s]

Map:   0%|          | 0/1036 [00:00<?, ? examples/s]

Map:   0%|          | 0/1036 [00:00<?, ? examples/s]

 Define a data collator for dynamic padding and a metrics function to compute per-class precision, recall, F1, and macro F1 score.


In [None]:
# Collator built from the current tokenizer; it is handed to the trainers below.
data_collator = DataCollatorWithPadding(tokenizer)


def compute_metrics(eval_pred):
    """Return accuracy plus per-class and macro-averaged precision/recall/F1.

    eval_pred unpacks into (logits, labels). The predicted class is the
    arg-max over the last logits axis. Class 0 is reported under the OBJ
    keys and class 1 under the SUBJ keys; 'macro_f1' is the mean of the two
    per-class F1 values.
    """
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)
    per_class_p, per_class_r, per_class_f1, _support = precision_recall_fscore_support(
        labels, preds, labels=[0,1], zero_division=0
    )

    scores = {'accuracy': accuracy_score(labels, preds)}
    for class_id, class_name in enumerate(('OBJ', 'SUBJ')):
        scores[f'precision_{class_name}'] = per_class_p[class_id]
        scores[f'recall_{class_name}']    = per_class_r[class_id]
        scores[f'f1_{class_name}']        = per_class_f1[class_id]
    scores['macro_f1'] = per_class_f1.mean()
    return scores


 Use WeightedRandomSampler to balance class sampling in each batch, and customize Trainer to use this sampler during training.


In [None]:
# Label id (0 or 1) of every training example, in dataset order.
train_labels = train_ds["labels"]

counts = Counter(train_labels)
total  = counts[0] + counts[1]

# Inverse-frequency weight per class (total / class count), looked up per example.
weight_per_class = {label: total / n_examples for label, n_examples in counts.items()}
weights = [weight_per_class[label] for label in train_labels]

# Draw as many indices per epoch as there are training examples, with replacement.
sampler = WeightedRandomSampler(
    weights     = weights,
    num_samples = len(weights),
    replacement = True,
)



class SamplerTrainer(Trainer):
    """Trainer whose training batches are drawn through the module-level ``sampler``.

    Only ``get_train_dataloader`` is overridden; everything else is inherited
    from ``Trainer``.
    """

    def get_train_dataloader(self):
        """Return a DataLoader over ``self.train_dataset`` driven by ``sampler``.

        Batch size, worker count and memory pinning are read from ``self.args``
        and batches are collated with ``self.data_collator``. ``sampler`` is
        resolved from the enclosing module each time this method runs.
        """
        return DataLoader(
            self.train_dataset,
            sampler      = sampler,
            batch_size   = self.args.per_device_train_batch_size,
            collate_fn   = self.data_collator,
            num_workers  = self.args.dataloader_num_workers,
            # Follow TrainingArguments.dataloader_pin_memory like the other
            # loader settings instead of forcing pinning on.
            pin_memory   = self.args.dataloader_pin_memory,
        )

 Initialize model **(aubmindlab/araelectra-base-discriminator)** and training configuration with gradient checkpointing and early stopping.

 Uses a custom SamplerTrainer to address class imbalance, and selects the best model based on macro F1 score.


In [None]:
torch.cuda.empty_cache()

# The dropout layers are created while the model is being constructed, so the
# rates must be part of the config the model is built from. Assigning them to
# model.config after from_pretrained() leaves the existing layers unchanged.
# Keyword arguments that match config attributes override the checkpoint's
# config before the model is instantiated.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels                   = 2,
    hidden_dropout_prob          = 0.3,
    attention_probs_dropout_prob = 0.3,
    classifier_dropout           = 0.3,
)
model.gradient_checkpointing_enable()

training_args = TrainingArguments(
    # --- bookkeeping ---
    output_dir                  = "/content/results_ar",
    logging_dir                 = './logs_ar',
    seed                        = 42,
    # --- evaluate, checkpoint and log once per epoch ---
    eval_strategy               = 'epoch',
    save_strategy               = 'epoch',
    logging_strategy            = 'epoch',
    # NOTE(review): presumably unused while logging_strategy is 'epoch' - confirm.
    logging_steps               = 50,
    # --- model selection on the 'macro_f1' value returned by compute_metrics ---
    load_best_model_at_end      = True,
    metric_for_best_model       = 'macro_f1',
    greater_is_better           = True,
    # --- batch sizes ---
    per_device_train_batch_size = 16,
    gradient_accumulation_steps = 2,
    per_device_eval_batch_size  = 64,
    # --- optimisation ---
    num_train_epochs            = 3,
    learning_rate               = 6e-5,
    lr_scheduler_type           = "cosine",
    warmup_ratio                = 0.4,
    weight_decay                = 0.3,
    max_grad_norm               = 1.0,
    label_smoothing_factor      = 0.1,
    fp16                        = True,
)

# Trainer for the first model: trains on train_ds through SamplerTrainer,
# evaluates on dev_ds, and stops early after one evaluation without improvement.
trainer = SamplerTrainer(
    model            = model,
    args             = training_args,
    train_dataset    = train_ds,
    eval_dataset     = dev_ds,
    # `tokenizer=` is deprecated on Trainer in recent transformers releases;
    # `processing_class=` is its replacement.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics,
    callbacks        = [EarlyStoppingCallback(early_stopping_patience=1)],
)




Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = SamplerTrainer(


Fine-tune the model and save the best checkpoint.

In [None]:
# Run fine-tuning with the trainer configured above.
trainer.train()

# Save the trainer's model to Drive, creating the target folder if it is missing.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_arabic"
model_dir = Path(output_dir)
model_dir.mkdir(parents=True, exist_ok=True)
trainer.save_model(output_dir)

print(f"Final model saved to {output_dir}")


Epoch,Training Loss,Validation Loss,Accuracy,Precision Obj,Recall Obj,F1 Obj,Precision Subj,Recall Subj,F1 Subj,Macro F1
1,0.6807,0.693499,0.554604,0.612403,0.593985,0.603053,0.483254,0.502488,0.492683,0.547868
2,0.5738,0.916266,0.513919,0.612717,0.398496,0.482916,0.455782,0.666667,0.541414,0.512165


Final model saved to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_arabic


Print the training and evaluation loss logs for each epoch, then evaluate and display the final macro F1 scores on the train and validation sets.


In [None]:
# Print only the log entries that carry a training or an evaluation loss.
loss_keys = ('eval_loss', 'loss')
for entry in trainer.state.log_history:
    if any(key in entry for key in loss_keys):
        print(entry)

# Score the current model on both the training and the validation split.
train_metrics = trainer.evaluate(train_ds)
val_metrics   = trainer.evaluate(dev_ds)
for split_label, split_metrics in (("Train macro-F1:", train_metrics),
                                   ("Val   macro-F1:", val_metrics)):
    print(split_label, split_metrics['eval_macro_f1'])


{'loss': 0.6807, 'grad_norm': 1.025607943534851, 'learning_rate': 4.9032258064516135e-05, 'epoch': 1.0, 'step': 77}
{'eval_loss': 0.6934988498687744, 'eval_accuracy': 0.5546038543897216, 'eval_precision_OBJ': 0.6124031007751938, 'eval_recall_OBJ': 0.5939849624060151, 'eval_f1_OBJ': 0.6030534351145038, 'eval_precision_SUBJ': 0.48325358851674644, 'eval_recall_SUBJ': 0.5024875621890548, 'eval_f1_SUBJ': 0.4926829268292683, 'eval_macro_f1': 0.547868180971886, 'eval_runtime': 0.7163, 'eval_samples_per_second': 651.937, 'eval_steps_per_second': 11.168, 'epoch': 1.0, 'step': 77}
{'loss': 0.5738, 'grad_norm': 3.063960075378418, 'learning_rate': 3.610368039157902e-05, 'epoch': 2.0, 'step': 154}
{'eval_loss': 0.9162663221359253, 'eval_accuracy': 0.5139186295503212, 'eval_precision_OBJ': 0.6127167630057804, 'eval_recall_OBJ': 0.39849624060150374, 'eval_f1_OBJ': 0.48291571753986334, 'eval_precision_SUBJ': 0.4557823129251701, 'eval_recall_SUBJ': 0.6666666666666666, 'eval_f1_SUBJ': 0.5414141414141415

Train macro-F1: 0.6315631364449714
Val   macro-F1: 0.547868180971886


Load Model

In [None]:
# Reload the saved model and tokenizer from Drive and wrap them in a plain
# Trainer that is only used for evaluation and prediction.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_arabic"
model     = AutoModelForSequenceClassification.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

data_collator = DataCollatorWithPadding(tokenizer)

trainer = Trainer(
    model            = model,
    # `tokenizer=` is deprecated on Trainer in recent transformers releases;
    # `processing_class=` is its replacement.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics,
)

  trainer = Trainer(


##Result

Result on the labeled test data: **Macro F1: 0.59194**



In [None]:

# Evaluate on the labeled test split and print per-class scores followed by macro-F1.
metrics = trainer.evaluate(test_ds)

print("Result of test data")
for class_name in ("OBJ", "SUBJ"):
    class_precision = metrics[f'eval_precision_{class_name}']
    class_recall    = metrics[f'eval_recall_{class_name}']
    class_f1        = metrics[f'eval_f1_{class_name}']
    print(f"{class_name} – Precision: {class_precision:.5f}, Recall: {class_recall:.5f}, F1: {class_f1:.5f}")
print(f"Macro‐F1: {metrics['eval_macro_f1']:.5f}")


Result of test data
OBJ – Precision: 0.79963, Recall: 0.59835, F1: 0.68450
SUBJ – Precision: 0.40650, Recall: 0.64725, F1: 0.49938
Macro‐F1: 0.59194


Result for dev test data

In [None]:
# Evaluate on the dev_test split and print per-class scores followed by macro-F1.
metrics = trainer.evaluate(dev_test_ds)

print("Result of dev_test data")
for class_name in ("OBJ", "SUBJ"):
    class_precision = metrics[f'eval_precision_{class_name}']
    class_recall    = metrics[f'eval_recall_{class_name}']
    class_f1        = metrics[f'eval_f1_{class_name}']
    print(f"{class_name} – Precision: {class_precision:.5f}, Recall: {class_recall:.5f}, F1: {class_f1:.5f}")
print(f"Macro‐F1: {metrics['eval_macro_f1']:.5f}")


Result of dev_test data
OBJ – Precision: 0.62287, Recall: 0.60235, F1: 0.61244
SUBJ – Precision: 0.49852, Recall: 0.52012, F1: 0.50909
Macro‐F1: 0.56077


Predict labels for the unlabeled test data and save them.

In [None]:

# Predict a class id for every unlabeled sentence (arg-max over the logits)
# and map the ids back to the original string labels.
pred_out = trainer.predict(test_unlabeled_ds)
logits   = pred_out.predictions
pred_ids = logits.argmax(axis=-1)

pred_labels = le.inverse_transform(pred_ids)

df = pd.DataFrame({
    'sentence': test_unlabeled_df['sentence'],
    'prediction': pred_labels
})
save_path = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/arabic/arabic_predictions.tsv"
# Create the output folder first: to_csv does not create missing directories.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(save_path, sep='\t', index=False)

print(f"Saved predictions to {save_path}")


Saved predictions to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/arabic/arabic_predictions.tsv


##Second Model

To tokenize the data, we use the tokenizer of the **microsoft/mdeberta-v3-base** model.

In [None]:
model_name = "microsoft/mdeberta-v3-base"
tokenizer  = AutoTokenizer.from_pretrained(model_name)

# Every sentence is padded or truncated to exactly this many tokens.
max_len = 100


def tokenize(batch):
    """Encode the 'sentence' entries of a batch with the module-level tokenizer.

    Sequences are truncated and padded to max_len tokens; the tokenizer's
    output is returned unchanged.
    """
    encode_options = dict(padding='max_length', truncation=True, max_length=max_len)
    return tokenizer(batch['sentence'], **encode_options)

# The datasets left over from the first model no longer have a 'sentence'
# column (it was removed after tokenization) and hold token ids from the
# previous tokenizer. Rebuild them from the DataFrames so tokenize() can read
# batch['sentence'] and this cell works on a fresh top-to-bottom run.
train_ds          = Dataset.from_pandas(train_df[['sentence','labels']])
dev_ds            = Dataset.from_pandas(dev_df[['sentence','labels']])
dev_test_ds       = Dataset.from_pandas(dev_test_df[['sentence','labels']])
test_ds           = Dataset.from_pandas(test_df[['sentence','labels']])
test_unlabeled_ds = Dataset.from_pandas(test_unlabeled_df[['sentence']])

# Tokenize every split in batches with the current tokenizer.
train_ds          = train_ds.map(tokenize, batched=True)
dev_ds            = dev_ds.map(tokenize, batched=True)
dev_test_ds       = dev_test_ds.map(tokenize, batched=True)
test_ds           = test_ds.map(tokenize, batched=True)
test_unlabeled_ds = test_unlabeled_ds.map(tokenize, batched=True)

# Keep only the listed columns (the unlabeled split has no 'labels').
cols = ['input_ids','attention_mask','labels']
train_ds    = train_ds.remove_columns([c for c in train_ds.column_names if c not in cols])
dev_ds      = dev_ds.remove_columns([c for c in dev_ds.column_names if c not in cols])
dev_test_ds = dev_test_ds.remove_columns([c for c in dev_test_ds.column_names if c not in cols])
test_ds     = test_ds.remove_columns([c for c in test_ds.column_names if c not in cols])
test_unlabeled_ds = test_unlabeled_ds.remove_columns(
    [c for c in test_unlabeled_ds.column_names if c not in ['input_ids','attention_mask']])



Map:   0%|          | 0/2446 [00:00<?, ? examples/s]

Map:   0%|          | 0/467 [00:00<?, ? examples/s]

Map:   0%|          | 0/748 [00:00<?, ? examples/s]

Map:   0%|          | 0/1036 [00:00<?, ? examples/s]

Map:   0%|          | 0/1036 [00:00<?, ? examples/s]

In [None]:
# Collator built from the current tokenizer; it is handed to the trainers below.
data_collator = DataCollatorWithPadding(tokenizer)


def compute_metrics(eval_pred):
    """Return accuracy plus per-class and macro-averaged precision/recall/F1.

    eval_pred unpacks into (logits, labels). The predicted class is the
    arg-max over the last logits axis. Class 0 is reported under the OBJ
    keys and class 1 under the SUBJ keys; 'macro_f1' is the mean of the two
    per-class F1 values.
    """
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)
    per_class_p, per_class_r, per_class_f1, _support = precision_recall_fscore_support(
        labels, preds, labels=[0,1], zero_division=0
    )

    scores = {'accuracy': accuracy_score(labels, preds)}
    for class_id, class_name in enumerate(('OBJ', 'SUBJ')):
        scores[f'precision_{class_name}'] = per_class_p[class_id]
        scores[f'recall_{class_name}']    = per_class_r[class_id]
        scores[f'f1_{class_name}']        = per_class_f1[class_id]
    scores['macro_f1'] = per_class_f1.mean()
    return scores


In [None]:
# Label id (0 or 1) of every training example, in dataset order.
train_labels = train_ds["labels"]

counts = Counter(train_labels)
total  = counts[0] + counts[1]

# Inverse-frequency weight per class (total / class count), looked up per example.
weight_per_class = {label: total / n_examples for label, n_examples in counts.items()}
weights = [weight_per_class[label] for label in train_labels]

# Draw as many indices per epoch as there are training examples, with replacement.
sampler = WeightedRandomSampler(
    weights     = weights,
    num_samples = len(weights),
    replacement = True,
)



class SamplerTrainer(Trainer):
    """Trainer whose training batches are drawn through the module-level ``sampler``.

    Only ``get_train_dataloader`` is overridden; everything else is inherited
    from ``Trainer``.
    """

    def get_train_dataloader(self):
        """Return a DataLoader over ``self.train_dataset`` driven by ``sampler``.

        Batch size, worker count and memory pinning are read from ``self.args``
        and batches are collated with ``self.data_collator``. ``sampler`` is
        resolved from the enclosing module each time this method runs.
        """
        return DataLoader(
            self.train_dataset,
            sampler      = sampler,
            batch_size   = self.args.per_device_train_batch_size,
            collate_fn   = self.data_collator,
            num_workers  = self.args.dataloader_num_workers,
            # Follow TrainingArguments.dataloader_pin_memory like the other
            # loader settings instead of forcing pinning on.
            pin_memory   = self.args.dataloader_pin_memory,
        )

In [None]:
# Release cached GPU memory before loading the second model.
torch.cuda.empty_cache()
# Load the checkpoint named by model_name with a 2-label sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Turn on gradient checkpointing for this model.
model.gradient_checkpointing_enable()

training_args = TrainingArguments(
    # --- bookkeeping ---
    output_dir                  = "/content/results1_ar",
    # Separate log folder for the second model (same "1" suffix as its other
    # output paths) so its logs are not mixed with the first run's ./logs_ar.
    logging_dir                 = './logs1_ar',
    seed                        = 42,
    # --- evaluate, checkpoint and log once per epoch ---
    eval_strategy               = 'epoch',
    save_strategy               = 'epoch',
    logging_strategy            = 'epoch',
    # NOTE(review): presumably unused while logging_strategy is 'epoch' - confirm.
    logging_steps               = 50,
    # --- model selection on the 'macro_f1' value returned by compute_metrics ---
    load_best_model_at_end      = True,
    metric_for_best_model       = 'macro_f1',
    greater_is_better           = True,
    # --- batch sizes ---
    per_device_train_batch_size = 16,
    gradient_accumulation_steps = 4,
    per_device_eval_batch_size  = 64,
    # --- optimisation ---
    num_train_epochs            = 3,
    learning_rate               = 4e-5,
    lr_scheduler_type           = "cosine",
    warmup_ratio                = 0.08,
    weight_decay                = 0.2,
    max_grad_norm               = 1.0,
    label_smoothing_factor      = 0.1,
    fp16                        = True,
)

# Trainer for the second model: trains on train_ds through SamplerTrainer,
# evaluates on dev_ds, and stops early after one evaluation without improvement.
trainer = SamplerTrainer(
    model            = model,
    args             = training_args,
    train_dataset    = train_ds,
    eval_dataset     = dev_ds,
    # `tokenizer=` is deprecated on Trainer in recent transformers releases;
    # `processing_class=` is its replacement.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics,
    callbacks        = [EarlyStoppingCallback(early_stopping_patience=1)],
)




Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = SamplerTrainer(


In [None]:
# Run fine-tuning with the trainer configured above.
trainer.train()

# Save the trainer's model to Drive, creating the target folder if it is missing.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_arabic1"
model_dir = Path(output_dir)
model_dir.mkdir(parents=True, exist_ok=True)
trainer.save_model(output_dir)

print(f"Final model saved to {output_dir}")


Epoch,Training Loss,Validation Loss,Accuracy,Precision Obj,Recall Obj,F1 Obj,Precision Subj,Recall Subj,F1 Subj,Macro F1
1,0.6789,0.693613,0.573876,0.573626,0.981203,0.723994,0.583333,0.034826,0.065728,0.394861
2,0.6722,0.693185,0.569593,0.599388,0.736842,0.661046,0.5,0.348259,0.410557,0.535801
3,0.6576,0.700711,0.563169,0.597484,0.714286,0.650685,0.489933,0.363184,0.417143,0.533914


Final model saved to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_arabic1


In [None]:

# Print only the log entries that carry a training or an evaluation loss.
loss_keys = ('eval_loss', 'loss')
for entry in trainer.state.log_history:
    if any(key in entry for key in loss_keys):
        print(entry)

# Score the current model on both the training and the validation split.
train_metrics = trainer.evaluate(train_ds)
val_metrics   = trainer.evaluate(dev_ds)
for split_label, split_metrics in (("Train macro-F1:", train_metrics),
                                   ("Val   macro-F1:", val_metrics)):
    print(split_label, split_metrics['eval_macro_f1'])


{'loss': 0.6789, 'grad_norm': 0.6950065493583679, 'learning_rate': 3.361370043098126e-05, 'epoch': 1.0, 'step': 39}
{'eval_loss': 0.6936134696006775, 'eval_accuracy': 0.5738758029978587, 'eval_precision_OBJ': 0.5736263736263736, 'eval_recall_OBJ': 0.981203007518797, 'eval_f1_OBJ': 0.723994452149792, 'eval_precision_SUBJ': 0.5833333333333334, 'eval_recall_SUBJ': 0.03482587064676617, 'eval_f1_SUBJ': 0.06572769953051644, 'eval_macro_f1': 0.3948610758401542, 'eval_runtime': 1.0161, 'eval_samples_per_second': 459.579, 'eval_steps_per_second': 7.873, 'epoch': 1.0, 'step': 39}
{'loss': 0.6722, 'grad_norm': 0.8246380686759949, 'learning_rate': 1.227856872452637e-05, 'epoch': 2.0, 'step': 78}
{'eval_loss': 0.693185031414032, 'eval_accuracy': 0.569593147751606, 'eval_precision_OBJ': 0.599388379204893, 'eval_recall_OBJ': 0.7368421052631579, 'eval_f1_OBJ': 0.6610455311973018, 'eval_precision_SUBJ': 0.5, 'eval_recall_SUBJ': 0.3482587064676617, 'eval_f1_SUBJ': 0.41055718475073316, 'eval_macro_f1': 0

Train macro-F1: 0.5910280799221077
Val   macro-F1: 0.5358013579740175


In [None]:

# Reload the saved second model and its tokenizer from Drive and wrap them in
# a plain Trainer that is only used for evaluation and prediction.
output_dir = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/models/Monolingual_arabic1"
model     = AutoModelForSequenceClassification.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

data_collator = DataCollatorWithPadding(tokenizer)

trainer = Trainer(
    model            = model,
    # `tokenizer=` is deprecated on Trainer in recent transformers releases;
    # `processing_class=` is its replacement.
    processing_class = tokenizer,
    data_collator    = data_collator,
    compute_metrics  = compute_metrics,
)

  trainer = Trainer(


##Result

Result on the labeled test data: **Macro F1: 0.57380**



In [None]:

# Evaluate on the labeled test split and print per-class scores followed by macro-F1.
metrics = trainer.evaluate(test_ds)

print("Result of test data")
for class_name in ("OBJ", "SUBJ"):
    class_precision = metrics[f'eval_precision_{class_name}']
    class_recall    = metrics[f'eval_recall_{class_name}']
    class_f1        = metrics[f'eval_f1_{class_name}']
    print(f"{class_name} – Precision: {class_precision:.5f}, Recall: {class_recall:.5f}, F1: {class_f1:.5f}")
print(f"Macro‐F1: {metrics['eval_macro_f1']:.5f}")


Result of test data
OBJ – Precision: 0.75110, Recall: 0.70564, F1: 0.72766
SUBJ – Precision: 0.39377, Recall: 0.44984, F1: 0.41994
Macro‐F1: 0.57380


Result for dev test data

In [None]:
# Evaluate on the dev_test split and print per-class scores followed by macro-F1.
metrics = trainer.evaluate(dev_test_ds)

print("Result of dev_test data")
for class_name in ("OBJ", "SUBJ"):
    class_precision = metrics[f'eval_precision_{class_name}']
    class_recall    = metrics[f'eval_recall_{class_name}']
    class_f1        = metrics[f'eval_f1_{class_name}']
    print(f"{class_name} – Precision: {class_precision:.5f}, Recall: {class_recall:.5f}, F1: {class_f1:.5f}")
print(f"Macro‐F1: {metrics['eval_macro_f1']:.5f}")


Result of dev_test data
OBJ – Precision: 0.61070, Recall: 0.77882, F1: 0.68459
SUBJ – Precision: 0.54369, Recall: 0.34675, F1: 0.42344
Macro‐F1: 0.55402


Predict labels for the unlabeled test data and save them.

In [None]:
# Predict a class id for every unlabeled sentence (arg-max over the logits)
# and map the ids back to the original string labels.
pred_out = trainer.predict(test_unlabeled_ds)
logits   = pred_out.predictions
pred_ids = logits.argmax(axis=-1)

pred_labels = le.inverse_transform(pred_ids)


df = pd.DataFrame({
    'sentence': test_unlabeled_df['sentence'],
    'prediction': pred_labels
})
save_path = "/content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/arabic/arabic_predictions1.tsv"
# Create the output folder first: to_csv does not create missing directories.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(save_path, sep='\t', index=False)

print(f"Saved predictions to {save_path}")


Saved predictions to /content/drive/MyDrive/clef2025-checkthat-lab-main-task1/task1/unlabeld_predict/arabic/arabic_predictions1.tsv
