# [Task 3: Multimodal Hate Speech Detection in Memes](https://github.com/marsadlab/MAHED2025Dataset/tree/main/task3/) at [ArabicNLP 2025](http://arabicnlp2025.sigarab.org/) @ACL 2025


Given multimodal content (the text extracted from a meme and the meme image itself), the task is to detect whether the content is hateful or not-hateful. This is a binary classification task.



### Installing required libraries
 - transformers
 - datasets
 - evaluate
 - accelerate

In [3]:
!pip install transformers
!pip install datasets
!pip install evaluate
!pip install --upgrade accelerate
!pip install -U datasets

Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.5-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.5
Collecting accelerate
  Downloading accelerate-1.9.0-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torc

## Download data from HF: https://huggingface.co/datasets/QCRI/Prop2Hate-Meme

In [4]:
import json
import os

from datasets import load_dataset
from PIL import Image

# Single root directory for everything exported from the Hub dataset:
# the saved HF dataset, the raw meme images, and one JSONL file per split.
output_dir = "./Prop2Hate-Meme/"

dataset = load_dataset("QCRI/Prop2Hate-Meme")

# Save the dataset to the output directory. This will save all splits.
dataset.save_to_disk(output_dir)
os.makedirs(output_dir, exist_ok=True)

# Export the raw images and a slimmed-down JSONL record for every example.
for split in ['train', 'dev', 'test']:
    jsonl_path = os.path.join(output_dir, f"arabic_hateful_meme_{split}.jsonl")
    with open(jsonl_path, 'w', encoding='utf-8') as f:
        for item in dataset[split]:
            # The image is already a PIL.Image object; take it out of the record
            # so the remaining fields are JSON-serialisable.
            image = item.pop('image')
            image_path = os.path.join(output_dir, item['img_path'])
            # Ensure the (possibly nested) image directory exists
            os.makedirs(os.path.dirname(image_path), exist_ok=True)
            image.save(image_path)
            # Keep only the binary hate label, exposed under the key `label`.
            del item['prop_label']
            del item['hate_fine_grained_label']
            item['label'] = item.pop('hate_label')
            f.write(json.dumps(item, ensure_ascii=False) + '\n')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

(…)-00000-of-00001-d9ac21f8cab19f0c.parquet:   0%|          | 0.00/155M [00:00<?, ?B/s]

(…)-00000-of-00001-3d9abbec7a460a82.parquet:   0%|          | 0.00/45.2M [00:00<?, ?B/s]

(…)-00000-of-00001-1c8a7be6beca8257.parquet:   0%|          | 0.00/21.7M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2143 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/606 [00:00<?, ? examples/s]

Generating dev split:   0%|          | 0/312 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/2143 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/606 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/312 [00:00<?, ? examples/s]

In [5]:
jsonl_path = "./Prop2Hate-Meme/arabic_hateful_meme_train.jsonl" # Example path, modify as needed

# Parse the JSONL export into a list of dictionaries, one per non-empty line
# (blank lines would otherwise make json.loads fail).
with open(jsonl_path, 'r', encoding='utf-8') as f:
    data = [json.loads(line) for line in f if line.strip()]

print(f"Loaded {len(data)} entries from {jsonl_path}")
if data:
    print("First entry:")
# Display the first record only when there is one; indexing an empty list
# unconditionally raised IndexError.
data[0] if data else None

Loaded 2143 entries from ./Prop2Hate-Meme/arabic_hateful_meme_train.jsonl
First entry:


{'id': 'data/arabic_memes_fb_insta_pinterest/Pinterest/images/pinterest_images_part2/www.pinterest.com_pin_374924737743995066/7485ad3c9c4cd8159ce93997a18a53a8.jpg',
 'text': 'زوجة ماكرون تصرح أن الحجاب يرعب ويخيف الأطفال..😅😂😂',
 'img_path': './data/arabic_memes_fb_insta_pinterest/Pinterest/images/pinterest_images_part2/www.pinterest.com_pin_374924737743995066/7485ad3c9c4cd8159ce93997a18a53a8.jpg',
 'label': 0}

### Defining the training, validation, and test data

In [6]:
# Paths of the per-split JSONL files written by the export cell
# (the validation split is stored under the name "dev").
_SPLIT_PATH_TEMPLATE = './Prop2Hate-Meme/arabic_hateful_meme_{}.jsonl'
train_file, validation_file, test_file = (
    _SPLIT_PATH_TEMPLATE.format(split_name) for split_name in ('train', 'dev', 'test')
)

In [7]:
import logging
import os
import random
import sys
from dataclasses import dataclass, field
from typing import Optional
import pandas as pd
import datasets
import evaluate
import numpy as np
from datasets import load_dataset, Dataset, DatasetDict
import torch

import transformers
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    EvalPrediction,
    HfArgumentParser,
    PretrainedConfig,
    Trainer,
    TrainingArguments,
    default_data_collator,
    set_seed,
)
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version, send_example_telemetry
from transformers.utils.versions import require_version


# Send log records to stdout so they show up in the notebook cell output,
# formatted as "timestamp - level - logger name - message".
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    handlers=[_stdout_handler],
    datefmt="%m/%d/%Y %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
)

# Module-level logger used by the rest of the notebook.
logger = logging.getLogger(__name__)

### Setting up the training parameters

In [8]:
# Optional caps on the number of examples per split (None = use the full split).
max_train_samples = None
max_eval_samples = None
max_predict_samples = None
# NOTE(review): the preprocessing cell later reassigns max_seq_length to
# min(128, tokenizer.model_max_length), so this value is not the one used for tokenization.
max_seq_length = 512
# Per-device batch size shared by training and evaluation.
batch_size = 16

training_args = TrainingArguments(
    learning_rate=2e-5,
    num_train_epochs=1,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    output_dir="./distilBERT_m/",
    overwrite_output_dir=True,
    # Keep the extra columns (id, text, img_path) in the datasets; `id` is read
    # back when the prediction file is written.
    remove_unused_columns=False,
    # `local_rank` is left at its default: this notebook runs as a single process,
    # so hard-coding rank 1 was incorrect.
    # `load_best_model_at_end` is not set: with save_strategy="no" and no evaluation
    # during training there is never a best checkpoint to reload.
    save_total_limit=2,
    save_strategy="no",
)

In [9]:
# Start at INFO verbosity, then align this notebook's logger and the
# datasets/transformers loggers with the level derived from training_args.
transformers.utils.logging.set_verbosity_info()

log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
for hf_logging in (datasets.utils.logging, transformers.utils.logging):
    hf_logging.set_verbosity(log_level)
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

# One-line summary of the process/device setup.
process_summary = (
    f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
    + f" distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
logger.warning(process_summary)
# logger.info(f"Training/evaluation parameters {training_args}")



#### Defining the Model

In [10]:
# Name of the pretrained checkpoint passed to the `from_pretrained` calls below;
# it is also written into the `run_id` column of the prediction file.
model_name = 'distilbert-base-multilingual-cased'

#### setting the random seed

In [11]:
# Seed the random number generators with the seed stored in training_args
# so that runs are repeatable.
set_seed(training_args.seed)

#### Loading data files

In [12]:
import json
import pandas as pd

## label 0 -> not-hateful, 1 -> hateful
l2id = {'not-hateful': 0, 'hateful': 1}


def read_jsonl_to_df(filename):
    """Read a JSON Lines file into a pandas DataFrame (one row per line)."""
    return pd.read_json(filename, lines=True)


# The exported JSONL files already store `label` as 0/1, so `l2id` is not applied here.
# The `*_df` names are kept for compatibility although they hold `datasets.Dataset` objects.
train_df = Dataset.from_pandas(read_jsonl_to_df(train_file))
validation_df = Dataset.from_pandas(read_jsonl_to_df(validation_file))
test_df = Dataset.from_pandas(read_jsonl_to_df(test_file))

# Bug fix: "test" previously pointed at validation_df.
data_files = {"train": train_df, "validation": validation_df, "test": test_df}

for key in data_files:
    logger.info(f"loading a local file for {key}")
raw_datasets = DatasetDict(data_files)

INFO:__main__:loading a local file for train
INFO:__main__:loading a local file for validation
INFO:__main__:loading a local file for test


##### Extracting number of unique labels

In [13]:
# Labels
label_list = raw_datasets["train"].unique("label")
label_list.sort()  # sort the labels for determine
num_labels = len(label_list)

### Loading Pretrained Configuration, Tokenizer and Model

In [14]:
# Hub options shared by the three `from_pretrained` calls below.
# `token` replaces the deprecated `use_auth_token` argument; None means no
# explicit access token is passed.
hub_kwargs = {"cache_dir": None, "revision": "main", "token": None}

# Model configuration with a classification head sized for our label set.
config = AutoConfig.from_pretrained(
    model_name,
    num_labels=num_labels,
    finetuning_task=None,
    **hub_kwargs,
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=True,
    **hub_kwargs,
)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    # Only treat the checkpoint as TensorFlow when the name points at a .ckpt file.
    from_tf=bool(".ckpt" in model_name),
    config=config,
    ignore_mismatched_sizes=False,
    **hub_kwargs,
)

config.json:   0%|          | 0.00/466 [00:00<?, ?B/s]

[INFO|configuration_utils.py:711] 2025-07-18 20:00:30,549 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-multilingual-cased/snapshots/45c032ab32cc946ad88a166f7cb282f58c753c2e/config.json
[INFO|configuration_utils.py:774] 2025-07-18 20:00:30,593 >> Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.53.2",
  "vocab_size": 119547
}



tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

[INFO|configuration_utils.py:711] 2025-07-18 20:00:30,864 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-multilingual-cased/snapshots/45c032ab32cc946ad88a166f7cb282f58c753c2e/config.json
[INFO|configuration_utils.py:774] 2025-07-18 20:00:30,868 >> Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.53.2",
  "vocab_size": 119547
}



vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

[INFO|tokenization_utils_base.py:2012] 2025-07-18 20:00:32,212 >> loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--distilbert-base-multilingual-cased/snapshots/45c032ab32cc946ad88a166f7cb282f58c753c2e/vocab.txt
[INFO|tokenization_utils_base.py:2012] 2025-07-18 20:00:32,213 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-multilingual-cased/snapshots/45c032ab32cc946ad88a166f7cb282f58c753c2e/tokenizer.json
[INFO|tokenization_utils_base.py:2012] 2025-07-18 20:00:32,215 >> loading file added_tokens.json from cache at None
[INFO|tokenization_utils_base.py:2012] 2025-07-18 20:00:32,217 >> loading file special_tokens_map.json from cache at None
[INFO|tokenization_utils_base.py:2012] 2025-07-18 20:00:32,219 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-multilingual-cased/snapshots/45c032ab32cc946ad88a166f7cb282f58c753c2e/tokenizer_config.json
[INFO|tokenization_u

model.safetensors:   0%|          | 0.00/542M [00:00<?, ?B/s]

[INFO|modeling_utils.py:1267] 2025-07-18 20:00:40,861 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--distilbert-base-multilingual-cased/snapshots/45c032ab32cc946ad88a166f7cb282f58c753c2e/model.safetensors
[INFO|modeling_utils.py:5374] 2025-07-18 20:00:41,086 >> Some weights of the model checkpoint at distilbert-base-multilingual-cased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing

#### Preprocessing the raw_datasets

In [15]:
non_label_column_names = [name for name in raw_datasets["train"].column_names if name != "label"]
# Tokenize the meme text. Select the column by name instead of by position
# (previously index 1), which silently depended on the column order of the JSONL files.
sentence1_key = "text"
if sentence1_key not in non_label_column_names:
    raise ValueError(
        f"Expected a '{sentence1_key}' column in the dataset, found: {non_label_column_names}"
    )

# Padding strategy: pad every example to max_seq_length.
padding = "max_length"

# Some models have set the order of the labels to use, so let's make sure we do use it.
label_to_id = None
if model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id:
    # Some have all caps in their config, some don't.
    label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
    if sorted(label_name_to_id.keys()) == sorted(label_list):
        label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
    else:
        # The message must be ONE string: extra positional arguments to
        # logger.warning are treated as %-format arguments and break the log call.
        logger.warning(
            "Your model seems to have been trained with labels, but they don't match the dataset: "
            f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
            "\nIgnoring the model labels as a result."
        )

if label_to_id is not None:
    model.config.label2id = label_to_id
    model.config.id2label = {idx: label for label, idx in config.label2id.items()}

# Sequence length requested for tokenization; capped by what the tokenizer supports.
# This assignment replaces any earlier value of max_seq_length.
requested_max_seq_length = 128
if requested_max_seq_length > tokenizer.model_max_length:
    logger.warning(
        f"The max_seq_length passed ({requested_max_seq_length}) is larger than the maximum length for the "
        f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}.")
max_seq_length = min(requested_max_seq_length, tokenizer.model_max_length)

def preprocess_function(examples):
    """Tokenize a batch of examples and, if configured, remap their labels.

    Reads the module-level `tokenizer`, `sentence1_key`, `padding`,
    `max_seq_length` and `label_to_id`.

    Args:
        examples: batch mapping column names to lists of values.

    Returns:
        The tokenizer output for the `sentence1_key` column. When `label_to_id`
        is set and the batch has a "label" column, a "label" entry is added with
        each label mapped through `label_to_id` (-1 is passed through unchanged).
    """
    texts = examples[sentence1_key]
    encoded = tokenizer(texts, padding=padding, max_length=max_seq_length, truncation=True)

    # Nothing to remap: return the tokenizer output as-is.
    if label_to_id is None or "label" not in examples:
        return encoded

    encoded["label"] = [-1 if raw == -1 else label_to_id[raw] for raw in examples["label"]]
    return encoded
# Tokenize every split in batches; cached results are reused when available.
map_options = dict(
    batched=True,
    load_from_cache_file=True,
    desc="Running tokenizer on dataset",
)
raw_datasets = raw_datasets.map(preprocess_function, **map_options)


Running tokenizer on dataset:   0%|          | 0/2143 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/312 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/606 [00:00<?, ? examples/s]

#### Finalize the training data for training the model

In [16]:
# The training split is mandatory.
train_dataset = raw_datasets.get("train")
if train_dataset is None:
    raise ValueError("requires a train dataset")

# Optionally keep only the first `max_train_samples` examples.
if max_train_samples is not None:
    max_train_samples_n = min(max_train_samples, len(train_dataset))
    train_dataset = train_dataset.select(range(max_train_samples_n))

In [17]:
# Display the dataset summary (feature names and row count) as a sanity check
# that the tokenizer columns were added.
train_dataset

Dataset({
    features: ['id', 'text', 'img_path', 'label', 'input_ids', 'attention_mask'],
    num_rows: 2143
})

#### Finalize the development/evaluation data for evaluating the model

In [18]:
# The validation split is mandatory.
eval_dataset = raw_datasets.get("validation")
if eval_dataset is None:
    raise ValueError("requires a validation dataset")

# Optionally keep only the first `max_eval_samples` examples.
if max_eval_samples is not None:
    max_eval_samples_n = min(max_eval_samples, len(eval_dataset))
    eval_dataset = eval_dataset.select(range(max_eval_samples_n))

#### Finalize the test data for predicting the unseen test data using the model

In [19]:
# Only the "test" split is read below, so that is the split that must exist.
# The previous guard also accepted "test_matched", after which the
# raw_datasets["test"] lookup failed with a KeyError.
if "test" not in raw_datasets:
    raise ValueError("requires a test dataset")
predict_dataset = raw_datasets["test"]

# Optionally keep only the first `max_predict_samples` examples.
if max_predict_samples is not None:
    max_predict_samples_n = min(len(predict_dataset), max_predict_samples)
    predict_dataset = predict_dataset.select(range(max_predict_samples_n))

#### Log a few random samples from the training set

In [20]:
# Pick three distinct random positions in the training set and log those examples.
sample_indices = random.sample(range(len(train_dataset)), 3)
for index in sample_indices:
    example = train_dataset[index]
    logger.info(f"Sample {index} of the training set: {example}.")

INFO:__main__:Sample 456 of the training set: {'id': 'data/arabic_memes_fb_insta_pinterest/Facebook/images/salsamemers/333730289_873615157259879_2315221019976095254_n.jpg', 'text': 'المسيخ الدجال جا يحكم اليمن. \nلشعب اليمني: \nشكرا دجال الخير\nمالنا الا المسيخ\nاضرب بنا البحر ياسيدي الاعور ', 'img_path': './data/arabic_memes_fb_insta_pinterest/Facebook/images/salsamemers/333730289_873615157259879_2315221019976095254_n.jpg', 'label': 1, 'input_ids': [101, 59901, 73261, 48665, 59901, 10658, 39515, 10961, 39274, 793, 49095, 31760, 119, 787, 11626, 20907, 31760, 10461, 131, 776, 39337, 10429, 771, 39515, 10961, 59901, 16498, 15089, 12441, 66803, 10429, 59901, 10429, 59901, 73261, 48665, 763, 93199, 35364, 30352, 60844, 26341, 18914, 59901, 21337, 12379, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

#### Get the metric function `accuracy`

In [21]:
# Load the `accuracy` metric from the `evaluate` library.
# NOTE(review): `metric` is not referenced by the `compute_metrics` function defined
# in this notebook, which computes accuracy with NumPy directly — confirm whether this load is needed.
metric = evaluate.load("accuracy")

Downloading builder script: 0.00B [00:00, ?B/s]

#### Define `compute_metrics`: it takes an `EvalPrediction` (with `predictions` and `label_ids` fields) and must return a dictionary mapping metric names (strings) to floats.

In [22]:
def compute_metrics(p: EvalPrediction):
    """Compute classification accuracy for a set of predictions.

    Args:
        p: object with `predictions` (per-class scores, or a tuple whose first
            element holds them) and `label_ids` (the reference labels).

    Returns:
        A dict `{"accuracy": value}` where `value` is the fraction of examples
        whose highest-scoring class equals the reference label.
    """
    scores = p.predictions
    if isinstance(scores, tuple):
        # Some models return extra outputs; the class scores come first.
        scores = scores[0]
    predicted_classes = np.argmax(scores, axis=1)
    hits = (predicted_classes == p.label_ids).astype(np.float32)
    return {"accuracy": hits.mean().item()}


#### Data Collator

In [23]:
# Use the default collator: examples were already padded to a fixed length
# (padding="max_length") during tokenization, so no dynamic padding is needed here.
data_collator = default_data_collator

#### Initialize our Trainer

In [24]:
# Build the Trainer that drives fine-tuning, evaluation and prediction.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # eval_dataset=eval_dataset, # if you have development and test set, uncomment this line
    compute_metrics=compute_metrics,
    # `processing_class` replaces the deprecated `tokenizer` argument, which
    # emitted a deprecation warning when this cell ran.
    processing_class=tokenizer,
    data_collator=data_collator,
)

  trainer = Trainer(


#### Training our model

In [25]:
# Fine-tune the model and collect the training metrics.
train_result = trainer.train()
metrics = train_result.metrics

# Record how many training examples were used (the cap, if any, else the full set).
if max_train_samples is None:
    max_train_samples = len(train_dataset)
metrics["train_samples"] = min(max_train_samples, len(train_dataset))



[INFO|trainer.py:2402] 2025-07-18 20:00:49,444 >> ***** Running training *****
[INFO|trainer.py:2403] 2025-07-18 20:00:49,445 >>   Num examples = 2,143
[INFO|trainer.py:2404] 2025-07-18 20:00:49,463 >>   Num Epochs = 1
[INFO|trainer.py:2405] 2025-07-18 20:00:49,465 >>   Instantaneous batch size per device = 16
[INFO|trainer.py:2408] 2025-07-18 20:00:49,467 >>   Total train batch size (w. parallel, distributed & accumulation) = 16
[INFO|trainer.py:2409] 2025-07-18 20:00:49,468 >>   Gradient Accumulation steps = 1
[INFO|trainer.py:2410] 2025-07-18 20:00:49,471 >>   Total optimization steps = 134
[INFO|trainer.py:2411] 2025-07-18 20:00:49,474 >>   Number of trainable parameters = 135,326,210
[INFO|integration_utils.py:832] 2025-07-18 20:00:49,483 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mfirojalam04[0m ([33mtanbih[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss


[INFO|trainer.py:2677] 2025-07-18 20:01:24,595 >> 

Training completed. Do not forget to share your model on huggingface.co/models =)




#### Saving the model (and the tokenizer, for easy upload), the training metrics, and the trainer state

In [26]:
# Persist the fine-tuned model; the log output of this cell shows the tokenizer
# files being written to the same output directory.
trainer.save_model()
# Print the training metrics, then save them through the trainer.
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
# Save the trainer state as well.
trainer.save_state()

[INFO|trainer.py:4019] 2025-07-18 20:01:24,648 >> Saving model checkpoint to ./distilBERT_m/
[INFO|configuration_utils.py:437] 2025-07-18 20:01:24,652 >> Configuration saved in ./distilBERT_m/config.json
[INFO|modeling_utils.py:3949] 2025-07-18 20:01:36,848 >> Model weights saved in ./distilBERT_m/model.safetensors
[INFO|tokenization_utils_base.py:2507] 2025-07-18 20:01:36,854 >> tokenizer config file saved in ./distilBERT_m/tokenizer_config.json
[INFO|tokenization_utils_base.py:2516] 2025-07-18 20:01:36,855 >> Special tokens file saved in ./distilBERT_m/special_tokens_map.json


***** train metrics *****
  epoch                    =        1.0
  total_flos               =    66095GF
  train_loss               =     0.3242
  train_runtime            = 0:00:35.16
  train_samples            =       2143
  train_samples_per_second =     60.948
  train_steps_per_second   =      3.811


#### Evaluating our model on validation/development data

In [27]:
logger.info("*** Evaluate ***")

# Score the fine-tuned model on the validation/development split.
metrics = trainer.evaluate(eval_dataset=eval_dataset)

# Record how many evaluation examples were used (the cap, if any, else the full set).
if max_eval_samples is None:
    max_eval_samples = len(eval_dataset)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))

# Print the metrics, then save them through the trainer.
for record_metrics in (trainer.log_metrics, trainer.save_metrics):
    record_metrics("eval", metrics)

INFO:__main__:*** Evaluate ***
[INFO|trainer.py:4353] 2025-07-18 20:01:36,943 >> 
***** Running Evaluation *****
[INFO|trainer.py:4355] 2025-07-18 20:01:36,943 >>   Num examples = 312
[INFO|trainer.py:4358] 2025-07-18 20:01:36,945 >>   Batch size = 16


***** eval metrics *****
  epoch                   =        1.0
  eval_accuracy           =     0.9006
  eval_loss               =     0.3035
  eval_runtime            = 0:00:01.13
  eval_samples            =        312
  eval_samples_per_second =    274.725
  eval_steps_per_second   =     17.611


### Predicting the test data

In [31]:
# if the test set is available, you don't need to run this cell
# NOTE(review): running this cell replaces the test split selected earlier with the
# validation/dev split, so the prediction file written next will contain dev-set
# predictions. Skip it (e.g. on "Run All") when predicting on the real test set.
predict_dataset = eval_dataset

In [32]:
# Maps the integer class ids back to the label names written to the submission file.
id2l = {0: 'not-hateful', 1: 'hateful'}
logger.info("*** Predict ***")

# Keep the example ids for the output file, and feed the model a copy without
# the `id` column. `predict_dataset` itself is left untouched so this cell can be
# re-run (previously the second run failed because `id` had already been removed).
ids = predict_dataset['id']
predict_inputs = predict_dataset.remove_columns("id")

# Highest-scoring class index for every example.
predictions = trainer.predict(predict_inputs, metric_key_prefix="predict").predictions
predictions = np.argmax(predictions, axis=1)

# NOTE(review): the file name says "task2A" although this notebook is for Task 3 —
# confirm the expected submission file name before uploading.
output_predict_file = os.path.join(training_args.output_dir, "task2A_TeamName.tsv")
if trainer.is_world_process_zero():
    with open(output_predict_file, "w", encoding="utf-8") as writer:
        logger.info("***** Predict results *****")
        writer.write("id\tlabel\trun_id\n")
        for index, class_index in enumerate(predictions):
            # class index -> dataset label value -> human-readable label name
            label_name = id2l[label_list[class_index]]
            writer.write(f"{ids[index]}\t{label_name}\t{model_name}\n")

INFO:__main__:*** Predict ***
[INFO|trainer.py:4353] 2025-07-18 20:02:24,263 >> 
***** Running Prediction *****
[INFO|trainer.py:4355] 2025-07-18 20:02:24,264 >>   Num examples = 312
[INFO|trainer.py:4358] 2025-07-18 20:02:24,265 >>   Batch size = 16


INFO:__main__:***** Predict results *****


In [33]:
# Show the first example id, to check the format of the values written to the `id` column.
ids[0]

'data/arabic_memes_fb_insta_pinterest/Pinterest/images/pinterest_images_part2/www.pinterest.com_pin_69242912999681646/feb274cfb5b9d937bbd1e48e015c071e.jpg'

#### Creating the model card

In [34]:
# Generate a model card recording the base checkpoint and the task type.
kwargs = dict(finetuned_from=model_name, tasks="text-classification")
trainer.create_model_card(**kwargs)

[INFO|modelcard.py:450] 2025-07-18 20:06:57,541 >> Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9006410241127014}]}
