# [Task 3: Multimodal Hate Speech Detection in Memes](https://github.com/marsadlab/MAHED2025Dataset/tree/main/task3/) at [ArabicNLP 2025](http://arabicnlp2025.sigarab.org/) @EMNLP 2025


Given multimodal content (the meme image and the text extracted from it), the task is to detect whether the content is hateful or not hateful. This is a binary classification task.

### installing required libraries.
 - transformers
 - datasets
 - evaluate
 - accelerate

In [None]:
!pip install transformers
!pip install datasets
!pip install evaluate
!pip install --upgrade accelerate
!pip install -U datasets

#### importing required libraries and setting up logger

In [None]:
import logging
import os
import random
import sys
from dataclasses import dataclass, field
from typing import Optional
import pandas as pd
import datasets
import evaluate
import numpy as np
from torchvision.transforms import Compose, Normalize, ToTensor, Resize, CenterCrop
from datasets import load_dataset, Dataset, DatasetDict
import torch

import transformers
from transformers import (
    ConvNextFeatureExtractor,
    ResNetConfig,
    ResNetForImageClassification,
    DataCollatorWithPadding,
    EvalPrediction,
    HfArgumentParser,
    PretrainedConfig,
    Trainer,
    TrainingArguments,
    default_data_collator,
    set_seed,
)
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version, send_example_telemetry
from transformers.utils.versions import require_version


# Module-level logger used by the cells in this notebook.
logger = logging.getLogger(__name__)

# Route log records to stdout with a timestamped
# "<time> - <level> - <logger name> - <message>" layout.
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    handlers=[_stdout_handler],
    datefmt="%m/%d/%Y %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
)

### Setting up the training parameters

In [None]:
# Per-device batch size shared by training and evaluation.
batch_size = 16

# Run configuration for the Hugging Face Trainer.
training_args = TrainingArguments(
    learning_rate=2e-5,
    num_train_epochs=2,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    output_dir="./resnet_50/",
    overwrite_output_dir=True,
    # Keep every dataset column: the on-the-fly image transform and the
    # custom collator in this notebook read "img_path" and "label" themselves.
    remove_unused_columns=False,
    # `local_rank` is intentionally left at its default (-1, i.e. not
    # distributed). The notebook runs as a single process, so it must not
    # declare itself as rank 1 of a distributed job.
    # NOTE(review): with save_strategy="no" no intermediate checkpoints are
    # written, so there is presumably no "best" checkpoint for
    # load_best_model_at_end to reload - confirm the intended combination.
    load_best_model_at_end=True,
    save_total_limit=2,
    save_strategy="no",
)

# Optional caps on the number of examples used per split (None = use all).
max_train_samples = None
max_eval_samples = None
max_predict_samples = None

In [None]:
# Short alias for the transformers logging helpers used below.
hf_logging = transformers.utils.logging

hf_logging.set_verbosity_info()

# Apply the log level derived from the training arguments to this notebook's
# logger as well as to the `datasets` and `transformers` libraries.
log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
hf_logging.set_verbosity(log_level)
hf_logging.enable_default_handler()
hf_logging.enable_explicit_format()

# One-line summary of the process/device setup, followed by the full arguments.
logger.warning(
    f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
    f" distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")

#### Defining the Model

In [None]:
# Name recorded as `finetuned_from` when the model card is created at the end
# of the notebook.
# NOTE(review): the network itself is instantiated from a fresh `ResNetConfig`
# further down, not loaded from a checkpoint with this name - confirm that this
# label is what should appear in the model card.
model_name = 'resnet50'

#### setting the random seed

In [None]:
# Seed the random number generators with the seed stored in the training
# arguments; this runs before the model is instantiated later in the notebook.
set_seed(training_args.seed)

## Download data from HF: https://huggingface.co/datasets/QCRI/Prop2Hate-Meme
### Defining the training, validation, and test data

In [None]:
import json
import os

from datasets import load_dataset
from PIL import Image

# Download all splits of the dataset from the Hugging Face Hub.
dataset = load_dataset("QCRI/Prop2Hate-Meme")

# Single local directory that receives the saved dataset, the raw image files
# and one JSONL metadata file per split.
output_dir = "./Prop2Hate-Meme"

# Save the dataset to the specified directory. This will save all splits to the output directory.
dataset.save_to_disk(output_dir)
os.makedirs(output_dir, exist_ok=True)

# Columns that are not copied verbatim into the JSONL records: the decoded
# image is written to disk instead, the two auxiliary label columns are
# dropped, and "hate_label" is re-emitted under the key "label".
dropped_columns = ("image", "prop_label", "hate_fine_grained_label", "hate_label")

# Export the raw images and write one JSON object per example.
for split in ["train", "dev", "test"]:
    jsonl_path = os.path.join(output_dir, f"arabic_hateful_meme_{split}.jsonl")
    with open(jsonl_path, "w", encoding="utf-8") as f:
        for item in dataset[split]:
            # The "image" field is saved directly; it is treated here as an
            # object exposing `.save(path)` (a PIL image).
            image = item["image"]
            image_path = os.path.join(output_dir, item["img_path"])
            # Ensure the directory exists
            os.makedirs(os.path.dirname(image_path), exist_ok=True)
            image.save(image_path)

            # Build the record as a new dict instead of mutating `item`;
            # "label" is appended last, after the remaining original columns.
            record = {
                key: value
                for key, value in item.items()
                if key not in dropped_columns
            }
            record["label"] = item["hate_label"]
            f.write(json.dumps(record, ensure_ascii=False) + "\n")

In [None]:
import os

# Switch into the exported dataset directory so the relative image paths
# stored in the JSONL files resolve from the current working directory.
# Note: the target is a relative path, so it is resolved against whatever the
# working directory is at the time this cell runs.
os.chdir("Prop2Hate-Meme")

# JSONL metadata files for the train / dev / test splits.
train_file, validation_file, test_file = (
    f"./arabic_hateful_meme_{split}.jsonl" for split in ("train", "dev", "test")
)

In [None]:
import json

jsonl_path = "./arabic_hateful_meme_train.jsonl"  # Example path, modify as needed

# Parse the file into a list of dictionaries, one per JSON line. Blank lines
# (e.g. a trailing newline at the end of the file) are skipped rather than
# passed to the JSON parser.
with open(jsonl_path, "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

print(f"Loaded {len(data)} entries from {jsonl_path}")
if data:
    print("First entry:")
# Last expression of the cell: shows the first record, or nothing when the
# file is empty (indexing an empty list would raise IndexError).
data[0] if data else None

#### Loading data files

In [None]:
import pandas as pd
from datasets import Dataset, DatasetDict, Image

def read_jsonl_to_df(filename):
    """Read a JSON Lines file (one JSON object per line) into a DataFrame.

    Args:
        filename: Path of the JSONL file, forwarded to ``pandas.read_json``.

    Returns:
        A ``pandas.DataFrame`` with one row per line of the file.
    """
    frame = pd.read_json(filename, lines=True)
    return frame

# Label-name -> integer-id mapping. It is not applied anywhere in this cell:
# the labels are used exactly as they are stored in the JSONL files.
l2id = {'not-hate': 0, 'hate': 1}


def prepare_dataset(file):
    """Build a ``Dataset`` from one JSONL split file.

    All splits are assumed to keep their image location in the ``img_path``
    column; that column is cast to the ``Image`` feature type.

    Args:
        file: Path of the JSONL file describing the split.

    Returns:
        The ``Dataset`` created from the file's rows, with ``img_path`` cast
        to ``Image()``.
    """
    frame = read_jsonl_to_df(file)
    split_dataset = Dataset.from_pandas(frame)
    return split_dataset.cast_column("img_path", Image())

# Map each split name to its JSONL file, then build every split in one pass.
split_files = {
    "train": train_file,
    "validation": validation_file,
    "test": test_file,
}
raw_datasets = DatasetDict(
    {split_name: prepare_dataset(path) for split_name, path in split_files.items()}
)

# Per-split handles onto the datasets stored in `raw_datasets`.
train_dataset = raw_datasets["train"]
validation_dataset = raw_datasets["validation"]
test_dataset = raw_datasets["test"]



##### Extracting number of unique labels

In [None]:
# Labels
label_list = raw_datasets["train"].unique("label")
label_list.sort()  # sort the labels for determine
num_labels = len(label_list)
label_list

### Defining the Model Configuration and Instantiating the Model

In [None]:
# Architecture settings for a small two-stage ResNet that takes
# single-channel input and has one output per label.
resnet_settings = dict(
    num_channels=1,
    layer_type="basic",
    depths=[2, 2],
    hidden_sizes=[32, 64],
    num_labels=num_labels,
)
config = ResNetConfig(**resnet_settings)

# The model is constructed from the configuration object alone
# (no checkpoint is loaded here).
model = ResNetForImageClassification(config)

#### Preprocessing the raw_datasets

In [None]:
# In this cell the feature extractor only serves as the holder of the
# single-channel mean / std values that feed the torchvision `Normalize` step
# (its own `do_normalize` flag is switched off).
feature_extractor = ConvNextFeatureExtractor(
    do_resize=True,
    do_normalize=False,
    image_mean=[0.45],
    image_std=[0.22],
)
normalize = Normalize(
    mean=feature_extractor.image_mean,
    std=feature_extractor.image_std,
)

# Image pipeline: resize, centre-crop to 224, convert to a tensor, normalise.
_transforms = Compose(
    [
        Resize(256),
        CenterCrop(224),
        ToTensor(),
        normalize,
    ]
)

def transforms(example_batch):
    """Add a ``pixel_values`` entry to a batch by applying ``_transforms``.

    Every image found under ``example_batch["img_path"]`` is converted to
    single-channel grayscale (mode ``"L"``) and passed through ``_transforms``.

    Args:
        example_batch: Batch mapping whose ``"img_path"`` entry is an iterable
            of images exposing ``.convert``.

    Returns:
        The same ``example_batch`` object, updated in place with the
        ``"pixel_values"`` list.
    """
    pixel_values = []
    for pil_img in example_batch["img_path"]:
        grayscale = pil_img.convert("L")
        pixel_values.append(_transforms(grayscale))
    example_batch["pixel_values"] = pixel_values
    return example_batch

#### Finalize the training data for training the model

In [None]:
# The training split is mandatory.
if "train" not in raw_datasets:
    raise ValueError("requires a train dataset")

train_dataset = raw_datasets["train"]

# Optionally keep only the first `max_train_samples` rows.
if max_train_samples is not None:
    max_train_samples_n = min(max_train_samples, len(train_dataset))
    kept_rows = range(max_train_samples_n)
    train_dataset = train_dataset.select(kept_rows)

# Images are converted to tensors lazily, when rows are accessed.
train_dataset.set_transform(transforms)

In [None]:
train_dataset

#### Finalize the development/evaluation data for evaluating the model

In [None]:
# The validation split is mandatory.
if "validation" not in raw_datasets:
    raise ValueError("requires a validation dataset")

eval_dataset = raw_datasets["validation"]

# Optionally keep only the first `max_eval_samples` rows.
if max_eval_samples is not None:
    max_eval_samples_n = min(max_eval_samples, len(eval_dataset))
    kept_rows = range(max_eval_samples_n)
    eval_dataset = eval_dataset.select(kept_rows)

# Images are converted to tensors lazily, when rows are accessed.
eval_dataset.set_transform(transforms)
eval_dataset

#### Finalize the test data for predicting the unseen test data using the model

In [None]:
# The test split is mandatory. Only the "test" key is checked because that is
# the only key read below; accepting any other split name in this guard would
# let a missing "test" split surface as a KeyError instead of this message.
if "test" not in raw_datasets:
    raise ValueError("requires a test dataset")

predict_dataset = raw_datasets["test"]

# Optionally keep only the first `max_predict_samples` rows.
if max_predict_samples is not None:
    max_predict_samples_n = min(len(predict_dataset), max_predict_samples)
    predict_dataset = predict_dataset.select(range(max_predict_samples_n))

# Images are converted to tensors lazily, when rows are accessed.
predict_dataset.set_transform(transforms)
predict_dataset

#### Log a few random samples from the training set

In [None]:
# Pick three distinct row positions at random and log the corresponding
# (transformed) training examples.
sample_indices = random.sample(range(len(train_dataset)), 3)
for index in sample_indices:
    logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")

#### Get the metric function `accuracy`

In [None]:
# Load the `accuracy` metric from the `evaluate` library.
# NOTE(review): the `compute_metrics` function defined later computes accuracy
# directly with NumPy and does not reference `metric` in the visible code -
# confirm whether this object is still needed.
metric = evaluate.load("accuracy")

#### Metric function: it receives an `EvalPrediction` (with `predictions` and `label_ids` fields) and has to return a dictionary mapping metric names (strings) to floats.

In [None]:
def compute_metrics(p: EvalPrediction):
    """Compute classification accuracy for one evaluation pass.

    Args:
        p: Object exposing ``predictions`` (an array of per-class scores, or a
            tuple whose first element is that array) and ``label_ids`` (the
            reference class indices).

    Returns:
        ``{"accuracy": value}``, where ``value`` is the fraction of examples
        whose highest-scoring class equals the reference label, as a Python
        float.
    """
    scores = p.predictions
    if isinstance(scores, tuple):
        # Only the first element of a tuple output is used.
        scores = scores[0]
    predicted_ids = np.argmax(scores, axis=1)
    # The mean is taken over float32 values, then converted to a Python float.
    hits = (predicted_ids == p.label_ids).astype(np.float32)
    return {"accuracy": hits.mean().item()}


#### Data Collator

In [None]:
def collate_fn(examples):
    """Combine individual examples into one model-ready batch.

    Args:
        examples: Sequence of mappings, each holding a ``"pixel_values"``
            tensor and a ``"label"`` value.

    Returns:
        A dict with ``"pixel_values"`` (the per-example tensors stacked along
        a new leading dimension) and ``"labels"`` (a tensor built from the
        per-example labels).
    """
    image_tensors = [sample["pixel_values"] for sample in examples]
    batch = {"pixel_values": torch.stack(image_tensors)}
    batch["labels"] = torch.tensor([sample["label"] for sample in examples])
    return batch


# Name under which the collator is handed to the Trainer.
data_collator = collate_fn

#### Initialize our Trainer

In [None]:
# Bundle the model, run configuration, data and helper callables into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,  # validation split prepared earlier in the notebook
    # The image feature extractor is handed over through the `tokenizer`
    # argument.
    # NOTE(review): newer transformers releases presumably expose this slot as
    # `processing_class` - confirm against the installed version.
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
)

#### Training our model

In [None]:
# Run the training loop and keep the metrics it reports.
train_result = trainer.train()
metrics = train_result.metrics

# Record how many training examples were used; without an explicit cap this
# is simply the size of the training split.
if max_train_samples is None:
    max_train_samples = len(train_dataset)
metrics["train_samples"] = min(max_train_samples, len(train_dataset))



#### Saving the model, the training metrics, and the trainer state

In [None]:
# Persist the trained model through the Trainer (no explicit path is given,
# so the Trainer's default output location applies).
trainer.save_model()
# Log the training metrics and save them under the "train" split name.
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
# Save the Trainer's own state alongside the other outputs.
trainer.save_state()

#### Evaluating our model on validation/development data

In [None]:
logger.info("*** Evaluate ***")

# Score the model on the validation/development split.
metrics = trainer.evaluate(eval_dataset=eval_dataset)

# Record how many evaluation examples were used; without an explicit cap this
# is simply the size of the evaluation split.
if max_eval_samples is None:
    max_eval_samples = len(eval_dataset)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))

# Log the evaluation metrics and save them under the "eval" split name.
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

### Predicting the test data

In [None]:
# if the test set is available, you don't need to run this cell
# predict_dataset = eval_dataset


In [None]:
# Integer label id -> label name written to the submission file.
id2l = {0: 'not-hate', 1: 'hate'}
logger.info("*** Predict ***")

# Per-class scores for every test example, reduced to the index of the
# highest-scoring class.
predictions = trainer.predict(predict_dataset, metric_key_prefix="predict").predictions
predictions = np.argmax(predictions, axis=1)

# NOTE(review): the file name says "task2" although this notebook is titled
# Task 3 - confirm the file name expected for submissions.
output_predict_file = os.path.join(training_args.output_dir, "task2_TeamName.csv")

# Only the main process writes the file.
if trainer.is_world_process_zero():
    # Make sure the target directory exists even if nothing was saved before.
    os.makedirs(training_args.output_dir, exist_ok=True)

    # Read the ids once from a view of the dataset without the image
    # transform. Indexing `predict_dataset[i]` would run the transform on
    # every row just to obtain its id.
    example_ids = predict_dataset.with_format(None)["id"]

    with open(output_predict_file, "w", encoding="utf-8") as writer:
        logger.info("***** Predict results *****")
        writer.write("id\tprediction\n")
        for example_id, class_index in zip(example_ids, predictions):
            # class index -> label value seen in training -> label name
            label_name = id2l[label_list[class_index]]
            writer.write(f"{example_id}\t{label_name}\n")

#### Creating the model card

In [None]:
# Metadata forwarded to the model card: the base-model name and the task tag.
kwargs = dict(finetuned_from=model_name, tasks="image-classification")
trainer.create_model_card(**kwargs)