In [None]:
import warnings

import evaluate
import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from pandarallel import pandarallel
from transformers import (
    AutoModel,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

# Silence FutureWarning noise and start the pandarallel worker pool.
warnings.filterwarnings("ignore", category=FutureWarning)
pandarallel.initialize(progress_bar=True, nb_workers=16)


# Directory handed to the Trainer as its checkpoint output location.
model_save_path = "../process/bert-baseline/category-binary/"

# Use the GPU when one is visible and fall back to CPU otherwise, so this cell
# still runs on a machine without CUDA instead of failing at `model.to(...)`.
use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"  # the device to load the model onto

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
# When an input is too long, drop tokens from the start and keep the end.
tokenizer.truncation_side = "left"
# NOTE(review): `model` is rebound to an AutoModelForSequenceClassification
# instance in a later cell before the Trainer is built, so this bare encoder
# looks unused -- confirm whether this load can be removed.
model = AutoModel.from_pretrained(
    "distilbert-base-uncased",
    # fp16 weights and FlashAttention-2 are only requested when CUDA is
    # available; on CPU fall back to float32 with the eager attention path.
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    attn_implementation="flash_attention_2" if use_cuda else "eager",
    problem_type="multi_label_classification",
)

model.to(device)


def preprocess_function(examples):
    """Tokenize the ``text`` field of ``examples`` with truncation enabled.

    Args:
        examples: Mapping that exposes the raw strings under the ``"text"`` key.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those strings.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True)
    return encoded


# 1. accuracy
def compute_metrics(eval_pred):
    """Score an evaluation pass with the module-level ``accuracy`` metric.

    Args:
        eval_pred: Pair of ``(scores, labels)``; the scores are reduced to class
            ids by taking the arg-max along axis 1.

    Returns:
        The result of ``accuracy.compute`` for the predicted ids and the labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)


# 2. f1
# def compute_metrics(eval_pred):
#     predictions, labels = eval_pred
#     predictions = np.argmax(predictions, axis=1)
#     return evaluate.load("f1").compute(predictions=predictions, references=labels)


# 3. AUROC
# TODO: implement an AUROC variant of compute_metrics.
# def compute_metrics(eval_pred):
#     predictions, labels = eval_pred
#     predictions = np.argmax(predictions, axis=1)
#     return evaluate.load("auroc").compute(predictions=predictions, references=labels)

# Metric object used by the accuracy-only `compute_metrics` defined above.
accuracy = evaluate.load("accuracy")

# Collator that pads the examples of a batch with the shared tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Training set:

In [None]:
# Read the training table, discard the `scale_text` column and keep a copy of
# the raw caption under `text_org`.
df_train = (
    pd.read_csv("../dataset/process/hateful_memes_gpt_scale/df_train_vision_scale.csv")
    .drop(columns=["scale_text"])
    .assign(text_org=lambda frame: frame["text"])
)

# Wrap every training caption in a fixed instruction prompt: the hate-speech
# category list comes first, then the caption between backticks, then the
# closing TRUE/FALSE question. The result overwrites the `text` column.
df_train["text"] = (
    """Your task is to analyze this given image and its caption to identify if there’s any forms of hateful content. Try to focus on the presence of any element that relates to any of the following:\n1. Sexual aggression:\na. Homophobia and Transphobia: This category encompasses hate speech targeting LGBTQ+ individuals, including slurs, derogatory comments, and rhetoric that seeks to undermine or dehumanize people based on their sexual orientation or gender identity.\nb. Misogyny and Sexism: This category includes hate speech directed at women or based on gender. It covers derogatory language, stereotypes, and rhetoric that perpetuate gender inequality, objectification, and violence against women.\n2. Hate based on ideology:\na. Political Hate Speech: This category includes hate speech that is politically motivated, often targeting individuals or groups based on their political beliefs or affiliations. It may include inflammatory language, threats, and rhetoric designed to polarize or incite violence within political contexts.\n3. Racism and xenophobia:\na. COVID-19 and Xenophobia: This category includes hate speech that arose during the COVID-19 pandemic, often targeting specific ethnic groups or nationalities. It includes xenophobic language blaming certain groups for the spread of the virus, as well as fear-mongering and scapegoating related to the pandemic.\nb. Racism Against Black People: This category focuses on hate speech directed at Black individuals or communities. It includes racial slurs, stereotypes, dehumanization, and other forms of derogatory language that perpetuate racial discrimination and inequality.\nc. Racist Hate Against Other Ethnic Groups: This category includes hate speech directed at various ethnic groups other than Black individuals. It covers a range of racial slurs, xenophobic language, dehumanization, and derogatory remarks targeting specific ethnicities or nationalities.\nd. 
White Supremacy: This category includes hate speech promoting white supremacist ideology, often intertwined with Christian extremist views. It includes rhetoric that advocates for racial superiority, anti-immigrant sentiments, and the dehumanization of non-white groups, sometimes using religious justifications for these beliefs.\n4. Bigotry:\na. Anti-Muslim and Islamophobic Hate: This category comprises hate speech aimed at Muslims and Islam. It includes language that promotes fear, hatred, dehumanization, or prejudice against Muslim individuals or communities, often using religious and cultural references to incite hostility.\nb. Anti-Semitic Hate: This category focuses on hate speech directed at Jewish people and Judaism. It includes references to historical anti-Semitic tropes, conspiracy theories, and other forms of rhetoric that seek to dehumanize or discriminate against Jewish individuals and communities.\n5. Miscellaneous Hate Speech: This category captures various forms of hate speech that do not fit neatly into the other categories. It includes a wide range of derogatory language and expressions that target individuals or groups based on different aspects of identity or personal characteristics. This category includes hate speech that targets individuals based on their physical or mental disabilities. It often includes derogatory language that mocks or devalues people with disabilities, promoting harmful stereotypes and exclusion. The caption is about `"""
    + df_train["text"]
    + "`. If it’s hateful, return the `TRUE` else `FALSE`."
)
# Reduce each image path to its file name (the part after the last "/").
df_train["img"] = df_train["img"].map(lambda path: path.rsplit("/", 1)[-1])

# Character-level truncation: strings longer than 512 characters keep only
# their final 512 characters; shorter ones are left untouched.
# (Disabled alternative: x[:-512], i.e. cut the last 512 characters instead.)
df_train["text"] = df_train["text"].map(
    lambda prompt: prompt if len(prompt) <= 512 else prompt[-512:]
)

# Disabled alternative labelling, derived from the `scale` column:
# df_train = df_train.rename(columns={"scale": "label"})
# df_train["label"] = df_train["label"].apply(lambda x: 1 if x >= 5 else 0)
df_train["label"] = df_train["label"].astype(int)

# Only the model input and its target are carried forward.
df_train = df_train.loc[:, ["text", "label"]]
df_train.head()

Test set:

In [None]:
# Load both test splits and stack them into a single evaluation frame.
# `ignore_index=True` gives the combined frame a fresh 0..N-1 index; without it
# each half keeps its own 0-based labels, so every label occurs twice and any
# later index-aligned operation on the frame becomes ambiguous.
df_test = pd.concat(
    [
        pd.read_json(
            "../dataset/raw/hateful_memes_expanded/test_seen.jsonl",
            lines=True,
        ),
        pd.read_json(
            "../dataset/raw/hateful_memes_expanded/test_unseen.jsonl",
            lines=True,
        ),
    ],
    ignore_index=True,
)[["text", "label"]]  # selecting the two columns already discards `id` / `img`
# Keep the raw caption alongside the `text` column, which is overwritten later.
df_test["text_org"] = df_test["text"]

# Wrap every *test* caption in the same instruction prompt used for training:
# category list, then the caption between backticks, then the TRUE/FALSE
# question. The caption must come from `df_test`; concatenating `df_train["text"]`
# here would align training rows onto the test frame by index and the model
# would be evaluated on training text paired with test labels.
df_test["text"] = (
    """Your task is to analyze this given image and its caption to identify if there’s any forms of hateful content. Try to focus on the presence of any element that relates to any of the following:\n1. Sexual aggression:\na. Homophobia and Transphobia: This category encompasses hate speech targeting LGBTQ+ individuals, including slurs, derogatory comments, and rhetoric that seeks to undermine or dehumanize people based on their sexual orientation or gender identity.\nb. Misogyny and Sexism: This category includes hate speech directed at women or based on gender. It covers derogatory language, stereotypes, and rhetoric that perpetuate gender inequality, objectification, and violence against women.\n2. Hate based on ideology:\na. Political Hate Speech: This category includes hate speech that is politically motivated, often targeting individuals or groups based on their political beliefs or affiliations. It may include inflammatory language, threats, and rhetoric designed to polarize or incite violence within political contexts.\n3. Racism and xenophobia:\na. COVID-19 and Xenophobia: This category includes hate speech that arose during the COVID-19 pandemic, often targeting specific ethnic groups or nationalities. It includes xenophobic language blaming certain groups for the spread of the virus, as well as fear-mongering and scapegoating related to the pandemic.\nb. Racism Against Black People: This category focuses on hate speech directed at Black individuals or communities. It includes racial slurs, stereotypes, dehumanization, and other forms of derogatory language that perpetuate racial discrimination and inequality.\nc. Racist Hate Against Other Ethnic Groups: This category includes hate speech directed at various ethnic groups other than Black individuals. It covers a range of racial slurs, xenophobic language, dehumanization, and derogatory remarks targeting specific ethnicities or nationalities.\nd. 
White Supremacy: This category includes hate speech promoting white supremacist ideology, often intertwined with Christian extremist views. It includes rhetoric that advocates for racial superiority, anti-immigrant sentiments, and the dehumanization of non-white groups, sometimes using religious justifications for these beliefs.\n4. Bigotry:\na. Anti-Muslim and Islamophobic Hate: This category comprises hate speech aimed at Muslims and Islam. It includes language that promotes fear, hatred, dehumanization, or prejudice against Muslim individuals or communities, often using religious and cultural references to incite hostility.\nb. Anti-Semitic Hate: This category focuses on hate speech directed at Jewish people and Judaism. It includes references to historical anti-Semitic tropes, conspiracy theories, and other forms of rhetoric that seek to dehumanize or discriminate against Jewish individuals and communities.\n5. Miscellaneous Hate Speech: This category captures various forms of hate speech that do not fit neatly into the other categories. It includes a wide range of derogatory language and expressions that target individuals or groups based on different aspects of identity or personal characteristics. This category includes hate speech that targets individuals based on their physical or mental disabilities. It often includes derogatory language that mocks or devalues people with disabilities, promoting harmful stereotypes and exclusion. The caption is about `"""
    + df_test["text"]
    + "`. If it’s hateful, return the `TRUE` else `FALSE`."
)

# Character-level truncation: strings longer than 512 characters keep only
# their final 512 characters; shorter ones are left untouched.
df_test["text"] = df_test["text"].map(
    lambda prompt: prompt if len(prompt) <= 512 else prompt[-512:]
)

# Targets as plain integers.
df_test["label"] = df_test["label"].astype(int)

# Disabled: image file-name extraction.
# df_test["img"] = df_test["img"].apply(lambda x: x.split("/")[-1])
df_test.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the labelled training rows as a development split; the fixed
# `random_state` keeps the partition reproducible.
# NOTE: this rebinds `df_train`, so re-running this cell without re-running the
# loading cell splits the already-reduced frame again.
df_train, df_dev = train_test_split(df_train, test_size=0.3, random_state=42)

# Report the size of every split, including the dev split created just above.
print(df_train.shape, df_dev.shape, df_test.shape)

In [None]:
from transformers import AutoModelForSequenceClassification

# DistilBERT checkpoint loaded with a two-label sequence-classification head.
# (Explicit id2label / label2id mappings are not supplied.)
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=2
)

training_args = TrainingArguments(
    # Checkpoint / output location.
    output_dir=model_save_path,
    # Optimisation schedule.
    num_train_epochs=5,
    learning_rate=2e-5,
    weight_decay=0.01,
    # Per-device batch sizes.
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    # Evaluate and save once per epoch; reload the best checkpoint at the end.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)


def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch for the classifier.

    Args:
        examples: Mapping that exposes the raw strings under the ``"text"`` key.

    Returns:
        The output of the module-level ``tokenizer`` called with
        ``padding="max_length"`` and ``truncation=True``.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True, padding="max_length")


# Turn each pandas split into a Hugging Face `Dataset` and tokenize it in
# batched mode.
train_dataset = Dataset.from_pandas(df_train).map(tokenize_function, batched=True)
dev_dataset = Dataset.from_pandas(df_dev).map(tokenize_function, batched=True)
test_dataset = Dataset.from_pandas(df_test).map(tokenize_function, batched=True)

# Expose only the model inputs and the label, as torch tensors, on the two
# splits handed to the Trainer; the test split keeps its default format.
for _split in (train_dataset, dev_dataset):
    _split.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])


# Metrics reported after every evaluation pass, in output order.
_METRIC_NAMES = ("accuracy", "precision", "recall", "f1")
# Cache of already-loaded `evaluate` metric objects, keyed by metric name.
_LOADED_METRICS = {}


def _get_metric(name):
    """Return the `evaluate` metric called ``name``, loading it only once."""
    if name not in _LOADED_METRICS:
        _LOADED_METRICS[name] = evaluate.load(name)
    return _LOADED_METRICS[name]


def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall and F1 for one evaluation pass.

    The metric objects are loaded on first use and reused afterwards, instead
    of calling ``evaluate.load`` four times on every evaluation.

    Args:
        eval_pred: Pair of ``(scores, labels)``; the scores are reduced to class
            ids by taking the arg-max along axis 1.

    Returns:
        dict: ``{"accuracy": ..., "precision": ..., "recall": ..., "f1": ...}``
        where each value is the scalar stored under the metric's own name in
        the result of its ``compute`` call.
    """
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    # TODO: AUROC is not reported here; it needs class scores rather than the
    # arg-max class ids computed above.
    return {
        name: _get_metric(name).compute(predictions=predictions, references=labels)[
            name
        ]
        for name in _METRIC_NAMES
    }


# Assemble the Trainer from the model, its training arguments, the tokenized
# train/dev splits, the padding collator and the metric callback.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
    compute_metrics=compute_metrics,
)

# Launch fine-tuning.
trainer.train()

In [None]:
# Run the trained model over the tokenized test split.
predictions = trainer.predict(test_dataset)

# Reduce the per-class outputs to the index of the highest-scoring class.
test_scores = predictions.predictions
preds = np.argmax(test_scores, axis=-1)

# Peek at the first 20 predicted class ids.
preds[:20]

In [None]:
import os

# Attach the predicted class ids to the test frame (`preds_label` is a copy of
# the same values).
df_test["preds"] = preds
df_test["preds_label"] = df_test["preds"].copy()

# Create the output directory first: `to_csv` does not create missing parent
# folders, and failing here would come after training has already completed.
preds_path = "../result/bert.finetuning/category-binary/preds.csv"
os.makedirs(os.path.dirname(preds_path), exist_ok=True)
df_test.to_csv(preds_path, index=False)
df_test.head()

## Evaluate

In [None]:
# Accuracy
accuracy = evaluate.load("accuracy").compute(
    predictions=df_test["preds_label"], references=df_test["label"]
)

# Precision
precision = evaluate.load("precision").compute(
    predictions=df_test["preds_label"], references=df_test["label"]
)

# Recall
recall = evaluate.load("recall").compute(
    predictions=df_test["preds_label"], references=df_test["label"]
)

# F1
f1 = evaluate.load("f1").compute(
    predictions=df_test["preds_label"], references=df_test["label"]
)

# AUROC
# ROC AUC ranks examples by a continuous score, so it is computed from the
# model's raw outputs rather than from the already-thresholded 0/1 predictions
# (hard labels collapse the curve to a single operating point). With a
# two-class head, the margin "class-1 output minus class-0 output" orders the
# examples exactly like the softmax probability of class 1.
test_outputs = np.asarray(predictions.predictions)
positive_scores = test_outputs[:, 1] - test_outputs[:, 0]
auroc = evaluate.load("roc_auc").compute(
    prediction_scores=positive_scores, references=df_test["label"]
)["roc_auc"]

In [None]:
from rich import print as pp

# Gather the test-set metrics computed in the previous cell and pretty-print
# them.
results = dict(
    accuracy=accuracy,
    precision=precision,
    recall=recall,
    f1=f1,
    auroc=auroc,
)
pp(results)