# [Hate Speech Identification Shared Task](https://multihate.github.io/): Subtask 1A at [BLP Workshop](https://blp-workshop.github.io/) @IJCNLP-AACL 2025

This shared task is designed to identify the type of hate, its severity, and the targeted group from social media content. The goal is to develop robust systems that advance research in this area.

In this subtask, given a Bangla text collected from YouTube comments, categorize whether it contains abusive, sexism, religious hate, political hate, profane, or none.

In [1]:
!rm -f blp25_hatespeech_subtask_1A_train.tsv blp25_hatespeech_subtask_1A_dev.tsv blp25_hatespeech_subtask_1A_dev_test.tsv

### Downloading dataset from github

In [2]:
!wget https://raw.githubusercontent.com/md-fahad-ali/do-your-home-work/refs/heads/main/blp25_hatespeech_subtask_1A_train.tsv
!wget https://raw.githubusercontent.com/AridHasan/blp25_task1/refs/heads/main/data/subtask_1A/blp25_hatespeech_subtask_1A_dev.tsv
!wget https://raw.githubusercontent.com/AridHasan/blp25_task1/refs/heads/main/data/subtask_1A/blp25_hatespeech_subtask_1A_dev_test.tsv

--2025-08-31 17:16:10--  https://raw.githubusercontent.com/md-fahad-ali/do-your-home-work/refs/heads/main/blp25_hatespeech_subtask_1A_train.tsv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8016549 (7.6M) [text/plain]
Saving to: ‘blp25_hatespeech_subtask_1A_train.tsv’


2025-08-31 17:16:10 (114 MB/s) - ‘blp25_hatespeech_subtask_1A_train.tsv’ saved [8016549/8016549]

--2025-08-31 17:16:10--  https://raw.githubusercontent.com/AridHasan/blp25_task1/refs/heads/main/data/subtask_1A/blp25_hatespeech_subtask_1A_dev.tsv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awai

### installing required libraries.
 - transformers
 - datasets
 - evaluate
 - accelerate

In [3]:
!pip install transformers
!pip install datasets
!pip install evaluate
# !pip install --upgrade accelerate

Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.5-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.5


#### importing required libraries and setting up logger

In [4]:
import logging
import os
import random
import sys
from dataclasses import dataclass, field
from typing import Optional
import pandas as pd
import datasets
import evaluate
import numpy as np
from datasets import load_dataset, Dataset, DatasetDict
import torch

import transformers
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    EvalPrediction,
    HfArgumentParser,
    PretrainedConfig,
    Trainer,
    TrainingArguments,
    default_data_collator,
    set_seed,
)
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version, send_example_telemetry
from transformers.utils.versions import require_version


logger = logging.getLogger(__name__)

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
)

### Defining the training, validation, and test data

In [5]:
train_file = 'blp25_hatespeech_subtask_1A_train.tsv'
validation_file = 'blp25_hatespeech_subtask_1A_dev.tsv'
test_file = 'blp25_hatespeech_subtask_1A_dev_test.tsv'

### Disable wandb

In [6]:
import os
os.environ["WANDB_DISABLED"] = "true"

### Setting up the training parameters

In [7]:
training_args = TrainingArguments(
    learning_rate=2e-5,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    output_dir="./distilBERT_m/",
    overwrite_output_dir=True,
    remove_unused_columns=False,
    local_rank=-1,  # FIXED
    load_best_model_at_end=True,
    save_total_limit=2,
    save_strategy="epoch",
    eval_strategy="epoch", # Fixed: Changed from evaluation_strategy
    report_to=[],  # FIXED
    metric_for_best_model="eval_accuracy",
    greater_is_better=True,
)

In [8]:
transformers.utils.logging.set_verbosity_info()

log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
transformers.utils.logging.set_verbosity(log_level)
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()
logger.warning(
    f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
    + f" distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")

INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=Interva

#### Defining the Model

In [9]:
model_name = 'distilbert-base-multilingual-cased'

#### setting the random seed

In [10]:
set_seed(training_args.seed)

#### Loading data files

In [11]:
l2id = {'None': 0, 'Religious Hate': 1, 'Sexism': 2, 'Political Hate': 3, 'Profane': 4, 'Abusive': 5}
train_df = pd.read_csv(train_file, sep='\t')
# print(train_df['label'])
train_df['label'] = train_df['label'].map(l2id).fillna(0).astype(int)
train_df = Dataset.from_pandas(train_df)
validation_df = pd.read_csv(validation_file, sep='\t')
validation_df['label'] = validation_df['label'].map(l2id).fillna(0).astype(int)
validation_df = Dataset.from_pandas(validation_df)
test_df = pd.read_csv(test_file, sep='\t')
#test_df['label'] = test_df['label'].map(l2id)
test_df = Dataset.from_pandas(test_df)

data_files = {"train": train_df, "validation": validation_df, "test": test_df}
for key in data_files.keys():
    logger.info(f"loading a local file for {key}")
raw_datasets = DatasetDict(
    {"train": train_df, "validation": validation_df, "test": test_df}
)

INFO:__main__:loading a local file for train
INFO:__main__:loading a local file for validation
INFO:__main__:loading a local file for test


In [12]:
len(test_df['id'])

2512

##### Extracting number of unique labels

In [13]:
# Labels
label_list = raw_datasets["train"].unique("label")
print(label_list)
label_list.sort()  # sort the labels for determine
num_labels = len(label_list)

[0, 5, 4, 1, 3, 2]


In [None]:
from huggingface_hub import notebook_login
notebook_login()


### Loading Pretrained Configuration, Tokenizer and Model

In [15]:
unique_labels = set(raw_datasets["train"]["label"]) | set(raw_datasets["validation"]["label"])
num_labels = len(unique_labels)

print("Detected num_labels:", num_labels)

Detected num_labels: 6


In [16]:
from transformers import AutoConfig, AutoTokenizer, ElectraForSequenceClassification

model_name = "csebuetnlp/banglabert"

config = AutoConfig.from_pretrained(
    model_name,
    num_labels=num_labels,
    finetuning_task=None,
    cache_dir=None,
    revision="main",
    use_auth_token=None,
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    cache_dir=None,
    use_fast=True,
    revision="main",
    use_auth_token=None,
)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    from_tf=bool(".ckpt" in model_name),
    config=config,
    cache_dir=None,
    revision="main",
    use_auth_token=None,
    ignore_mismatched_sizes=False,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/586 [00:00<?, ?B/s]

[INFO|configuration_utils.py:752] 2025-08-31 17:17:30,230 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--csebuetnlp--banglabert/snapshots/9ce791f330578f50da6bc52b54205166fb5d1c8c/config.json
[INFO|configuration_utils.py:817] 2025-08-31 17:17:30,233 >> Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 12,
  "num

tokenizer_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

[INFO|configuration_utils.py:752] 2025-08-31 17:17:30,654 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--csebuetnlp--banglabert/snapshots/9ce791f330578f50da6bc52b54205166fb5d1c8c/config.json
[INFO|configuration_utils.py:817] 2025-08-31 17:17:30,661 >> Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "transformers_version": "4.55.4",
  "type_vocab_size": 2,
  "use_

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

[INFO|tokenization_utils_base.py:2067] 2025-08-31 17:17:32,388 >> loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--csebuetnlp--banglabert/snapshots/9ce791f330578f50da6bc52b54205166fb5d1c8c/vocab.txt
[INFO|tokenization_utils_base.py:2067] 2025-08-31 17:17:32,389 >> loading file tokenizer.json from cache at None
[INFO|tokenization_utils_base.py:2067] 2025-08-31 17:17:32,390 >> loading file added_tokens.json from cache at None
[INFO|tokenization_utils_base.py:2067] 2025-08-31 17:17:32,391 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--csebuetnlp--banglabert/snapshots/9ce791f330578f50da6bc52b54205166fb5d1c8c/special_tokens_map.json
[INFO|tokenization_utils_base.py:2067] 2025-08-31 17:17:32,391 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--csebuetnlp--banglabert/snapshots/9ce791f330578f50da6bc52b54205166fb5d1c8c/tokenizer_config.json
[INFO|tokenization_utils_base.py:2067] 2025-08-

pytorch_model.bin:   0%|          | 0.00/443M [00:00<?, ?B/s]

[INFO|modeling_utils.py:1309] 2025-08-31 17:17:44,231 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--csebuetnlp--banglabert/snapshots/9ce791f330578f50da6bc52b54205166fb5d1c8c/pytorch_model.bin
[INFO|safetensors_conversion.py:61] 2025-08-31 17:17:44,417 >> Attempting to create safetensors variant
[INFO|safetensors_conversion.py:74] 2025-08-31 17:17:45,475 >> Safetensors PR exists
[INFO|modeling_utils.py:5609] 2025-08-31 17:17:45,741 >> Some weights of the model checkpoint at csebuetnlp/banglabert were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClas

#### Preprocessing the raw_datasets

In [17]:
non_label_column_names = [name for name in raw_datasets["train"].column_names if name != "label"]
sentence1_key= non_label_column_names[1]

# Padding strategy
padding = "max_length"

# Some models have set the order of the labels to use, so let's make sure we do use it.
label_to_id = None
if (model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id):
    # Some have all caps in their config, some don't.
    label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
    if sorted(label_name_to_id.keys()) == sorted(label_list):
        label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
    else:
        logger.warning(
            "Your model seems to have been trained with labels, but they don't match the dataset: ",
            f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
            "\nIgnoring the model labels as a result.",)

if label_to_id is not None:
    model.config.label2id = label_to_id
    model.config.id2label = {id: label for label, id in config.label2id.items()}

if 128 > tokenizer.model_max_length:
    logger.warning(
        f"The max_seq_length passed ({128}) is larger than the maximum length for the"
        f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}.")
max_seq_length = min(128, tokenizer.model_max_length)

def preprocess_function(examples):
    # Tokenize the texts
    args = (
        (examples[sentence1_key],))
    result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True)

    # Map labels to IDs (not necessary for GLUE tasks)
    if label_to_id is not None and "label" in examples:
        result["label"] = [(label_to_id[l] if l != -1 else -1) for l in examples["label"]]
    return result
raw_datasets = raw_datasets.map(
    preprocess_function,
    batched=True,
    load_from_cache_file=True,
    desc="Running tokenizer on dataset",
)


Running tokenizer on dataset:   0%|          | 0/35637 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/443M [00:00<?, ?B/s]

Running tokenizer on dataset:   0%|          | 0/2512 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/2512 [00:00<?, ? examples/s]

#### Finalize the training data for training the model

In [18]:
if "train" not in raw_datasets:
    raise ValueError("requires a train dataset")
train_dataset = raw_datasets["train"]
max_train_samples=None
if max_train_samples is not None:
    max_train_samples_n = min(len(train_dataset), max_train_samples)
    train_dataset = train_dataset.select(range(max_train_samples_n))
train_dataset = train_dataset.cast_column("label", datasets.ClassLabel(num_classes=num_labels, names=[str(i) for i in range(num_labels)]))
train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

Casting the dataset:   0%|          | 0/35637 [00:00<?, ? examples/s]

In [19]:
train_dataset

Dataset({
    features: ['id', 'text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 35637
})

#### Finalize the development/evaluation data for evaluating the model

In [20]:
if "validation" not in raw_datasets:
    raise ValueError("requires a validation dataset")
eval_dataset = raw_datasets["validation"]
max_eval_samples = None # Initialize max_eval_samples
if max_eval_samples is not None:
    max_eval_samples_n = min(len(eval_dataset), max_eval_samples)
    eval_dataset = eval_dataset.select(range(max_eval_samples_n))
eval_dataset = eval_dataset.cast_column("label", datasets.ClassLabel(num_classes=num_labels, names=[str(i) for i in range(num_labels)]))
eval_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

Casting the dataset:   0%|          | 0/2512 [00:00<?, ? examples/s]

#### Finalize the test data for predicting the unseen test data using the model

In [43]:
if "test" not in raw_datasets and "test_matched" not in raw_datasets:
    raise ValueError("requires a test dataset")
predict_dataset = raw_datasets["test"]
max_predict_samples = None # Initialize max_predict_samples
if max_predict_samples is not None:
    max_predict_samples_n = min(len(predict_dataset), max_predict_samples)
    predict_dataset = predict_dataset.select(range(max_predict_samples_n))
# Test dataset does not have labels
# predict_dataset = predict_dataset.cast_column("label", datasets.ClassLabel(num_classes=num_labels, names=[str(i) for i in range(num_labels)]))

#### Log a few random samples from the training set

In [22]:
for index in random.sample(range(len(train_dataset)), 3):
    logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")

INFO:__main__:Sample 7296 of the training set: {'label': tensor(3), 'input_ids': tensor([    2,  1990,     1,  3145,  2058,  9365,  4906, 13484,  3244,  3216,
         1845,  9768,   792,  4097,  3711,  2104,   949,  9365,  8932,  6812,
         2058,  1718,  1779,  2575,   925,   830,   889,  1810,   764,  1772,
         2068,   913,     1,   795,   830,  1772,  1449,  2314,   886,  2263,
         1052,  1070,  1206,   886, 11689, 22104,   411,   463,  2119,   996,
          792,   795,     3,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     

#### Get the metric function `accuracy`

In [23]:
from sklearn.metrics import accuracy_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    return {"accuracy": accuracy_score(labels, preds)}


#### Predictions and label_ids field and has to return a dictionary string to float.

In [24]:
def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    preds = np.argmax(preds, axis=1)
    return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()}


#### Data Collator

In [25]:
data_collator = default_data_collator

#### Initialize our Trainer

In [26]:
train_dataset = train_dataset.remove_columns("id")
eval_dataset = eval_dataset.remove_columns("id")

In [27]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    processing_class=tokenizer,  # FIXED
    data_collator=data_collator,
)

#### Training our model

In [28]:
print("Train label range:", min(train_dataset['label']), max(train_dataset['label']))
print("Eval label range:", min(eval_dataset['label']), max(eval_dataset['label']))


Train label range: tensor(0) tensor(5)
Eval label range: tensor(0) tensor(5)


In [29]:
sample = train_dataset[0]
tokens = tokenizer.decode(sample['input_ids'])
print(tokens)

[CLS] ধন্যবাদ বর্ডার গার্ড দেরকে এভাবে পাহারা দিতে হবে ভয় পেলে চলবে না তা না হলে আমাদের উপর হামলা করতে পারে [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]


In [30]:
train_dataset = train_dataset.remove_columns(
    [c for c in train_dataset.column_names if c not in ['input_ids', 'attention_mask', 'label']]
)
eval_dataset = eval_dataset.remove_columns(
    [c for c in eval_dataset.column_names if c not in ['input_ids', 'attention_mask', 'label']]
)


In [31]:
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    fp16=False,  # disable mixed precision
    eval_strategy="steps",
    save_steps=500,
    eval_steps=500,
    logging_steps=100,
)

[INFO|training_args.py:2189] 2025-08-31 17:18:31,370 >> PyTorch: setting up devices
[INFO|training_args.py:1799] 2025-08-31 17:18:31,390 >> average_tokens_across_devices is True but world size is 1. Setting it to False automatically.
[INFO|training_args.py:1866] 2025-08-31 17:18:31,391 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [32]:
# Train dataset label check
print("Unique labels in train dataset:", set(train_dataset["label"]))
print("Unique labels in eval dataset:", set(eval_dataset["label"]))

# Make sure all are int
train_dataset = train_dataset.map(lambda e: {"label": int(e["label"])})
eval_dataset = eval_dataset.map(lambda e: {"label": int(e["label"])})

# আবার print করে দেখা
print("After casting:")
print(set(train_dataset["label"]))
print(set(eval_dataset["label"]))

Unique labels in train dataset: {tensor(3), tensor(4), tensor(5), tensor(4), tensor(5), tensor(0), tensor(0), tensor(0), tensor(5), tensor(0), tensor(5), tensor(0), tensor(5), tensor(3), tensor(0), tensor(0), tensor(5), tensor(0), tensor(0), tensor(3), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(4), tensor(0), tensor(5), tensor(0), tensor(0), tensor(3), tensor(0), tensor(0), tensor(0), tensor(0), tensor(5), tensor(3), tensor(0), tensor(0), tensor(0), tensor(0), tensor(5), tensor(0), tensor(0), tensor(0), tensor(0), tensor(5), tensor(0), tensor(0), tensor(3), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(5), tensor(5), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(5), tensor(0), tensor(0), tensor(0), tensor(3), tensor(0), tensor(5), tensor(3), tensor(0), tensor(5), tensor(0), tensor(5), tensor(3), tensor(3), tensor(0), tensor(1), tensor(0), tensor(0), tensor(4), tensor(0), tensor(4), tensor(1), tensor(3),

Map:   0%|          | 0/35637 [00:00<?, ? examples/s]

Map:   0%|          | 0/2512 [00:00<?, ? examples/s]

After casting:
{tensor(5), tensor(3), tensor(0), tensor(3), tensor(3), tensor(0), tensor(4), tensor(0), tensor(3), tensor(0), tensor(3), tensor(0), tensor(0), tensor(0), tensor(0), tensor(3), tensor(5), tensor(0), tensor(0), tensor(5), tensor(0), tensor(0), tensor(0), tensor(1), tensor(2), tensor(5), tensor(5), tensor(0), tensor(0), tensor(0), tensor(5), tensor(0), tensor(0), tensor(5), tensor(4), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(5), tensor(0), tensor(5), tensor(5), tensor(0), tensor(0), tensor(1), tensor(0), tensor(5), tensor(4), tensor(3), tensor(5), tensor(5), tensor(0), tensor(0), tensor(3), tensor(0), tensor(1), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(4), tensor(0), tensor(5), tensor(3), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(5), tensor(0), tensor(4), tensor(5), tensor(0), tensor(0), tensor(4), tensor(3), tensor(4), tensor(5), tensor(0), tensor(0), tensor(1), tenso

In [33]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [37]:
# AcceleratorState ঠিক করার জন্য
from accelerate.state import AcceleratorState
from accelerate import Accelerator

# পুরানো state reset করুন
AcceleratorState._reset_state()

# নতুন accelerator তৈরি করুন
accelerator = Accelerator()

# Trainer আবার তৈরি করুন
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    processing_class=tokenizer,
    data_collator=data_collator,
)
train_result = trainer.train()
metrics = train_result.metrics
max_train_samples = (
    max_train_samples if max_train_samples is not None else len(train_dataset)
)
metrics["train_samples"] = min(max_train_samples, len(train_dataset))

[INFO|trainer.py:2433] 2025-08-31 17:20:11,630 >> ***** Running training *****
[INFO|trainer.py:2434] 2025-08-31 17:20:11,640 >>   Num examples = 35,637
[INFO|trainer.py:2435] 2025-08-31 17:20:11,645 >>   Num Epochs = 3
[INFO|trainer.py:2436] 2025-08-31 17:20:11,648 >>   Instantaneous batch size per device = 8
[INFO|trainer.py:2439] 2025-08-31 17:20:11,650 >>   Total train batch size (w. parallel, distributed & accumulation) = 8
[INFO|trainer.py:2440] 2025-08-31 17:20:11,651 >>   Gradient Accumulation steps = 1
[INFO|trainer.py:2441] 2025-08-31 17:20:11,651 >>   Total optimization steps = 13,365
[INFO|trainer.py:2442] 2025-08-31 17:20:11,657 >>   Number of trainable parameters = 110,621,958


Step,Training Loss,Validation Loss,Accuracy
500,0.9542,0.954623,0.653264
1000,0.8855,0.857314,0.663217
1500,0.8685,0.804691,0.707006
2000,0.8617,0.783455,0.697452
2500,0.8467,0.752358,0.708599
3000,0.8286,0.746486,0.712182
3500,0.754,0.724502,0.72492
4000,0.8085,0.735144,0.712978
4500,0.6955,0.781252,0.708201
5000,0.5864,0.834015,0.710191


[INFO|trainer.py:4408] 2025-08-31 17:21:59,075 >> 
***** Running Evaluation *****
[INFO|trainer.py:4410] 2025-08-31 17:21:59,076 >>   Num examples = 2512
[INFO|trainer.py:4413] 2025-08-31 17:21:59,077 >>   Batch size = 8
[INFO|trainer.py:4074] 2025-08-31 17:22:18,134 >> Saving model checkpoint to ./results/checkpoint-500
[INFO|configuration_utils.py:478] 2025-08-31 17:22:18,138 >> Configuration saved in ./results/checkpoint-500/config.json
[INFO|modeling_utils.py:4185] 2025-08-31 17:22:25,365 >> Model weights saved in ./results/checkpoint-500/model.safetensors
[INFO|tokenization_utils_base.py:2562] 2025-08-31 17:22:25,368 >> tokenizer config file saved in ./results/checkpoint-500/tokenizer_config.json
[INFO|tokenization_utils_base.py:2571] 2025-08-31 17:22:25,369 >> Special tokens file saved in ./results/checkpoint-500/special_tokens_map.json
[INFO|trainer.py:4408] 2025-08-31 17:24:07,727 >> 
***** Running Evaluation *****
[INFO|trainer.py:4410] 2025-08-31 17:24:07,728 >>   Num example

#### Saving the tokenizer too for easy upload

In [38]:
trainer.save_model()
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()

[INFO|trainer.py:4074] 2025-08-31 18:24:51,582 >> Saving model checkpoint to ./results
[INFO|configuration_utils.py:478] 2025-08-31 18:24:51,585 >> Configuration saved in ./results/config.json
[INFO|modeling_utils.py:4185] 2025-08-31 18:25:04,505 >> Model weights saved in ./results/model.safetensors
[INFO|tokenization_utils_base.py:2562] 2025-08-31 18:25:04,509 >> tokenizer config file saved in ./results/tokenizer_config.json
[INFO|tokenization_utils_base.py:2571] 2025-08-31 18:25:04,509 >> Special tokens file saved in ./results/special_tokens_map.json


***** train metrics *****
  epoch                    =        3.0
  total_flos               =  6549636GF
  train_loss               =     0.6481
  train_runtime            = 1:04:19.45
  train_samples            =      35637
  train_samples_per_second =     27.701
  train_steps_per_second   =      3.463


#### Evaluating our model on validation/development data

In [39]:
logger.info("*** Evaluate ***")

metrics = trainer.evaluate(eval_dataset=eval_dataset)

max_eval_samples = (
    max_eval_samples if max_eval_samples is not None else len(eval_dataset)
)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))

trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

INFO:__main__:*** Evaluate ***
[INFO|trainer.py:4408] 2025-08-31 18:25:13,786 >> 
***** Running Evaluation *****
[INFO|trainer.py:4410] 2025-08-31 18:25:13,786 >>   Num examples = 2512
[INFO|trainer.py:4413] 2025-08-31 18:25:13,787 >>   Batch size = 8


***** eval metrics *****
  epoch                   =        3.0
  eval_accuracy           =     0.7078
  eval_loss               =     1.0059
  eval_runtime            = 0:00:18.98
  eval_samples            =       2512
  eval_samples_per_second =    132.319
  eval_steps_per_second   =      16.54


### Predecting the test data

In [40]:
id2l = {v: k for k, v in l2id.items()}
logger.info("*** Predict ***")
#predict_dataset = predict_dataset.remove_columns("label")
ids = predict_dataset['id']
predict_dataset = predict_dataset.remove_columns("id")
predictions = trainer.predict(predict_dataset, metric_key_prefix="predict").predictions
predictions = np.argmax(predictions, axis=1)
output_predict_file = os.path.join(training_args.output_dir, f"subtask_1A.tsv")
if trainer.is_world_process_zero():
    with open(output_predict_file, "w") as writer:
        logger.info(f"***** Predict results *****")
        writer.write("id\tlabel\tmodel\n")
        for index, item in enumerate(predictions):
            item = label_list[item]
            item = id2l[item]
            writer.write(f"{ids[index]}\t{item}\t{model_name}\n")

INFO:__main__:*** Predict ***
[INFO|trainer.py:932] 2025-08-31 18:25:48,361 >> The following columns in the test set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: text. If text are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:4408] 2025-08-31 18:25:48,366 >> 
***** Running Prediction *****
[INFO|trainer.py:4410] 2025-08-31 18:25:48,366 >>   Num examples = 2512
[INFO|trainer.py:4413] 2025-08-31 18:25:48,367 >>   Batch size = 8


INFO:__main__:***** Predict results *****


In [44]:
# Convert numeric predictions back to text labels
id2l = {0: 'None', 1: 'Religious Hate', 2: 'Sexism', 3: 'Political Hate', 4: 'Profane', 5: 'Abusive'}
logger.info("*** Predict ***")

# Get test data IDs
ids = predict_dataset['id']
predict_dataset = predict_dataset.remove_columns("id")

# Make predictions
predictions = trainer.predict(predict_dataset, metric_key_prefix="predict").predictions
predictions = np.argmax(predictions, axis=1)

# Convert to text labels
text_predictions = [id2l[pred] for pred in predictions]

# Create proper submission file
output_predict_file = os.path.join(training_args.output_dir, "task.tsv")
model_name_for_submission = "BanglaBERT-HateSpeech"

if trainer.is_world_process_zero():
    with open(output_predict_file, "w", encoding='utf-8') as writer:
        logger.info(f"***** Predict results *****")
        writer.write("id\tlabel\tmodel\n")
        for test_id, prediction in zip(ids, text_predictions):
            writer.write(f"{test_id}\t{prediction}\t{model_name_for_submission}\n")

    logger.info(f"Predictions saved to {output_predict_file}")

INFO:__main__:*** Predict ***
[INFO|trainer.py:932] 2025-08-31 18:27:03,768 >> The following columns in the test set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: text. If text are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:4408] 2025-08-31 18:27:03,777 >> 
***** Running Prediction *****
[INFO|trainer.py:4410] 2025-08-31 18:27:03,779 >>   Num examples = 2512
[INFO|trainer.py:4413] 2025-08-31 18:27:03,781 >>   Batch size = 8


INFO:__main__:***** Predict results *****
INFO:__main__:Predictions saved to ./results/task.tsv


In [45]:
ids[0]

879187

#### Saving the model into card

In [46]:
kwargs = {"finetuned_from": model_name, "tasks": "text-classification"}
trainer.create_model_card(**kwargs)

[INFO|modelcard.py:456] 2025-08-31 18:27:31,593 >> Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.7078025341033936}]}


In [None]:
!zip subtask_1A.zip ./distilBERT_m/subtask_1A.tsv

In [None]:
!pip install simpletransformers