# Jigsaw Unintended Bias in Toxicity Classification

1. Import dependencies

In [1]:
import random
import time

import numpy as np
import torch
import os
import pandas as pd

from transformers import (
    AutoTokenizer,
    DataCollatorWithPadding,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    pipeline,
)

from src.data.kaggle import submit_competition, publish_model
from src.model.metrics import compute_metrics
from src.data.toxicity import (
    load_toxicity_dataset,
    TOXICITY_LABEL_TO_ID,
    TOXICITY_ID_TO_LABEL,
)
from src.util.torch_device import resolve_torch_device
from src.definitions import MODELS_FOLDER, EXTERNAL_DATA_FOLDER, PROCESSED_DATA_FOLDER, SUBMITIONS_FOLDER
from src.metrics.bias import (
    compute_bias_metrics_for_model,
    calculate_overall_auc,
    get_final_metric,
)

2. Prepare environment

In [2]:
# Seed every random number generator used by the notebook so runs are repeatable.
random_seed = 42

random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Export the MPS allocator setting *before* the device is resolved, so it is
# already present in the environment if device resolution touches the MPS backend.
# NOTE(review): only relevant on Apple-silicon hosts — confirm it is still wanted.
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"

device = resolve_torch_device()

# Competition identifier passed to `submit_competition`; it is also the name of
# the sub-folder that holds the submission file.
competition = "jigsaw-unintended-bias-in-toxicity-classification"
submition_path = SUBMITIONS_FOLDER / competition / "submission.csv"

# Identity subgroups passed to the dataset loader and to the bias metrics.
identity_columns = [
    "male",
    "female",
    "homosexual_gay_or_lesbian",
    "christian",
    "jewish",
    "muslim",
    "black",
    "white",
    "psychiatric_or_mental_illness",
]

# Column names of the prepared dataset: target label and raw comment text.
toxicity_column = "label"
text_column = "text"

# Base checkpoint to fine-tune and the name the fine-tuned model is stored under.
model_checkpoint = "distilbert/distilbert-base-uncased"
model_name = "jigsaw-unintended-bias-in-toxicity-classification"
num_epochs = 3
learning_rate = 2e-5

# Wall-clock timestamp of this run (seconds since the Unix epoch).
# NOTE(review): not referenced by any other visible cell — confirm it is still needed.
epoch_time = int(time.time())

# Show the resolved device as the cell output.
device

device(type='cuda')

3. Load dataset

In [3]:
# Tokenizer that matches the base checkpoint; it is handed to the dataset loader below.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Build the dataset with the project helper.
# NOTE(review): presumably reads raw data from EXTERNAL_DATA_FOLDER and stores a
# processed copy in PROCESSED_DATA_FOLDER — confirm against load_toxicity_dataset.
ds = load_toxicity_dataset(
    EXTERNAL_DATA_FOLDER, PROCESSED_DATA_FOLDER, tokenizer, random_seed, identity_columns
)

3. Prepare model

In [None]:
# Load the base checkpoint with a sequence-classification head that has one
# output per toxicity label, and register the label <-> id mappings on it.
label_count = len(TOXICITY_LABEL_TO_ID)

model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=label_count,
    label2id=TOXICITY_LABEL_TO_ID,
    id2label=TOXICITY_ID_TO_LABEL,
)

# Move the weights to the device resolved in the environment cell.
model = model.to(device)

In [None]:
# Collator that pads the tokenized examples of each batch with the tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Intermediate checkpoints are written here during training.
checkpoint_dir = MODELS_FOLDER / f"{model_name}-checkpoint"

training_args = TrainingArguments(
    output_dir=checkpoint_dir,
    seed=random_seed,
    # Optimisation settings.
    num_train_epochs=num_epochs,
    learning_rate=learning_rate,
    weight_decay=0.01,
    # Batch sizes; auto_find_batch_size is enabled as well.
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    auto_find_batch_size=True,
    # Evaluate and checkpoint once per epoch, then restore the best checkpoint.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # NOTE(review): assumes compute_metrics reports an "f1" entry — TODO confirm.
    metric_for_best_model="f1",
    greater_is_better=True,
)

# NOTE(review): the "test" split is used here for evaluation during training and
# is also the split scored in the bias evaluation — confirm this is intended.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=ds["train"],
    eval_dataset=ds["test"],
)

4. Train model

In [None]:
# Fine-tune the model with the Trainer configured in the previous cell.
trainer.train()

5. Evaluate

In [None]:
# Run the Trainer's evaluation loop and keep the returned metrics for display.
evaluation_feedback = trainer.evaluate()

In [None]:
# Display the metrics returned by trainer.evaluate().
evaluation_feedback

6. Save weights

In [None]:
# Persist the fine-tuned model and its tokenizer side by side in one folder
# so they can be reloaded together later.
final_model_dir = MODELS_FOLDER / model_name

trainer.save_model(final_model_dir)
tokenizer.save_pretrained(final_model_dir)

7. Predict test data

In [None]:
# Reload the model and tokenizer from the saved folder, so the prediction
# cells below can run without repeating the training cells.
saved_model_dir = MODELS_FOLDER / model_name

model = AutoModelForSequenceClassification.from_pretrained(saved_model_dir)
tokenizer = AutoTokenizer.from_pretrained(saved_model_dir)

In [None]:
# Folder that holds the raw competition files.
competition_data_dir = (
    EXTERNAL_DATA_FOLDER / "jigsaw-unintended-bias-in-toxicity-classification"
)

# Comments to score, and the submission template that is filled in later.
test = pd.read_csv(competition_data_dir / "test.csv")
submission = pd.read_csv(competition_data_dir / "sample_submission.csv")

In [5]:
# Wrap the model and tokenizer in a text-classification pipeline for inference.
predictor = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)

Device set to use cuda:0


In [None]:
# Score every test comment. `truncation=True` cuts over-long comments down to
# the tokenizer's maximum length instead of letting the model fail on them.
test_comments = test["comment_text"].values.tolist()

# Each pipeline result is a record with a "label" and a "score" field.
prediction_df = pd.DataFrame.from_records(
    predictor(test_comments, truncation=True)
)

In [None]:
# The competition is scored on predicted probabilities, so submit the
# probability of the "toxic" class rather than a hard 0/1 class id.
# The pipeline reports only the winning label and its score; for the other
# label the complement is used (assumes a two-label model — TODO confirm).
predicted_toxic = prediction_df["label"] == "toxic"
submission["prediction"] = prediction_df["score"].where(
    predicted_toxic, 1 - prediction_df["score"]
)

submition_path.parent.mkdir(parents=True, exist_ok=True)

# Index by "id" only for writing, so `submission` keeps its "id" column and
# this cell can be re-run without a KeyError.
submission.set_index("id").to_csv(submition_path)

In [None]:
# Submission note recording the base checkpoint and the main hyper-parameters.
message = f"[ {model_checkpoint} ] {num_epochs} epochs with {learning_rate} learning rate"

# Send the CSV written in the previous cell to the competition via the project helper.
submit_competition(submition_path, message, competition)

8. Evaluate the model for unintended bias

In [6]:
# Convert the "test" split to a pandas DataFrame for the bias evaluation below.
validate_df = ds["test"].to_pandas()

In [7]:
# Preview the first rows of the evaluation frame.
validate_df.head()

Unnamed: 0,id,label,text,severe_toxicity,obscene,identity_attack,insult,threat,asian,atheist,...,wow,sad,likes,disagree,sexual_explicit,identity_annotator_count,toxicity_annotator_count,__index_level_0__,input_ids,attention_mask
0,391182,0,"Here's more ""tea leaves"":\n1) The opposition ...",0.0,0.0,0.0,0.0,0.0,,,...,0,0,5,0,0.0,0,4,121991,"[101, 2182, 1005, 1055, 2062, 1000, 5572, 3727...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
1,5666077,0,Huh?,0.0,0.0,0.0,0.0,0.0,,,...,0,0,0,0,0.0,0,4,1269345,"[101, 9616, 1029, 102]","[1, 1, 1, 1]"
2,5541104,0,Tempmanoa> Your post is well taken and provid...,0.0,0.0,0.0,0.0,0.0,,,...,0,0,0,0,0.0,0,4,1165966,"[101, 8915, 8737, 2386, 10441, 1028, 2115, 269...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
3,5593855,0,The last two weeks shows the internship is ove...,0.0,0.0,0.0,0.0,0.0,,,...,0,1,10,1,0.0,0,6,1209559,"[101, 1996, 2197, 2048, 3134, 3065, 1996, 2267...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
4,377550,0,And he was impeached for lying.,0.0,0.0,0.0,0.0,0.0,,,...,0,0,2,0,0.0,0,4,110675,"[101, 1998, 2002, 2001, 17727, 5243, 7690, 200...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"


In [8]:
# Score the evaluation comments. `truncation=True` cuts over-long comments down
# to the tokenizer's maximum length instead of letting the model fail on them.
raw_predictions = predictor(
    validate_df[text_column].values.tolist(),
    truncation=True,
)

# The pipeline reports only the winning label and its score. Turn that into the
# probability of the "toxic" class; for the other label the complement is used
# (assumes a two-label model — TODO confirm).
y_pred = [
    pred["score"] if pred["label"] == "toxic" else 1 - pred["score"]
    for pred in raw_predictions
]

# Store the scores in a column named after the model; the bias metrics read it by that name.
validate_df[model_name] = y_pred

In [9]:

# Per-identity bias metrics for the model's score column, computed by the project helper.
bias_metrics_df = compute_bias_metrics_for_model(
    validate_df,
    identity_columns,
    model_name,
    toxicity_column,
)

# Display the per-subgroup table as the cell output.
bias_metrics_df

Unnamed: 0,subgroup,subgroup_size,subgroup_auc,bpsn_auc,bnsp_auc
2,homosexual_gay_or_lesbian,1099,0.873451,0.882674,0.975838
6,black,1442,0.877475,0.87991,0.97572
7,white,2451,0.897013,0.884919,0.980149
5,muslim,2166,0.898768,0.909938,0.972307
4,jewish,748,0.919292,0.937124,0.965055
8,psychiatric_or_mental_illness,458,0.930447,0.929959,0.974025
1,female,5451,0.941764,0.946684,0.972083
0,male,4409,0.944213,0.942852,0.974899
3,christian,4030,0.944305,0.961183,0.96108


In [10]:
# AUC of the model's score column against the label column over the whole evaluation frame.
overall_auc = calculate_overall_auc(validate_df, model_name, toxicity_column)

# Combine the per-subgroup bias metrics with the overall AUC into one score (cell output).
get_final_metric(bias_metrics_df, overall_auc)

np.float64(0.9431894532255664)

9. Publish model

In [4]:
# Upload the saved model folder with the project's Kaggle helper.
# NOTE(review): "transformers" and "default" appear to be the framework and
# variation parts of the Kaggle model handle — confirm against publish_model.
publish_model(MODELS_FOLDER / model_name, "transformers", "default")

VBox(children=(HTML(value='<center> <img\nsrc=https://www.kaggle.com/static/images/site-logo.png\nalt=\'Kaggle…

Uploading Model https://www.kaggle.com/models/amelashchenko/jigsaw-unintended-bias-in-toxicity-classification/transformers/default ...
Starting upload for file /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/config.json


Uploading: 100%|██████████| 742/742 [00:00<00:00, 1.79kB/s]

Upload successful: /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/config.json (742B)
Starting upload for file /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/model.safetensors



Uploading: 100%|██████████| 268M/268M [00:24<00:00, 11.0MB/s] 

Upload successful: /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/model.safetensors (255MB)
Starting upload for file /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/special_tokens_map.json



Uploading: 100%|██████████| 125/125 [00:00<00:00, 286B/s]

Upload successful: /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/special_tokens_map.json (125B)
Starting upload for file /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/tokenizer.json



Uploading: 100%|██████████| 711k/711k [00:01<00:00, 673kB/s] 

Upload successful: /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/tokenizer.json (695KB)
Starting upload for file /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/training_args.bin



Uploading: 100%|██████████| 5.37k/5.37k [00:00<00:00, 12.9kB/s]

Upload successful: /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/training_args.bin (5KB)
Starting upload for file /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/tokenizer_config.json



Uploading: 100%|██████████| 1.23k/1.23k [00:00<00:00, 3.00kB/s]

Upload successful: /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/tokenizer_config.json (1KB)
Starting upload for file /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/vocab.txt



Uploading: 100%|██████████| 232k/232k [00:00<00:00, 284kB/s]

Upload successful: /home/melal/Workspace/nlp/models/jigsaw-unintended-bias-in-toxicity-classification/vocab.txt (226KB)





Your model instance has been created.
Files are being processed...
See at: https://www.kaggle.com/models/amelashchenko/jigsaw-unintended-bias-in-toxicity-classification/transformers/default
