# Jigsaw Unintended Bias in Toxicity Classification

1. Import dependencies

In [None]:
import random
import time

import numpy as np
import torch
import os

from transformers import (
    AutoTokenizer,
    DataCollatorWithPadding,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)

from src.model.metrics import compute_metrics
from src.data.toxicity import load_toxicity_dataset
from src.util.torch_device import resolve_torch_device
from src.definitions import MODELS_FOLDER, EXTERNAL_DATA_FOLDER, PROCESSED_DATA_FOLDER

2. Prepare environment

In [None]:
# Export the MPS allocator setting before any device work happens, so it is
# already in the environment when the MPS backend is first used. "0.0" removes
# the high-watermark cap on MPS allocations (per the PyTorch MPS environment
# variable docs); the system may become unstable if it truly runs out of memory.
# NOTE(review): whether resolve_torch_device() initialises the allocator is not
# visible here — setting the variable first is safe either way.
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"

random_seed = 42

# Seed the Python, NumPy and PyTorch RNGs with the same value for repeatable runs.
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)

device = resolve_torch_device()

# Hugging Face Hub identifier of the pretrained checkpoint to fine-tune.
model_checkpoint = "distilbert/distilbert-base-uncased"

# Unix timestamp (whole seconds) taken when this cell runs; not referenced by
# the other cells visible in this notebook.
epoch_time = int(time.time())

# Last expression of the cell: shows the resolved device in the notebook output.
device

3. Load dataset

In [None]:
# Tokenizer that matches the pretrained checkpoint being fine-tuned.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Load the toxicity dataset via the project helper, handing it the data
# folders, the tokenizer and the seed.
# NOTE(review): the returned object is later indexed with "train" and "test";
# whether the helper tokenizes or caches data is not visible from this cell.
ds = load_toxicity_dataset(
    EXTERNAL_DATA_FOLDER,
    PROCESSED_DATA_FOLDER,
    tokenizer,
    random_seed,
)

In [None]:
# Class name -> integer id; the position in the tuple is the id.
label2id = {name: idx for idx, name in enumerate(("non-toxic", "toxic"))}

# Inverse mapping (integer id -> class name), derived from label2id.
id2label = {idx: name for name, idx in label2id.items()}

3. Prepare model

In [None]:
# Pretrained encoder with a sequence-classification head sized to the label
# set; the label maps are stored in the model config.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
).to(device)

# Pads each batch dynamically using the tokenizer's padding settings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

training_args = TrainingArguments(
    # Task-specific checkpoint folder. The previous name
    # ("span-detection-checkpoint") belonged to a different task and would mix
    # this run's intermediate checkpoints with that one's under MODELS_FOLDER.
    # Passed as str because `output_dir` is declared as a string.
    output_dir=str(MODELS_FOLDER / "toxicity-classification-checkpoint"),
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    # Evaluate and checkpoint once per epoch; the two strategies must match
    # for load_best_model_at_end to work.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    seed=random_seed,
    # Lets the Trainer retry with a smaller batch size on out-of-memory errors.
    auto_find_batch_size=True,
    # NOTE(review): assumes compute_metrics reports an "f1" entry — confirm.
    metric_for_best_model="f1",
    greater_is_better=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=ds["train"],
    # NOTE(review): the best checkpoint is selected on this same "test" split,
    # so the final evaluation on it is optimistic; prefer a separate
    # validation split if the dataset provides one.
    eval_dataset=ds["test"],
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

4. Train model

In [None]:
# Fine-tune the model with the Trainer built in the previous cell. As the last
# expression of the cell, the value returned by train() is shown in the output.
trainer.train()

5. Evaluate

In [None]:
# Run a final evaluation pass. No dataset is passed, so the Trainer falls back
# to the eval_dataset it was constructed with. The result is stored in
# evaluation_feedback rather than displayed by this cell.
evaluation_feedback = trainer.evaluate()

6. Save weights

In [None]:
# Model weights and tokenizer files go into the same directory, so the path is
# computed once and reused for both calls.
export_dir = MODELS_FOLDER / "jigsaw-unintended-bias-in-toxicity-classification"

trainer.save_model(export_dir)
tokenizer.save_pretrained(export_dir)