In [1]:
!pip install transformers datasets torch scikit-learn


Collecting datasets
  Downloading datasets-4.5.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-23.0.0-cp310-cp310-win_amd64.whl.metadata (3.1 kB)
Collecting dill<0.4.1,>=0.3.0 (from datasets)
  Downloading dill-0.4.0-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.6.0-cp310-cp310-win_amd64.whl.metadata (13 kB)
Collecting multiprocess<0.70.19 (from datasets)
  Downloading multiprocess-0.70.18-py310-none-any.whl.metadata (7.5 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1 (from fsspec[http]<=2025.10.0,>=2023.1.0->datasets)
  Downloading aiohttp-3.13.3-cp310-cp310-win_amd64.whl.metadata (8.4 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.10.0,>=2023.1.0->datasets)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.4.0 (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.10.0,>=2023.1.0->datasets)
  Downloading 

  You can safely remove it manually.

[notice] A new release of pip is available: 25.2 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import pandas as pd
import numpy as np
import torch

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    Trainer,
    TrainingArguments
)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the full training CSV from the data directory.
df = pd.read_csv("../data/jigsaw-unintended-bias-train.csv")

# Binarise the numeric "toxic" score: True when the score is at least 0.5.
df["toxicity"] = df["toxic"].ge(0.5)

# Draw a fixed-seed random subset of 300,000 rows to keep CPU training tractable.
df_sample = df.sample(random_state=42, n=300_000)

print("Sample shape:", df_sample.shape)


Sample shape: (300000, 46)


In [3]:
# Stratified 80/20 split on the binary label, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    df_sample["comment_text"], df_sample["toxicity"],
    stratify=df_sample["toxicity"],
    test_size=0.2,
    random_state=42,
)


In [4]:
# Fast tokenizer loaded from the "distilbert-base-uncased" checkpoint.
tokenizer = DistilBertTokenizerFast.from_pretrained(
    "distilbert-base-uncased",
)


In [5]:
def tokenize(batch_texts):
    """Tokenize a collection of texts with the module-level ``tokenizer``.

    ``batch_texts`` must expose ``.tolist()`` (it is called here). The
    resulting list is encoded with padding and truncation enabled and a
    maximum length of 128. Returns whatever the tokenizer call returns.
    """
    texts = batch_texts.tolist()
    encoded = tokenizer(texts, max_length=128, truncation=True, padding=True)
    return encoded

# Encode the train split first, then the test split, with identical settings.
train_encodings, test_encodings = (tokenize(texts) for texts in (X_train, X_test))

print("Tokenization complete ")


Tokenization complete 


In [6]:
class ToxicDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with per-example labels."""

    def __init__(self, encodings, labels):
        # `labels` must provide .tolist(); it is stored as a plain Python list.
        # `encodings` is looked up per key and then per example in __getitem__.
        self.labels = labels.tolist()
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples (one per stored label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return a dict of tensors for example ``idx``.

        Every key of ``encodings`` is converted to a tensor, and a float
        ``"labels"`` tensor is added last.
        """
        item = {}
        for key, val in self.encodings.items():
            item[key] = torch.tensor(val[idx])
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item


In [7]:
# Wrap each split's encodings and labels in a ToxicDataset.
train_dataset = ToxicDataset(encodings=train_encodings, labels=y_train)
test_dataset = ToxicDataset(encodings=test_encodings, labels=y_test)

print("Datasets created ")


Datasets created 


In [8]:
# Load the "distilbert-base-uncased" checkpoint into a sequence-classification model
# with a single output unit.
# NOTE(review): with num_labels=1 Transformers computes a regression (MSE) loss rather
# than a binary cross-entropy loss — confirm this is the intended training objective.
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=1  # single output logit (see NOTE above about the loss this selects)
)


Loading weights: 100%|██████████| 100/100 [00:00<00:00, 361.01it/s, Materializing param=distilbert.transformer.layer.5.sa_layer_norm.weight]   
[1mDistilBertForSequenceClassification LOAD REPORT[0m from: distilbert-base-uncased
Key                     | Status     | 
------------------------+------------+-
vocab_projector.bias    | UNEXPECTED | 
vocab_layer_norm.weight | UNEXPECTED | 
vocab_transform.weight  | UNEXPECTED | 
vocab_transform.bias    | UNEXPECTED | 
vocab_layer_norm.bias   | UNEXPECTED | 
pre_classifier.weight   | MISSING    | 
classifier.bias         | MISSING    | 
classifier.weight       | MISSING    | 
pre_classifier.bias     | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING[3m	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.[0m


In [12]:
!pip install transformers[torch] --upgrade


Collecting transformers[torch]
  Downloading transformers-5.1.0-py3-none-any.whl.metadata (31 kB)
Collecting huggingface-hub<2.0,>=1.3.0 (from transformers[torch])
  Downloading huggingface_hub-1.4.1-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers[torch])
  Downloading tokenizers-0.22.2-cp39-abi3-win_amd64.whl.metadata (7.4 kB)
Collecting typer-slim (from transformers[torch])
  Downloading typer_slim-0.23.0-py3-none-any.whl.metadata (4.2 kB)
Collecting accelerate>=1.1.0 (from transformers[torch])
  Downloading accelerate-1.12.0-py3-none-any.whl.metadata (19 kB)
Collecting hf-xet<2.0.0,>=1.2.0 (from huggingface-hub<2.0,>=1.3.0->transformers[torch])
  Downloading hf_xet-1.2.0-cp37-abi3-win_amd64.whl.metadata (5.0 kB)
Collecting typer>=0.23.0 (from typer-slim->transformers[torch])
  Downloading typer-0.23.0-py3-none-any.whl.metadata (16 kB)
Collecting annotated-doc>=0.0.2 (from typer>=0.23.0->typer-slim->transformers[torch])
  Downloading annota

  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 5.0.0 requires transformers<5.0.0,>=4.41.0, but you have transformers 5.1.0 which is incompatible.

[notice] A new release of pip is available: 25.2 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
# Trainer configuration: one epoch, batch size 8 per device, loss logged every 500 steps.
# `logging_dir` is intentionally not passed: Transformers reports it as deprecated
# (scheduled for removal in v5.2), and it only sets the TensorBoard log directory,
# which is unused here because reporting is disabled via report_to="none".
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=1,
    logging_steps=500,
    do_train=True,
    do_eval=True,
    report_to="none"   # the string "none" (not the None object) disables reporting integrations
)


`logging_dir` is deprecated and will be removed in v5.2. Please set `TENSORBOARD_LOGGING_DIR` instead.


In [13]:
def compute_metrics(eval_pred):
    """Compute ROC-AUC for one evaluation pass.

    ``eval_pred`` unpacks into ``(logits, labels)``. The logits are passed
    through a sigmoid and flattened to 1-D before being scored against
    ``labels`` with ``roc_auc_score``.

    Returns a dict with the single key ``"roc_auc"``.
    """
    logits, labels = eval_pred
    scores = torch.sigmoid(torch.tensor(logits))
    probs = scores.numpy().flatten()
    auc = roc_auc_score(labels, probs)
    return {"roc_auc": auc}


In [14]:
# Bind the model, its training configuration, both datasets and the metric function.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)


In [15]:
# Run training on `trainer`; as the cell's last expression, the returned value is displayed.
trainer.train()




Step,Training Loss
500,0.065885
1000,0.057829
1500,0.049354
2000,0.052247
2500,0.049822
3000,0.045845
3500,0.044772
4000,0.052062
4500,0.045873
5000,0.050831


Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  3.07it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.59it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.67it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.31it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.74it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.39it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.42it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  3.08it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.37it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.26it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.24it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.07it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.50it/s]
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  4.06it/s]
Writing model shards: 100%|██████████| 1/1 [00:0

TrainOutput(global_step=30000, training_loss=0.04500806573232015, metrics={'train_runtime': 99375.0734, 'train_samples_per_second': 2.415, 'train_steps_per_second': 0.302, 'total_flos': 7947902177280000.0, 'train_loss': 0.04500806573232015, 'epoch': 1.0})

In [1]:
# Call evaluate() on `trainer` and print what it returns.
# Requires `trainer` to already exist in the current kernel session; running this
# cell on a fresh kernel raises NameError.
results = trainer.evaluate()
print(results)


NameError: name 'trainer' is not defined

In [4]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast
import torch

# Reload the model and tokenizer from the local "./results/distilbert_final" directory.
# NOTE(review): the cell that writes this directory is not visible here — confirm it exists.
model = DistilBertForSequenceClassification.from_pretrained("./results/distilbert_final")
tokenizer = DistilBertTokenizerFast.from_pretrained("./results/distilbert_final")

# Put the model in evaluation mode before inference.
model.eval()

print("Model loaded successfully ")


Loading weights: 100%|██████████| 104/104 [00:00<00:00, 240.19it/s, Materializing param=pre_classifier.weight]                                  


Model loaded successfully 


In [6]:
import pandas as pd
import torch
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score
from transformers import DistilBertTokenizerFast
from torch.utils.data import DataLoader


In [7]:
df = pd.read_csv("../data/jigsaw-unintended-bias-train.csv")

# Binary label: True when the "toxic" score is at least 0.5.
df["toxicity"] = df["toxic"].ge(0.5)

# Same sample size and seed as the earlier sampling step, so the same rows are
# drawn provided the CSV is unchanged.
df_sample = df.sample(random_state=42, n=300_000)

# Same stratified 80/20 split parameters and seed as the earlier split.
X_train, X_test, y_train, y_test = train_test_split(
    df_sample["comment_text"], df_sample["toxicity"],
    stratify=df_sample["toxicity"],
    test_size=0.2,
    random_state=42,
)

print("Data recreated ")


Data recreated 


In [8]:
# Load the tokenizer stored alongside the saved model.
tokenizer = DistilBertTokenizerFast.from_pretrained("./results/distilbert_final")

# Encode the held-out texts: padded, truncated, at most 128 tokens each.
test_encodings = tokenizer(X_test.tolist(), max_length=128, truncation=True, padding=True)


In [10]:
class ToxicDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with per-example labels."""

    def __init__(self, encodings, labels):
        # `labels` must provide .tolist(); it is stored as a plain Python list.
        # `encodings` is looked up per key and then per example in __getitem__.
        self.labels = labels.tolist()
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples (one per stored label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return a dict of tensors for example ``idx``.

        Every key of ``encodings`` is converted to a tensor, and a float
        ``"labels"`` tensor is added last.
        """
        item = {}
        for key, val in self.encodings.items():
            item[key] = torch.tensor(val[idx])
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

# Wrap the test encodings and labels in a ToxicDataset.
test_dataset = ToxicDataset(encodings=test_encodings, labels=y_test)

print("Test dataset ready ")


Test dataset ready 


In [11]:
from transformers import DistilBertForSequenceClassification

# Load the saved model from the local directory and switch it to evaluation mode.
model = DistilBertForSequenceClassification.from_pretrained("./results/distilbert_final")
model.eval()

print("Model loaded ")


Loading weights: 100%|██████████| 104/104 [00:00<00:00, 146.07it/s, Materializing param=pre_classifier.weight]                                  


Model loaded 


In [None]:
# Batched inference over the test set, followed by ROC-AUC and accuracy.
test_loader = DataLoader(test_dataset, batch_size=16)

all_probs = []
all_labels = []

# Inference only: no gradients are needed.
with torch.no_grad():
    for batch in test_loader:
        # Everything except the labels is forwarded to the model as keyword arguments.
        inputs = {k: v for k, v in batch.items() if k != "labels"}
        labels = batch["labels"]

        outputs = model(**inputs)
        # Squeeze only the last axis (assumes logits are (batch, 1)). A bare
        # .squeeze() would also drop the batch axis when a batch holds a single
        # example, producing a 0-d tensor that extend() cannot iterate.
        logits = outputs.logits.squeeze(-1)

        probs = torch.sigmoid(logits)

        all_probs.extend(probs.numpy())
        all_labels.extend(labels.numpy())

roc_auc = roc_auc_score(all_labels, all_probs)
# NOTE(review): sigmoid(logit) > 0.5 is equivalent to logit > 0. If the checkpoint
# was trained with num_labels=1 and float labels (MSE regression loss in Transformers),
# the raw output approximates the 0/1 label directly and this cutoff may be too low.
# ROC-AUC is unaffected because sigmoid is monotonic — confirm the accuracy threshold.
accuracy = accuracy_score(all_labels, np.array(all_probs) > 0.5)

print("DistilBERT ROC-AUC:", round(roc_auc, 4))
print("DistilBERT Accuracy:", round(accuracy, 4))
