Setup

In [None]:
import pandas as pd

# Locations of the three CSV files inside the Colab runtime.
train_path, test_path, test_labels_path = (
    "/content/train.csv",
    "/content/test.csv",
    "/content/test_labels.csv",
)

# Load each file into its own DataFrame.
df_train = pd.read_csv(train_path)
df_test = pd.read_csv(test_path)
df_test_labels = pd.read_csv(test_labels_path)

# Report (rows, columns) for every frame as a quick sanity check.
for caption, frame in (
    ("Train shape:", df_train),
    ("Test shape:", df_test),
    ("Test Labels shape:", df_test_labels),
):
    print(caption, frame.shape)

# Preview the first rows of the training data as the cell's rich output.
df_train.head()


Train shape: (159571, 8)
Test shape: (153164, 2)
Test Labels shape: (153164, 7)


Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


Build the binary `is_toxic` label and preview the data

In [None]:
# The six per-category label columns present in the training CSV.
toxic_labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

# Collapse the category columns into a single "is_toxic" column (row-wise max,
# so a row is flagged as soon as any category is set), then keep only the
# comment text and that new label.
df_train = df_train.assign(
    is_toxic=df_train[toxic_labels].max(axis=1)
)[["comment_text", "is_toxic"]]
df_train.head()

Unnamed: 0,comment_text,is_toxic
0,Explanation\nWhy the edits made under my usern...,0
1,D'aww! He matches this background colour I'm s...,0
2,"Hey man, I'm really not trying to edit war. It...",0
3,"""\nMore\nI can't make any real suggestions on ...",0
4,"You, sir, are my hero. Any chance you remember...",0


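API Key: [REDACTED — this key was stored in plain text and must be revoked and regenerated]

> Never paste credentials into a notebook cell or its outputs. Supply the key at runtime instead, e.g. through the interactive login prompt, `getpass.getpass()`, an environment variable, or Colab secrets.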



In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive. The trained
# model is written under this mount point in section 13 below, so the mount
# has to succeed before that step runs.
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import re
import string

# PyTorch / Transformers / Datasets
import torch
import torch.nn.functional as F
!pip install datasets transformers optuna
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

##############################################################################
# 1. LOAD DATA
##############################################################################
train_path = "/content/train.csv"

# The six per-category label columns present in the training CSV.
toxic_labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

# Read the raw CSV, derive one "is_toxic" column as the row-wise max of the
# category columns, and keep only the comment text plus that label.
df_train = (
    pd.read_csv(train_path)
    .assign(is_toxic=lambda frame: frame[toxic_labels].max(axis=1))
)[["comment_text", "is_toxic"]]

##############################################################################
# 2. CLEAN TEXT
##############################################################################
# Built once instead of on every call: clean_text is applied to every row of
# the training frame. The former `https\S+` alternative is dropped because
# `http\S+` already matches it, and re.MULTILINE is dropped because the pattern
# has no ^/$ anchors; the matches are unchanged.
_URL_PATTERN = re.compile(r"http\S+|www\S+")
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Normalise one comment before tokenization.

    Steps, in order: lowercase, remove URL-like tokens (anything starting with
    "http" or "www" up to the next whitespace), then delete every character in
    ``string.punctuation`` (ASCII punctuation only; characters such as curly
    quotes are left in place).

    Args:
        text: The raw comment. ``None`` and float NaN (what pandas yields for
            an empty CSV cell) are treated as an empty comment; any other
            non-string value is converted with ``str()`` first.

    Returns:
        str: The cleaned text. Whitespace is not collapsed, so removing a URL
        can leave consecutive spaces behind.
    """
    # `text != text` is only true for NaN; avoids calling .lower() on a float.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    lowered = text.lower()
    without_urls = _URL_PATTERN.sub('', lowered)
    return without_urls.translate(_PUNCTUATION_TABLE)

# Add a cleaned copy of every comment next to the raw text.
df_train["clean_text"] = df_train["comment_text"].map(clean_text)

##############################################################################
# 3. TRAIN / VALIDATION SPLIT
##############################################################################
X, y = df_train["clean_text"], df_train["is_toxic"]

# Hold out 20% for validation. Stratifying on the label keeps the toxic /
# non-toxic ratio the same in both parts; the fixed seed makes the split
# repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

##############################################################################
# 4. CREATE HUGGING FACE DATASETS
##############################################################################
def _as_hf_dataset(texts, labels):
    """Wrap a text Series and a label Series as a Dataset with "text"/"label" columns."""
    return Dataset.from_dict({"text": texts.tolist(), "label": labels.tolist()})


train_dataset = _as_hf_dataset(X_train, y_train)
val_dataset = _as_hf_dataset(X_val, y_val)

##############################################################################
# 5. TOKENIZER SETUP
##############################################################################
# Checkpoint name shared by the tokenizer here and the model created later.
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize_function(example):
    """Tokenize the "text" field of a dataset example (or batch of examples).

    Sequences are truncated to at most 128 tokens and padded up to that same
    length, so every encoded row has an identical size.

    Args:
        example: Mapping with a "text" entry, as passed in by ``Dataset.map``.

    Returns:
        Whatever the tokenizer returns for that text under the options above.
    """
    encode_options = {"padding": "max_length", "truncation": True, "max_length": 128}
    return tokenizer(example["text"], **encode_options)

# Tokenize both splits in batches.
train_dataset_encoded = train_dataset.map(tokenize_function, batched=True)
val_dataset_encoded = val_dataset.map(tokenize_function, batched=True)

# Return only the listed columns, as torch tensors, when the datasets are
# indexed.
for encoded_split in (train_dataset_encoded, val_dataset_encoded):
    encoded_split.set_format(
        type="torch",
        columns=["input_ids", "attention_mask", "label"],
    )

##############################################################################
# 6. METRICS FUNCTION
##############################################################################
from sklearn.metrics import roc_auc_score

def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall, F1 and ROC AUC for one evaluation pass.

    Args:
        eval_pred: Pair that unpacks as ``(logits, labels)``. The logits are
            indexed as a 2-D array whose column 1 is the positive class.

    Returns:
        dict: Keys "accuracy", "precision", "recall", "f1" and "auc".
        Precision, recall and F1 are for the positive class
        (``average="binary"``) and fall back to 0 when undefined.
    """
    logits, labels = eval_pred

    # Numerically stable softmax: shift each row by its maximum before
    # exponentiating so np.exp cannot overflow.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exps = np.exp(shifted)
    probs = exps / np.sum(exps, axis=1, keepdims=True)
    positive_scores = probs[:, 1]

    # Hard predictions are the arg-max class; AUC uses the soft scores instead.
    preds = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average="binary", zero_division=0
    )

    return {
        "accuracy": accuracy_score(labels, preds),
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "auc": roc_auc_score(labels, positive_scores),
    }


##############################################################################
# 7. TRAINING ARGUMENTS
##############################################################################
# Checkpointing and model selection: evaluate and save once per epoch, then
# reload the checkpoint with the highest "f1" metric when training finishes.
checkpoint_options = dict(
    output_dir="distilbert_checkpoint",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
)

# Optimisation settings, labelled "best params search" by the author
# (presumably the result of an earlier hyperparameter search that is not
# shown in this cell).
optimization_options = dict(
    learning_rate=4.14e-5,
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_steps=100,
    seed=42,
)

training_args = TrainingArguments(**checkpoint_options, **optimization_options)

##############################################################################
# 8. INITIALIZE MODEL & TRAINER
##############################################################################
# Seed all RNGs *before* the model is created. The classification head is not
# part of the pretrained checkpoint and is randomly initialised (see the
# "newly initialized" notice in the run log), while Trainer only applies
# `training_args.seed` inside its own constructor, i.e. after the model
# already exists. Without this call the head's starting weights, and therefore
# the final metrics, differ from run to run.
from transformers import set_seed

set_seed(training_args.seed)

# Two output labels: not toxic (0) and toxic (1).
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# NOTE(review): the run log shows a warning raised at this Trainer(...) call;
# presumably it is the deprecation of `tokenizer=` in newer transformers
# releases. Confirm against the installed version before renaming it.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset_encoded,
    eval_dataset=val_dataset_encoded,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

##############################################################################
# 9. TRAIN
##############################################################################
trainer.train()

##############################################################################
# 10. PREDICTIONS & PROBABILITIES ON VALIDATION SET
##############################################################################
# Run the trained model over the validation split and pull out the raw
# outputs together with the reference labels.
predictions = trainer.predict(val_dataset_encoded)
logits, labels = predictions.predictions, predictions.label_ids

# Softmax over the class axis turns logits into probabilities; column 1 is the
# probability of the "toxic" class.
logits_tensor = torch.from_numpy(logits)
probs_tensor = logits_tensor.softmax(dim=1)
probs = probs_tensor[:, 1].numpy()

##############################################################################
# 11. THRESHOLD TUNING
##############################################################################
# Sweep 101 evenly spaced cut-offs in [0, 1] and keep the one with the highest
# F1 on the validation predictions. 0.5 is the fallback if no cut-off scores
# above zero.
best_threshold = 0.5
best_f1 = 0.0

thresholds = np.linspace(0, 1, 101)
for t in thresholds:
    temp_preds = (probs >= t).astype(int)
    # zero_division=0 matches compute_metrics: at high cut-offs nothing is
    # predicted positive and precision is undefined. The score is 0 either
    # way; this only stops scikit-learn warning on those iterations.
    _, _, f1_temp, _ = precision_recall_fscore_support(
        labels, temp_preds, average="binary", zero_division=0
    )
    if f1_temp > best_f1:
        best_f1 = f1_temp
        best_threshold = t

print(f"\nBest threshold found: {best_threshold:.2f} (F1 = {best_f1:.4f})")

##############################################################################
# 12. FINAL EVALUATION
##############################################################################
# NOTE(review): the threshold was tuned on these same validation predictions,
# so this report is optimistic; scoring a separate held-out split would give
# an unbiased estimate.
final_preds = (probs >= best_threshold).astype(int)
print("\nFinal Classification Report:")
print(classification_report(labels, final_preds, digits=4))

##############################################################################
# 13. SAVE MODEL TO GOOGLE DRIVE
##############################################################################
# Target folder on the mounted Google Drive.
save_path = "/content/drive/MyDrive/bert_models/distilbert_final_model"

# Write the model first, then the tokenizer files, into the same folder.
for persist in (trainer.save_model, tokenizer.save_pretrained):
    persist(save_path)

print(f"\nModel and tokenizer saved to: {save_path}")

##############################################################################
# 14. FUNCTION TO CLASSIFY NEW SENTENCES
##############################################################################
def classify_sentence(sentence, threshold=best_threshold):
    """Classify a single sentence as "Toxic" or "Not Toxic".

    Args:
        sentence: Raw text. It is passed through ``clean_text`` before
            tokenization, the same preprocessing used for the training data.
        threshold: Minimum toxic-class probability for a "Toxic" verdict. The
            default is the value ``best_threshold`` had when this function was
            defined; re-run the definition after re-tuning the threshold.

    Returns:
        str: "Toxic" if the toxic-class probability is >= ``threshold``,
        otherwise "Not Toxic".
    """
    classifier = trainer.model
    # Make sure dropout is switched off: in training mode the same sentence
    # can receive different probabilities on every call.
    classifier.eval()

    cleaned = clean_text(sentence)
    # Same truncation limit as training; a single sentence needs no padding.
    inputs = tokenizer(cleaned, return_tensors="pt", truncation=True, max_length=128)
    # Move all encoded tensors to the model's device in one call.
    inputs = inputs.to(classifier.device)

    # No gradients are needed for inference.
    with torch.no_grad():
        out_logits = classifier(**inputs).logits
        out_probs = F.softmax(out_logits, dim=1)
        prob_toxic = out_probs[0, 1].item()  # batch item 0, class 1 ("toxic")

    return "Toxic" if prob_toxic >= threshold else "Not Toxic"

# EXAMPLE USAGE
# Two smoke-test inputs: one hostile, one friendly.
sample_texts = [
    "You are a horrible person!",
    "Thank you for your help, I appreciate it."
]

# Echo each sentence, then the label the classifier assigns to it.
for txt in sample_texts:
    print(f"\nSentence: {txt}")
    verdict = classify_sentence(txt)
    print(f"Prediction: {verdict}")


Mounted at /content/drive
Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading datasets-3.4.1-py3-none-any.whl (487 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/127656 [00:00<?, ? examples/s]

Map:   0%|          | 0/31915 [00:00<?, ? examples/s]



model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mpeter-rentopoulos[0m ([33mpeter-rentopoulos-concordia-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.0798,0.108412,0.967695,0.877301,0.79322,0.833145,0.983301
2,0.0842,0.112789,0.967445,0.838967,0.841294,0.840129,0.984047


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Best threshold found: 0.63 (F1 = 0.8420)

Final Classification Report:
              precision    recall  f1-score   support

           0     0.9816    0.9828    0.9822     28670
           1     0.8467    0.8373    0.8420      3245

    accuracy                         0.9680     31915
   macro avg     0.9141    0.9101    0.9121     31915
weighted avg     0.9679    0.9680    0.9680     31915


Model and tokenizer saved to: /content/drive/MyDrive/bert_models/distilbert_final_model

Sentence: You are a horrible person!
Prediction: Toxic

Sentence: Thank you for your help, I appreciate it.
Prediction: Not Toxic


In [None]:
# Manual spot check: a hand-written mix of friendly, neutral and hostile
# sentences to eyeball the classifier's behaviour.
test_sentences = [
    "Hello, I'm excited about the new project. Great job on the initial design!",
    "You are the worst developer I've come across. Stop messing up everything!",
    "Could you please help me with the next steps? I'm a bit confused right now.",
    "I can't believe you posted that nonsense. What an idiot!",
    "Let's schedule a meeting to discuss the results of our last experiment. They were inconclusive.",
    "I've never felt so humiliated. You should be ashamed of yourself.",
    "Congratulations on your promotion! Keep up the fantastic work.",
    "What in the world were you thinking? That approach was absolutely horrible.",
    "Thank you for your kind support. I really appreciate your understanding.",
    "I hate it when people ignore basic rules. Are you even paying attention?",
    "You are truly remarkable. The effort you’ve put in is admirable!",
    "Stop bothering me, you clueless fool!",
    "Great presentation earlier. I think our team can learn a lot from your style.",
]

# 40-character rule printed after each result.
separator = "-" * 40

for txt in test_sentences:
    prediction = classify_sentence(txt)
    print(f"Text: {txt}", f"Classification: {prediction}", separator, sep="\n")



Text: Hello, I'm excited about the new project. Great job on the initial design!
Classification: Not Toxic
----------------------------------------
Text: You are the worst developer I've come across. Stop messing up everything!
Classification: Toxic
----------------------------------------
Text: Could you please help me with the next steps? I'm a bit confused right now.
Classification: Not Toxic
----------------------------------------
Text: I can't believe you posted that nonsense. What an idiot!
Classification: Toxic
----------------------------------------
Text: Let's schedule a meeting to discuss the results of our last experiment. They were inconclusive.
Classification: Not Toxic
----------------------------------------
Text: I've never felt so humiliated. You should be ashamed of yourself.
Classification: Not Toxic
----------------------------------------
Text: Congratulations on your promotion! Keep up the fantastic work.
Classification: Not Toxic
-------------------------------