# **importing** **libraries**

In [1]:
# Core
import pandas as pd
import numpy as np

# Visualization (optional)
import matplotlib.pyplot as plt
import seaborn as sns

# NLP
import re
import nltk
from nltk.corpus import stopwords
nltk.download("stopwords")

# ML / DL
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score

# TensorFlow / Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


# **Load** **data**

In [2]:
train_df = pd.read_csv("/content/train.csv")
test_df  = pd.read_csv("/content/test.csv")

label_cols = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate"
]

train_df.head()

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


# clean the text

In [3]:
stop_words = set(stopwords.words("english"))

def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-z\s]", "", text)
    words = text.split()
    return " ".join([w for w in words if w not in stop_words])

train_df["cleaned_comment"] = train_df["comment_text"].apply(clean_text)
test_df["cleaned_comment"]  = test_df["comment_text"].apply(clean_text)

train_df[["comment_text", "cleaned_comment"]].head()

Unnamed: 0,comment_text,cleaned_comment
0,Explanation\nWhy the edits made under my usern...,explanation edits made username hardcore metal...
1,D'aww! He matches this background colour I'm s...,daww matches background colour im seemingly st...
2,"Hey man, I'm really not trying to edit war. It...",hey man im really trying edit war guy constant...
3,"""\nMore\nI can't make any real suggestions on ...",cant make real suggestions improvement wondere...
4,"You, sir, are my hero. Any chance you remember...",sir hero chance remember page thats


# tokenizing and padding

In [4]:
tokenizer = Tokenizer(num_words=20000)
tokenizer.fit_on_texts(train_df["cleaned_comment"])

X = pad_sequences(
    tokenizer.texts_to_sequences(train_df["cleaned_comment"]),
    maxlen=200,
    padding="post"
)

y = train_df[label_cols].values

# data spliting into train and validation

In [5]:
X_tr, X_val, y_tr, y_val = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)


# experiment with cnn,lstm and bert models

# 1st model with CNN

In [6]:
cnn_model = Sequential([
    Embedding(20000, 128),
    Conv1D(128, 5, activation="relu"),
    GlobalMaxPooling1D(),
    Dense(6, activation="sigmoid")
])

cnn_model.compile(
    optimizer=Adam(0.001),
    loss="binary_crossentropy"
)

cnn_model.fit(
    X_tr, y_tr,
    validation_data=(X_val, y_val),
    epochs=5,
    batch_size=64
)

Epoch 1/5
[1m1995/1995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 8ms/step - loss: 0.0957 - val_loss: 0.0507
Epoch 2/5
[1m1995/1995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0409 - val_loss: 0.0512
Epoch 3/5
[1m1995/1995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0283 - val_loss: 0.0566
Epoch 4/5
[1m1995/1995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0188 - val_loss: 0.0668
Epoch 5/5
[1m1995/1995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0131 - val_loss: 0.0774


<keras.src.callbacks.history.History at 0x79ac0d9b4470>

# 2nd model with LSTM

In [7]:
lstm_model = Sequential([
    Embedding(20000, 128),
    LSTM(64),
    Dropout(0.5),
    Dense(6, activation="sigmoid")
])

lstm_model.compile(
    optimizer=Adam(0.001),
    loss="binary_crossentropy"
)

lstm_model.fit(
    X_tr, y_tr,
    validation_data=(X_val, y_val),
    epochs=5,
    batch_size=64
)

Epoch 1/5
[1m1995/1995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - loss: 0.1646 - val_loss: 0.1407
Epoch 2/5
[1m1995/1995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 13ms/step - loss: 0.1424 - val_loss: 0.1403
Epoch 3/5
[1m1995/1995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 13ms/step - loss: 0.1407 - val_loss: 0.1403
Epoch 4/5
[1m1995/1995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 13ms/step - loss: 0.1412 - val_loss: 0.1405
Epoch 5/5
[1m1995/1995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 13ms/step - loss: 0.1121 - val_loss: 0.0582


<keras.src.callbacks.history.History at 0x79ab946b9d60>

# CNN and LSTM model evaluation

In [8]:
def evaluate_model(model, X_val, y_val, name):
    print(f"\n===== {name} Evaluation =====")
    y_pred = (model.predict(X_val) > 0.5).astype(int)

    print(classification_report(
        y_val,
        y_pred,
        target_names=label_cols,
        zero_division=0
    ))

    macro_f1 = f1_score(y_val, y_pred, average="macro")
    print("Macro F1-score:", macro_f1)

In [9]:
evaluate_model(cnn_model, X_val, y_val, "CNN")
evaluate_model(lstm_model, X_val, y_val, "LSTM")



===== CNN Evaluation =====
[1m998/998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
               precision    recall  f1-score   support

        toxic       0.77      0.72      0.74      3056
 severe_toxic       0.51      0.17      0.26       321
      obscene       0.87      0.71      0.78      1715
       threat       0.47      0.23      0.31        74
       insult       0.73      0.60      0.66      1614
identity_hate       0.61      0.32      0.42       294

    micro avg       0.77      0.64      0.70      7074
    macro avg       0.66      0.46      0.53      7074
 weighted avg       0.76      0.64      0.69      7074
  samples avg       0.06      0.06      0.06      7074

Macro F1-score: 0.5291682808496626

===== LSTM Evaluation =====
[1m998/998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step
               precision    recall  f1-score   support

        toxic       0.82      0.74      0.78      3056
 severe_toxic       0.60      0.04     

# Install Transformers Dataset

In [2]:
!pip install transformers datasets




In [14]:
!pip uninstall -y transformers accelerate peft
!pip install transformers==4.38.2 accelerate==0.27.2 peft==0.9.0


Found existing installation: transformers 4.38.0
Uninstalling transformers-4.38.0:
  Successfully uninstalled transformers-4.38.0
Found existing installation: accelerate 0.27.2
Uninstalling accelerate-0.27.2:
  Successfully uninstalled accelerate-0.27.2
Found existing installation: peft 0.18.1
Uninstalling peft-0.18.1:
  Successfully uninstalled peft-0.18.1
Collecting transformers==4.38.2
  Downloading transformers-4.38.2-py3-none-any.whl.metadata (130 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.7/130.7 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate==0.27.2
  Using cached accelerate-0.27.2-py3-none-any.whl.metadata (18 kB)
Collecting peft==0.9.0
  Downloading peft-0.9.0-py3-none-any.whl.metadata (13 kB)
Downloading transformers-4.38.2-py3-none-any.whl (8.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached accelerate-0.27.2-py3-none-any.whl (279

# DistilBert model

In [3]:
import pandas as pd
import numpy as np
import torch

from datasets import Dataset
from sklearn.metrics import classification_report, f1_score

from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    TrainingArguments,
    Trainer
)

# Prepare Dataset

In [4]:
train_df = pd.read_csv("/content/train.csv")

label_cols = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate"
]

train_df = train_df[["comment_text"] + label_cols]


In [5]:
dataset = Dataset.from_pandas(train_df)
dataset = dataset.train_test_split(test_size=0.2, seed=42)


# Tokenization

In [6]:
tokenizer = DistilBertTokenizerFast.from_pretrained(
    "distilbert-base-uncased"
)

def tokenize(batch):
    return tokenizer(
        batch["comment_text"],
        truncation=True,
        padding="max_length",
        max_length=64
    )

dataset = dataset.map(tokenize, batched=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Map:   0%|          | 0/127656 [00:00<?, ? examples/s]

Map:   0%|          | 0/31915 [00:00<?, ? examples/s]

# Add Labels

In [7]:
def add_labels(batch):
    batch["labels"] = [
        [float(batch[col][i]) for col in label_cols]
        for i in range(len(batch[label_cols[0]]))
    ]
    return batch

dataset = dataset.map(add_labels, batched=True)


Map:   0%|          | 0/127656 [00:00<?, ? examples/s]

Map:   0%|          | 0/31915 [00:00<?, ? examples/s]

In [8]:
dataset.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"]
)


# Model & Training

In [9]:
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(label_cols),
    problem_type="multi_label_classification"
)


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
training_args = TrainingArguments(
    output_dir="./toxicity_results",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    fp16=True,
    logging_steps=500,
    save_strategy="no",
    report_to="none"
)

In [11]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer
)


In [12]:
trainer.train()


Step,Training Loss
500,0.1128
1000,0.052
1500,0.0509
2000,0.0503
2500,0.0499
3000,0.0472
3500,0.0482
4000,0.0472
4500,0.0436
5000,0.0452


TrainOutput(global_step=23937, training_loss=0.037713062754747895, metrics={'train_runtime': 1433.972, 'train_samples_per_second': 267.068, 'train_steps_per_second': 16.693, 'total_flos': 6341799196735488.0, 'train_loss': 0.037713062754747895, 'epoch': 3.0})

# Evaluation of DistilBert


In [13]:
from sklearn.metrics import classification_report, f1_score
import torch

preds = trainer.predict(dataset["test"])

y_true = preds.label_ids
y_prob = torch.sigmoid(torch.tensor(preds.predictions)).numpy()
y_pred = (y_prob > 0.5).astype(int)

print(classification_report(
    y_true,
    y_pred,
    target_names=label_cols,
    zero_division=0
))

print("Macro F1-score:",
      f1_score(y_true, y_pred, average="macro"))


               precision    recall  f1-score   support

        toxic       0.81      0.82      0.82      3018
 severe_toxic       0.51      0.39      0.44       299
      obscene       0.83      0.83      0.83      1676
       threat       0.54      0.46      0.50        87
       insult       0.76      0.76      0.76      1570
identity_hate       0.63      0.56      0.59       268

    micro avg       0.78      0.77      0.78      6918
    macro avg       0.68      0.64      0.66      6918
 weighted avg       0.78      0.77      0.78      6918
  samples avg       0.07      0.07      0.07      6918

Macro F1-score: 0.6553671056866153


# Save the Final Model

In [14]:
model.save_pretrained("toxicity_distilbert")
tokenizer.save_pretrained("toxicity_distilbert")

('toxicity_distilbert/tokenizer_config.json',
 'toxicity_distilbert/special_tokens_map.json',
 'toxicity_distilbert/vocab.txt',
 'toxicity_distilbert/added_tokens.json',
 'toxicity_distilbert/tokenizer.json')

# testing prediction

In [15]:
import torch
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification

label_cols = [
    "toxic", "severe_toxic", "obscene",
    "threat", "insult", "identity_hate"
]

tokenizer = DistilBertTokenizerFast.from_pretrained("toxicity_distilbert")
model = DistilBertForSequenceClassification.from_pretrained("toxicity_distilbert")
model.eval()


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

In [16]:
def predict_toxicity(text, threshold=0.5):
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=64
    )

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.sigmoid(outputs.logits).squeeze().numpy()

    results = {
        label: float(prob)
        for label, prob in zip(label_cols, probs)
    }

    predictions = {
        label: int(prob >= threshold)
        for label, prob in results.items()
    }

    return results, predictions


# Give Input and get Output

In [17]:
text = "You are a disgusting idiot and should be banned"
scores, preds = predict_toxicity(text)

scores, preds


({'toxic': 0.9979200959205627,
  'severe_toxic': 0.02491382323205471,
  'obscene': 0.5403298735618591,
  'threat': 0.004475066903978586,
  'insult': 0.9475356340408325,
  'identity_hate': 0.004426893778145313},
 {'toxic': 1,
  'severe_toxic': 0,
  'obscene': 1,
  'threat': 0,
  'insult': 1,
  'identity_hate': 0})

# Download the Model

In [18]:
!zip -r toxicity_distilbert.zip toxicity_distilbert


  adding: toxicity_distilbert/ (stored 0%)
  adding: toxicity_distilbert/tokenizer_config.json (deflated 76%)
  adding: toxicity_distilbert/config.json (deflated 52%)
  adding: toxicity_distilbert/vocab.txt (deflated 53%)
  adding: toxicity_distilbert/model.safetensors (deflated 8%)
  adding: toxicity_distilbert/tokenizer.json (deflated 71%)
  adding: toxicity_distilbert/special_tokens_map.json (deflated 42%)


In [19]:
from google.colab import files
files.download("toxicity_distilbert.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>