In [3]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import roc_auc_score


In [4]:
# Read the training CSV from the data directory one level above the notebook.
train_csv = "../data/jigsaw-unintended-bias-train.csv"
df = pd.read_csv(train_csv)

# Derive the boolean target: a `toxic` value of 0.5 or more counts as toxic.
df["toxicity"] = df["toxic"].ge(0.5)

print("Dataset shape:", df.shape)


Dataset shape: (1902194, 46)


In [5]:
# Model input is the `comment_text` column; the target is the boolean
# `toxicity` flag.
X, y = df["comment_text"], df["toxicity"]

# Hold out 20% of the rows. Stratifying on the label keeps the class ratio
# the same in both parts, and the fixed seed makes the split repeatable.
split_kwargs = {"test_size": 0.2, "stratify": y, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

print("Train size:", X_train.shape[0])
print("Test size:", X_test.shape[0])


Train size: 1521755
Test size: 380439


In [6]:
# TF-IDF over unigrams and bigrams, with the vocabulary capped at 50,000
# entries and scikit-learn's built-in English stop-word list applied.
vectorizer_kwargs = {
    "max_features": 50000,
    "ngram_range": (1, 2),
    "stop_words": "english",
}
tfidf = TfidfVectorizer(**vectorizer_kwargs)

# The vocabulary and IDF weights are learned from the training comments only;
# the test comments are projected with that already-fitted vectorizer.
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

print("TF-IDF matrix shape:", X_train_tfidf.shape)


TF-IDF matrix shape: (1521755, 50000)


In [7]:
# Baseline classifier: a linear model fitted with stochastic gradient descent.
sgd_kwargs = {
    "loss": "log_loss",           # logistic-regression objective
    "class_weight": "balanced",   # reweight classes by inverse frequency
    "max_iter": 20,
    "n_jobs": -1,
    "random_state": 42,
}

# fit() returns the estimator itself, so construction and training chain.
model = SGDClassifier(**sgd_kwargs).fit(X_train_tfidf, y_train)

print("Baseline model trained")


Baseline model trained


In [8]:
# Keep only column 1 of predict_proba, i.e. the probability of the second
# entry in model.classes_ (True for the boolean toxicity target).
y_pred_proba = model.predict_proba(X_test_tfidf)[:, 1]

# Threshold-free ranking quality of the baseline on the held-out comments.
roc_auc = roc_auc_score(y_true=y_test, y_score=y_pred_proba)

print("Baseline ROC-AUC:", round(roc_auc, 4))


Baseline ROC-AUC: 0.9147


In [9]:
# Identity annotation columns to carry over from the original frame.
identity_columns = [
    'male', 'female', 'muslim', 'jewish',
    'black', 'white', 'christian',
    'homosexual_gay_or_lesbian'
]

# One row per held-out comment: its text, true label and baseline probability.
test_df = pd.DataFrame(
    {"comment_text": X_test, "true_label": y_test, "pred_proba": y_pred_proba}
)

# Look up each identity column for the test rows by their original row labels
# and treat a missing annotation as 0.
test_df = test_df.assign(
    **{col: df.loc[X_test.index, col].fillna(0) for col in identity_columns}
)


In [10]:
# Report the baseline ROC-AUC overall and separately for every identity
# subgroup of the held-out test set.
print("Overall ROC-AUC:", round(roc_auc, 4))
print("\nSubgroup AUC:")

# Subgroups with this many rows or fewer are not scored (too few samples).
min_subgroup_rows = 100

for col in identity_columns:
    # NOTE(review): membership uses a strict `> 0.5`, whereas the toxicity
    # label is binarised with `>= 0.5`; confirm the strict comparison is
    # intended before comparing against other benchmarks.
    subgroup = test_df[test_df[col] > 0.5]

    if len(subgroup) <= min_subgroup_rows:
        # Say so explicitly instead of silently dropping the group.
        print(f"{col}: skipped (only {len(subgroup)} rows)")
        continue

    # roc_auc_score raises ValueError when y_true contains a single class;
    # guard against it so one degenerate subgroup cannot abort the report.
    if subgroup["true_label"].nunique() < 2:
        print(f"{col}: skipped (only one class present)")
        continue

    auc = roc_auc_score(subgroup["true_label"], subgroup["pred_proba"])
    print(f"{col}: {round(auc, 4)}")


Overall ROC-AUC: 0.9147

Subgroup AUC:
male: 0.8551
female: 0.8512
muslim: 0.7857
jewish: 0.8264
black: 0.7536
white: 0.7736
christian: 0.8645
homosexual_gay_or_lesbian: 0.7483


In [11]:
# NOTE(review): consider hoisting these imports into the notebook's import
# cell so that all dependencies are declared in one place.
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV

# LinearSVC has no predict_proba, so it is wrapped in CalibratedClassifierCV,
# which fits it on cross-validation folds and learns a mapping from its
# decision scores to probabilities.
# random_state is pinned to the same seed as the data split and the SGD
# baseline so that solver configurations which shuffle the data stay
# reproducible across runs.
svm = LinearSVC(class_weight="balanced", random_state=42)

calibrated_svm = CalibratedClassifierCV(svm)

calibrated_svm.fit(X_train_tfidf, y_train)

print("SVM model trained ")


SVM model trained 


In [12]:
# Keep only column 1 of the calibrated probabilities, i.e. the second entry
# of calibrated_svm.classes_ (True for the boolean toxicity target).
y_pred_proba_svm = calibrated_svm.predict_proba(X_test_tfidf)[:, 1]

# Same held-out metric as the baseline, for a like-for-like comparison.
roc_auc_svm = roc_auc_score(y_true=y_test, y_score=y_pred_proba_svm)

print("SVM ROC-AUC:", round(roc_auc_svm, 4))


SVM ROC-AUC: 0.9326


In [14]:
# Replace the row labels with a fresh 0..n-1 index, then attach the calibrated
# SVM probabilities as a new column (an array is assigned by position).
test_df = test_df.reset_index(drop=True).assign(svm_pred_proba=y_pred_proba_svm)


In [15]:
# Report the calibrated-SVM ROC-AUC overall and separately for every identity
# subgroup of the held-out test set.
print("Overall SVM ROC-AUC:", round(roc_auc_svm, 4))
print("\nSVM Subgroup AUC:")

# Subgroups with this many rows or fewer are not scored (too few samples).
min_subgroup_rows = 100

for col in identity_columns:
    # NOTE(review): membership uses a strict `> 0.5`, whereas the toxicity
    # label is binarised with `>= 0.5`; confirm the strict comparison is
    # intended before comparing against other benchmarks.
    subgroup = test_df[test_df[col] > 0.5]

    if len(subgroup) <= min_subgroup_rows:
        # Say so explicitly instead of silently dropping the group.
        print(f"{col}: skipped (only {len(subgroup)} rows)")
        continue

    # roc_auc_score raises ValueError when y_true contains a single class;
    # guard against it so one degenerate subgroup cannot abort the report.
    if subgroup["true_label"].nunique() < 2:
        print(f"{col}: skipped (only one class present)")
        continue

    auc = roc_auc_score(subgroup["true_label"],
                        subgroup["svm_pred_proba"])
    print(f"{col}: {round(auc, 4)}")


Overall SVM ROC-AUC: 0.9326

SVM Subgroup AUC:
male: 0.8843
female: 0.8826
muslim: 0.8083
jewish: 0.8523
black: 0.7926
white: 0.7964
christian: 0.8802
homosexual_gay_or_lesbian: 0.7631
