In [1]:
import os
import tempfile
from datetime import datetime
from pathlib import Path

import joblib
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.calibration import CalibratedClassifierCV
from sklearn.feature_extraction.text import TfidfVectorizer, HashingVectorizer, TfidfTransformer
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.metrics import f1_score, classification_report
from sklearn.naive_bayes import ComplementNB
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

In [None]:
# =========================
# 1. MLflow tracking setup
# =========================
# Build the tracking URI with Path.as_uri() instead of gluing "file://" onto an
# absolute path: the concatenation is only a valid URI on POSIX (on Windows it
# yields "file://C:\..."), and it never percent-encodes characters like spaces.
MLRUNS_DIR = Path(os.path.abspath("../mlruns"))
mlflow.set_tracking_uri(MLRUNS_DIR.as_uri())
mlflow.set_experiment("toxic_comment_baselines")

# =========================
# 2. Load toxic dataset
# =========================
DATA_DIR = Path("../assets")
TEXT_COL = "comment_text"       # raw comment text fed to the vectorizers
LABEL_COL = "moderation_label"  # target column used for training and scoring

train_df = pd.read_csv(DATA_DIR / "comments_train.csv")
test_df = pd.read_csv(DATA_DIR / "comments_test.csv")

X_train = train_df[TEXT_COL]
y_train = train_df[LABEL_COL]

X_test = test_df[TEXT_COL]
y_test = test_df[LABEL_COL]

2025/08/07 18:58:01 INFO mlflow.tracking.fluent: Experiment with name 'toxic_comment_baselines' does not exist. Creating a new experiment.


In [3]:
# =========================
# 3. Model configs
# =========================
# Fixed seed for the estimators that shuffle data internally, so a fresh
# "Restart & Run All" reproduces the same scores.
SEED = 42


def _char35_tfidf():
    """Return a fresh, unfitted character 3-5-gram TF-IDF vectorizer.

    A new instance is built on every call so that no two configs share the
    same vectorizer object.
    """
    return TfidfVectorizer(analyzer="char", ngram_range=(3, 5), lowercase=True, max_features=300000)


# Each entry describes one baseline:
#   name       -> MLflow run name (also used in the saved artifact file name)
#   vectorizer -> text-to-features step
#   model      -> classifier trained on those features
configs = [
    {
        "name": "tfidf_word12_logreg_balanced",
        "vectorizer": TfidfVectorizer(ngram_range=(1, 2), lowercase=True, max_features=200000),
        "model": LogisticRegression(max_iter=1000, class_weight="balanced", solver="lbfgs", C=1.0)
    },
    {
        "name": "tfidf_char35_logreg_balanced",
        "vectorizer": _char35_tfidf(),
        "model": LogisticRegression(max_iter=1000, class_weight="balanced", solver="lbfgs", C=1.0)
    },
    {
        "name": "tfidf_char35_linearSVC_balanced",
        "vectorizer": _char35_tfidf(),
        "model": LinearSVC(class_weight="balanced", C=1.0, random_state=SEED)
    },
    {
        "name": "tfidf_char35_complementNB",
        "vectorizer": _char35_tfidf(),
        "model": ComplementNB(alpha=0.5)
    },
    {
        "name": "hashing_sgd_logloss",
        # Hashing keeps memory bounded; TfidfTransformer then re-weights the hashed counts.
        "vectorizer": Pipeline([
            ("hash", HashingVectorizer(ngram_range=(1, 2), lowercase=True, n_features=2**18, alternate_sign=False)),
            ("tfidf", TfidfTransformer())
        ]),
        # SGD shuffles the training data each epoch, so it must be seeded.
        "model": SGDClassifier(loss="log_loss", alpha=1e-4, max_iter=1000, random_state=SEED)
    },
    {
        "name": "linearSVC_char35_calibrated",
        "vectorizer": _char35_tfidf(),
        # Sigmoid calibration wraps the SVM in a 5-fold calibrated classifier.
        "model": CalibratedClassifierCV(
            estimator=LinearSVC(class_weight="balanced", C=1.0, random_state=SEED),
            method="sigmoid",
            cv=5
        )
    }
]

In [4]:
# =========================
# 4. Train, evaluate, log each
# =========================
# For every config: fit a vectorizer+model pipeline on the training split,
# score it on the test split, and record params, metrics and the fitted
# pipeline in its own MLflow run.
for cfg in configs:
    print(f"Running: {cfg['name']}")
    with mlflow.start_run(run_name=cfg["name"]):
        # Build pipeline
        pipeline = Pipeline([
            ("vectorizer", cfg["vectorizer"]),
            ("model", cfg["model"])
        ])

        # Fit
        pipeline.fit(X_train, y_train)

        # Predict
        preds = pipeline.predict(X_test)

        # Metrics
        f1_w = f1_score(y_test, preds, average="weighted")
        f1_m = f1_score(y_test, preds, average="macro")
        print(f"F1 (weighted): {f1_w:.4f} | F1 (macro): {f1_m:.4f}")
        print(classification_report(y_test, preds))

        # Log params
        mlflow.log_param("vectorizer", cfg["vectorizer"].__class__.__name__)
        mlflow.log_param("model", cfg["model"].__class__.__name__)
        # Class names alone cannot tell runs apart (e.g. word vs. char TF-IDF
        # with the same classifier), so also record each step's top-level
        # hyperparameters under "<step>__<param>" keys.
        mlflow.log_params({
            f"{step}__{param}": value
            for step in ("vectorizer", "model")
            for param, value in cfg[step].get_params(deep=False).items()
        })
        mlflow.log_metric("f1_weighted", f1_w)
        mlflow.log_metric("f1_macro", f1_m)

        # Save artifacts
        # Dump into a throwaway directory so repeated runs do not litter the
        # working directory with timestamped .joblib files; the copy is logged
        # to MLflow before the temporary directory is removed.
        model_filename = f"{cfg['name']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.joblib"
        with tempfile.TemporaryDirectory() as tmp_dir:
            model_path = os.path.join(tmp_dir, model_filename)
            joblib.dump(pipeline, model_path)
            mlflow.log_artifact(model_path)

        mlflow.sklearn.log_model(pipeline, artifact_path="model")

print("✅ All runs completed. Start MLflow UI with: mlflow ui --port 5000")


Running: tfidf_word12_logreg_balanced
F1 (weighted): 0.9015 | F1 (macro): 0.7688
              precision    recall  f1-score   support

           0       0.99      0.89      0.93     57735
           1       0.46      0.89      0.60      6243

    accuracy                           0.89     63978
   macro avg       0.72      0.89      0.77     63978
weighted avg       0.93      0.89      0.90     63978





Running: tfidf_char35_logreg_balanced
F1 (weighted): 0.8927 | F1 (macro): 0.7571
              precision    recall  f1-score   support

           0       0.99      0.87      0.93     57735
           1       0.43      0.92      0.59      6243

    accuracy                           0.87     63978
   macro avg       0.71      0.90      0.76     63978
weighted avg       0.94      0.87      0.89     63978





Running: tfidf_char35_linearSVC_balanced
F1 (weighted): 0.9117 | F1 (macro): 0.7868
              precision    recall  f1-score   support

           0       0.99      0.90      0.94     57735
           1       0.49      0.88      0.63      6243

    accuracy                           0.90     63978
   macro avg       0.74      0.89      0.79     63978
weighted avg       0.94      0.90      0.91     63978





Running: tfidf_char35_complementNB
F1 (weighted): 0.8797 | F1 (macro): 0.7194
              precision    recall  f1-score   support

           0       0.97      0.87      0.92     57735
           1       0.39      0.77      0.52      6243

    accuracy                           0.86     63978
   macro avg       0.68      0.82      0.72     63978
weighted avg       0.92      0.86      0.88     63978





Running: hashing_sgd_logloss




F1 (weighted): 0.8886 | F1 (macro): 0.6116
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     57735
           1       0.93      0.16      0.27      6243

    accuracy                           0.92     63978
   macro avg       0.92      0.58      0.61     63978
weighted avg       0.92      0.92      0.89     63978





Running: linearSVC_char35_calibrated
F1 (weighted): 0.9312 | F1 (macro): 0.8186
              precision    recall  f1-score   support

           0       0.98      0.94      0.96     57735
           1       0.59      0.80      0.68      6243

    accuracy                           0.93     63978
   macro avg       0.78      0.87      0.82     63978
weighted avg       0.94      0.93      0.93     63978





✅ All runs completed. Start MLflow UI with: mlflow ui --port 5000
