In [1]:
import os
import mlflow
import mlflow.sklearn
import pandas as pd
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import f1_score, classification_report
from datetime import datetime
from flaml.automl.automl import AutoML

In [None]:
# =========================
# 1. MLflow setup
# =========================
mlflow.set_tracking_uri("file://" + os.path.abspath("../mlruns"))
mlflow.set_experiment("toxic_comment_flaml")

# =========================
# 2. Load toxic dataset
# =========================
train_df = pd.read_csv("../assets/comments_train.csv")
test_df = pd.read_csv("../assets/comments_test.csv")

X_train = train_df["comment_text"]
y_train = train_df["moderation_label"]

X_test = test_df["comment_text"]
y_test = test_df["moderation_label"]

In [3]:
# =========================
# 3. Fixed preprocessing (TF-IDF char 3-5, like best manual model)
# =========================
vectorizer = TfidfVectorizer(analyzer="char", ngram_range=(3,5),
                             lowercase=True, max_features=100000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# =========================
# 4. FLAML AutoML search
# =========================
automl = AutoML()

In [4]:
settings = {
    "time_budget": 60,  # seconds
    "metric": "f1",      # macro-F1
    "task": "classification",
    "log_file_name": "flaml_toxic.log",
    "estimator_list": [
        "lrl2", "lgbm", "rf"
    ],
    "eval_method": "cv",
    "n_splits": 3,
    "seed": 42
}

print("🔍 Starting FLAML AutoML search...")
automl.fit(X_train_tfidf, y_train, **settings)

print(f"✅ Best model: {automl.best_estimator}")
print(f"✅ Best config: {automl.best_config}")
print(f"✅ Best CV score: {automl.best_loss}")

# =========================
# 5. Evaluate on test set & log to MLflow
# =========================
with mlflow.start_run(run_name="flaml_tfidf_char35"):
    preds = automl.predict(X_test_tfidf)

    f1_w = f1_score(y_test, preds, average="weighted")
    f1_m = f1_score(y_test, preds, average="macro")

    print(f"F1 (weighted): {f1_w:.4f} | F1 (macro): {f1_m:.4f}")
    print(classification_report(y_test, preds))

    # Log params & metrics
    mlflow.log_param("vectorizer", "TfidfVectorizer(char 3-5, max_features=300k)")
    mlflow.log_param("best_estimator", automl.best_estimator)
    mlflow.log_params(automl.best_config)
    mlflow.log_metric("f1_weighted", f1_w)
    mlflow.log_metric("f1_macro", f1_m)

    # Save artifacts
    model_path = f"flaml_char35_{datetime.now().strftime('%Y%m%d_%H%M%S')}.joblib"
    joblib.dump({"vectorizer": vectorizer, "model": automl}, model_path)
    mlflow.log_artifact(model_path)

    mlflow.sklearn.log_model(automl, artifact_path="model")

print("🚀 FLAML run complete. View results in MLflow UI with: mlflow ui --port 5001")

🔍 Starting FLAML AutoML search...
[flaml.automl.logger: 08-07 19:37:59] {1752} INFO - task = classification
[flaml.automl.logger: 08-07 19:37:59] {1763} INFO - Evaluation method: cv
[flaml.automl.logger: 08-07 19:37:59] {1862} INFO - Minimizing error metric: 1-f1
[flaml.automl.logger: 08-07 19:37:59] {1979} INFO - List of ML learners in AutoML Run: ['lrl2', 'lgbm', 'rf']
[flaml.automl.logger: 08-07 19:37:59] {2282} INFO - iteration 0, current learner lrl2
[flaml.automl.logger: 08-07 19:38:27] {2417} INFO - Estimated sufficient time budget=278659s. Estimated necessary time budget=279s.
[flaml.automl.logger: 08-07 19:38:27] {2466} INFO -  at 28.1s,	estimator lrl2's best error=0.2634,	best estimator lrl2's best error=0.2634
[flaml.automl.logger: 08-07 19:38:27] {2282} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 08-07 19:43:21] {2466} INFO -  at 322.2s,	estimator lgbm's best error=1.0000,	best estimator lrl2's best error=0.2634
[flaml.automl.logger: 08-07 19:43:38] {2724