# mostly.classify vs AutoGluon

In [1]:
import os
from pathlib import Path

import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

from mostlyai import engine
from mostlyai.engine.domain import ModelType

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import plotly.express as px
import plotly.io as pio

# Make "notebook" plotly's default renderer for the figures shown in this notebook.
pio.renderers.default = "notebook"

In [3]:
# ---- Paths and task definition ----
ROOT = Path("..").resolve()
DATA = "census.csv.gz"
DATA_PATH = ROOT / "data" / DATA
TARGET = "income"
POS_LABEL = 1
ws = ROOT / f"ws-{DATA}-classify"  # workspace directory passed to the engine calls

# ---- Load the data and binarise the target ----
df = pd.read_csv(DATA_PATH)

# Encode only the TARGET column. A frame-wide `df.replace(...)` would also rewrite
# matching strings in any other column, and it relies on pandas' deprecated silent
# downcasting to turn the resulting object column into integers.
label_map = {"<=50K": 0, ">50K": 1}
encoded_target = df[TARGET].map(label_map)
if encoded_target.isna().any():
    # Fail early with the offending values instead of carrying NaN labels into training.
    unexpected = sorted(set(df.loc[encoded_target.isna(), TARGET].astype(str)))
    raise ValueError(f"Unexpected values in {TARGET!r}: {unexpected}")
df[TARGET] = encoded_target.astype("int64")

# Fixed random_state keeps the 80/20 split reproducible across runs.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)


Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



## AutoGluon

In [4]:
# ====== CONFIG ======
EVAL_METRIC = "roc_auc"  # metric passed to the AutoGluon predictor
TIME_LIMIT = 300  # total fit budget, in seconds
MODELS_TO_RUN = ["GBM", "XGB"]  # fast + strong model keys to train
SAVE_DIR = "ag_models_fast"  # directory the predictor is saved to

In [5]:
def run_autogluon(train_df, test_df):
    """Fit AutoGluon on ``train_df`` and return class probabilities for ``test_df``.

    Only the models listed in ``MODELS_TO_RUN`` are requested, each with empty
    (default) hyperparameters. The predictor is written to ``SAVE_DIR``, so
    every call uses that same directory.

    Parameters
    ----------
    train_df : DataFrame used for fitting; must contain the ``TARGET`` column.
    test_df : DataFrame to score.

    Returns
    -------
    The result of ``predict_proba`` for the model with the highest validation
    score on the leaderboard.
    """
    fit_frame = TabularDataset(train_df)
    score_frame = TabularDataset(test_df)

    # Restrict training to the configured model families.
    model_config = {model_key: {} for model_key in MODELS_TO_RUN}

    predictor = TabularPredictor(label=TARGET, path=SAVE_DIR, eval_metric=EVAL_METRIC)
    predictor.fit(
        train_data=fit_frame,
        time_limit=TIME_LIMIT,
        hyperparameters=model_config,
    )

    # Rank by validation score; for roc_auc a higher `score_val` is better.
    leaderboard = predictor.leaderboard(silent=True)
    ranked = leaderboard.sort_values("score_val", ascending=False)
    winner = ranked["model"].iloc[0]

    # Score the test rows with the winning model only.
    return predictor.predict_proba(score_frame, model=winner, as_pandas=True)

In [6]:
# Baseline: fit AutoGluon on the original training split and score the test split.
proba_ag = run_autogluon(train_df, test_df)

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.10.16
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #32~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Tue Sep  2 14:21:04 UTC 2
CPU Count:          12
Memory Avail:       7.74 GB / 30.95 GB (25.0%)
Disk Space Avail:   7.07 GB / 465.36 GB (1.5%)
	We recommend a minimum available disk space of 10 GB, and large datasets may require more.
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='extreme' : New in v1.4: Massively better than 'best' on datasets <30000 samples by using new models meta-learned on https://tabarena.ai: TabPFNv2, TabICL, Mitra, and TabM. Absolute best accuracy. Requires a GPU. Recommended 64 GB CPU memory and 32+ GB GPU memory.
	presets='best'    : Max

## MOSTLY engine

In [7]:
# Every column except the target serves as a predictor.
features = train_df.columns[train_df.columns != TARGET].tolist()

In [None]:
# Train the generator: run the engine steps in order, all against the workspace `ws`.
engine.split(workspace_dir=ws, tgt_data=train_df, model_type=ModelType.tabular)
engine.analyze(workspace_dir=ws)
engine.encode(workspace_dir=ws)
# NOTE(review): enable_flexible_generation=True is presumably what the later
# engine.classify call relies on — confirm against the engine documentation.
engine.train(workspace_dir=ws, enable_flexible_generation=True)
# Generate synthetic data (for comparison); sample_size is set to the training row count
engine.generate(workspace_dir=ws, sample_size=len(train_df))

In [None]:
# Classify TARGET using all other columns as features
# NOTE(review): the evaluation cell reads a "proba_1" column from this result —
# confirm the output schema of engine.classify.
proba_df = engine.classify(data=test_df, features=features, target=TARGET, workspace_dir=ws)

In [None]:
# AutoGluon needed the synthetic frame to carry the training dtypes, so cast it;
# errors="ignore" keeps a column unchanged when its cast fails.
train_dtypes = train_df.dtypes.to_dict()
syn_path = os.path.join(ws, "SyntheticData")
syn = pd.read_parquet(syn_path).astype(train_dtypes, errors="ignore")

# Same AutoGluon recipe, fitted on synthetic rows and scored on the real test split.
proba_syn_ag = run_autogluon(syn, test_df)

## Evaluation

In [None]:
# Positive-class scores per approach. The positive class is taken from POS_LABEL
# so the config cell stays the single source of truth for which label is scored.
# NOTE(review): `.values` drops the index, so this assumes every frame is in
# `test_df` row order — confirm for the engine.classify output.
approaches = {
    "original_AG": proba_ag[POS_LABEL].values,
    "syn_AG": proba_syn_ag[POS_LABEL].values,
    "probs": proba_df[f"proba_{POS_LABEL}"].values,
}
# True labels
y_true = test_df[TARGET].values

# One record per approach with its ROC AUC on the test split
results = [
    {"approach": name, "auc": roc_auc_score(y_true, y_score)}
    for name, y_score in approaches.items()
]

# Convert to summary DataFrame
summary_df = pd.DataFrame(results)
summary_df

In [None]:
# Zoom the x-axis onto the observed AUC range, padded by 10% of the spread on each side.
min_auc, max_auc = summary_df["auc"].agg(["min", "max"])
margin = (max_auc - min_auc) * 0.1

# Horizontal bars, one per approach, each labelled with its AUC.
fig = px.bar(
    summary_df,
    x="auc",
    y="approach",
    color="approach",
    orientation="h",
    text="auc",  # bar labels are taken from the AUC column
    title="AUC by Approach",
    labels={"approach": "Approach", "auc": "AUC"},
)
fig.update_traces(texttemplate="%{text:.4f}", textposition="outside")
fig.update_xaxes(range=[min_auc - margin, max_auc + margin])
fig.show()