In [1]:

import mlflow.sklearn
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Tunable values for this notebook, gathered in one place.
EXPERIMENT_NAME = "Wine_Quality_Baseline_Models"
GOOD_QUALITY_THRESHOLD = 6  # quality scores at or above this become class 1
TEST_FRACTION = 0.2
SPLIT_SEED = 42

# 1. Select the MLflow experiment that the runs below are recorded under
mlflow.set_experiment(EXPERIMENT_NAME)

# 2. Turn on scikit-learn autologging
mlflow.sklearn.autolog()

# 3. Fetch the "wine-quality-red" dataset (version 1) from OpenML as pandas objects
wine = fetch_openml("wine-quality-red", version=1, as_frame=True)
X = wine.data

# Binarize the target: cast the quality score to int, then flag scores that
# reach the threshold as 1 and everything else as 0.
quality_scores = wine.target.astype(int)
y = (quality_scores >= GOOD_QUALITY_THRESHOLD).astype(int)

# Hold out a fixed, seeded fraction of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

print("Setup complete. Data loaded and autologging enabled.")

Setup complete. Data loaded and autologging enabled.


In [2]:
# Search space for the baseline comparison.
# Each entry pairs an estimator class with the hyperparameter sets to try;
# every set is passed as keyword arguments to that class's constructor.
model_configs = [
    (
        LogisticRegression,
        [
            dict(max_iter=100, solver="liblinear"),
            dict(max_iter=500, solver="lbfgs"),
        ],
    ),
    (
        DecisionTreeClassifier,
        [
            dict(max_depth=5, criterion="gini"),
            dict(max_depth=10, criterion="entropy"),
        ],
    ),
    (
        RandomForestClassifier,
        [
            dict(n_estimators=50, max_depth=5),
            dict(n_estimators=150, max_depth=10),
        ],
    ),
    (
        AdaBoostClassifier,
        [
            dict(n_estimators=50, learning_rate=0.1),
            dict(n_estimators=100, learning_rate=1.0),
        ],
    ),
]

# Seed applied to every estimator that exposes `random_state`, so that
# Restart & Run All reproduces the same metrics (and the same "best" run).
MODEL_SEED = 42

# Loop through models and hyperparameters: one MLflow run per combination
for model_class, param_list in model_configs:
    # Checked once per class: does this estimator take a `random_state` argument?
    supports_seed = "random_state" in model_class().get_params()

    for params in param_list:
        # Copy so the shared config dictionaries are never mutated, and let an
        # explicit `random_state` in the config win over the default seed.
        init_kwargs = dict(params)
        if supports_seed:
            init_kwargs.setdefault("random_state", MODEL_SEED)

        # Put the hyperparameters in the run name; otherwise both configurations
        # of the same classifier share one name and cannot be told apart in the UI.
        param_suffix = "_".join(f"{key}={value}" for key, value in params.items())
        run_name = f"{model_class.__name__}_{param_suffix}"

        # Start a manual run to give it a descriptive name in UI
        with mlflow.start_run(run_name=run_name):
            # Initialize and train
            model = model_class(**init_kwargs)
            model.fit(X_train, y_train)

            # Predictions on the held-out split
            y_pred = model.predict(X_test)

            # 4. Record manual evaluation metrics on the held-out test set.
            # These are logged explicitly under `test_*` names because the
            # best-run lookup later in the notebook ranks runs by `test_f1`.
            mlflow.log_metrics({
                "test_accuracy": accuracy_score(y_test, y_pred),
                "test_precision": precision_score(y_test, y_pred),
                "test_recall": recall_score(y_test, y_pred),
                "test_f1": f1_score(y_test, y_pred)
            })

            print(f"Finished run for {model_class.__name__} with params: {params}")

Finished run for AdaBoostClassifier with params: {'n_estimators': 100, 'learning_rate': 1.0}


In [3]:
# Search for the best model based on the test_f1 metric we logged
RANKING_METRIC = "metrics.test_f1"

runs = mlflow.search_runs(experiment_names=["Wine_Quality_Baseline_Models"])

# The metric column only exists if at least one returned run logged it, and
# runs that failed before logging carry NaN. Keep only runs that have a score.
if RANKING_METRIC in runs.columns:
    scored_runs = runs.dropna(subset=[RANKING_METRIC])
else:
    scored_runs = runs.iloc[0:0]

if scored_runs.empty:
    # Fail with an actionable message instead of a bare IndexError/KeyError.
    raise RuntimeError(
        "No runs with a logged 'test_f1' metric were found in experiment "
        "'Wine_Quality_Baseline_Models'. Run the training cell first."
    )

best_run = scored_runs.sort_values(RANKING_METRIC, ascending=False).iloc[0]

print(f"Best Model Type: {best_run['tags.mlflow.runName']}")
print(f"Best Test F1: {best_run['metrics.test_f1']}")
print(f"Run ID: {best_run['run_id']}")

# Example of how to load the model artifact from that run
loaded_model = mlflow.sklearn.load_model(f"runs:/{best_run['run_id']}/model")


