# Parent-Child Runs in MLflow

This is a minimal notebook demonstrating parent-child runs in MLflow.

In [None]:
# Import common libraries
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

import mlflow

In [None]:
# Load the Iris dataset and hold out 20% of the samples for evaluation.
# The fixed random_state keeps the split reproducible between executions.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Candidate classifiers to compare, keyed by the display name that is later
# used for the child run name and the "model_type" parameter.
# Every estimator is seeded with the same random_state for reproducibility.
models = {
    "RandomForest": RandomForestClassifier(
        random_state=42,
    ),
    "LogisticRegression": LogisticRegression(
        max_iter=200,
        random_state=42,
    ),
    "SVM": SVC(
        probability=True,
        random_state=42,
    ),
}

In [None]:
# Parent run to compare multiple models.
# The parent run records the shared experiment settings and the overall
# winner; every candidate model is trained and scored in its own nested
# child run.
with mlflow.start_run(run_name="parent_model_comparison") as parent_run:
    parent_run_id = parent_run.info.run_id
    mlflow.log_param("dataset", "iris")
    mlflow.log_param("test_size", 0.2)

    # Defaults that are logged unchanged when `models` is empty.
    best_accuracy = 0
    best_model = None

    # Child runs for each model
    for model_name, model in models.items():
        # nested=True starts this run as a child of the active parent run.
        with mlflow.start_run(run_name=f"child_{model_name}", nested=True) as child_run:
            # Log model info
            mlflow.log_param("model_type", model_name)

            # Train on the training split, then score on the held-out split
            model.fit(X_train, y_train)
            accuracy = model.score(X_test, y_test)

            # Log metrics
            mlflow.log_metric("accuracy", accuracy)

            # Track best model. The first evaluated model is always accepted,
            # so a winner is recorded even when every model scores 0.0
            # (a plain `accuracy > 0` check would leave best_model as None).
            # The strict `>` keeps the earliest model on ties.
            if best_model is None or accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model = model_name

    # Log best model in parent run (back in the parent's context, since the
    # child runs have been closed by their `with` blocks).
    mlflow.log_param("best_model", best_model)
    mlflow.log_metric("best_accuracy", best_accuracy)