# MLflow Demo: Experiment Tracking in Practice

In this notebook we will:

1. Train a simple ML model (RandomForest on Iris).
2. Log parameters, metrics, and the trained model to **MLflow**.
3. Compare multiple runs in the **MLflow UI dashboard**.

In [None]:
!pip install mlflow scikit-learn matplotlib -q


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.0/40.0 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m55.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m61.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m51.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.9/114.9 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.8/76.8 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Cell 2: Imports and MLflow setup

import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
import matplotlib.pyplot as plt
import numpy as np
import os
import tempfile

# Name of the experiment that groups every run logged from this notebook
experiment_name = "MLflow_Iris_Demo"

# Store run data under the relative "mlruns" path, then select the experiment
mlflow.set_tracking_uri("mlruns")
mlflow.set_experiment(experiment_name)

# Echo the effective configuration so it is visible in the cell output
print("MLflow tracking URI:", mlflow.get_tracking_uri())
print("MLflow experiment name:", experiment_name)


  return FileStore(store_uri, store_uri)
2025/11/30 19:58:42 INFO mlflow.tracking.fluent: Experiment with name 'MLflow_Iris_Demo' does not exist. Creating a new experiment.


MLflow tracking URI: mlruns
MLflow experiment name: MLflow_Iris_Demo


In [None]:
# Cell 3: Load Iris dataset

iris = load_iris()
X, y = iris.data, iris.target

# Quick look at what was loaded
for label, value in (
    ("Features shape:", X.shape),
    ("Targets shape:", y.shape),
    ("Target classes:", iris.target_names),
):
    print(label, value)

# Hold out 30% of the samples for testing, stratified on the class labels,
# with a fixed seed so the split is the same on every run
split_options = {"test_size": 0.3, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


Features shape: (150, 4)
Targets shape: (150,)
Target classes: ['setosa' 'versicolor' 'virginica']


In [None]:
# Cell 4: Train & log function

def train_and_log_model(n_estimators=100, max_depth=None, run_name=None):
    """
    Fit a RandomForest on the training split and record the run in MLflow.

    Inside a single MLflow run this logs the two hyperparameters, the
    test-set accuracy and weighted F1, the fitted model (artifact path
    "model") and a plain-text classification report (artifact path
    "reports"). The report, the run id and both scores are also printed.

    Reads the notebook-level names ``X_train``, ``X_test``, ``y_train``,
    ``y_test`` and ``iris`` defined in earlier cells.

    Args:
        n_estimators: Forwarded to ``RandomForestClassifier(n_estimators=...)``.
        max_depth: Forwarded to ``RandomForestClassifier(max_depth=...)``;
            ``None`` is passed through unchanged.
        run_name: Forwarded to ``mlflow.start_run(run_name=...)``.
    """
    hyperparams = {"n_estimators": n_estimators, "max_depth": max_depth}

    with mlflow.start_run(run_name=run_name) as run:
        # Record the hyperparameters before training
        for key, value in hyperparams.items():
            mlflow.log_param(key, value)

        # Fit the classifier with a fixed seed
        forest = RandomForestClassifier(random_state=42, **hyperparams)
        forest.fit(X_train, y_train)

        # Score on the held-out split
        predictions = forest.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)
        weighted_f1 = f1_score(y_test, predictions, average="weighted")

        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("f1_weighted", weighted_f1)

        # Persist the fitted estimator alongside the run
        mlflow.sklearn.log_model(forest, artifact_path="model")

        # Per-class report: printed here and attached to the run as a text file
        report_text = classification_report(
            y_test, predictions, target_names=iris.target_names
        )
        print("Classification report:\n", report_text)

        # log_artifact takes a file path, so write the report to a throwaway
        # directory and upload it before the directory is removed
        with tempfile.TemporaryDirectory() as scratch_dir:
            report_file = os.path.join(scratch_dir, "classification_report.txt")
            with open(report_file, "w") as handle:
                handle.write(report_text)
            mlflow.log_artifact(report_file, artifact_path="reports")

        print(f"Logged to run_id: {run.info.run_id}")
        print(f"Accuracy: {accuracy:.4f} | F1-weighted: {weighted_f1:.4f}")


In [None]:
# Cell 5: Run a few experiments with different hyperparameters

# Each entry pairs the banner printed before a run with the keyword
# arguments handed to train_and_log_model for that run.
experiments = [
    (
        "=== Run 1: small forest, shallow depth ===",
        {"n_estimators": 20, "max_depth": 3, "run_name": "rf_small_shallow"},
    ),
    (
        "\n=== Run 2: medium forest, deeper ===",
        {"n_estimators": 50, "max_depth": 5, "run_name": "rf_medium"},
    ),
    (
        "\n=== Run 3: large forest, unlimited depth ===",
        {"n_estimators": 200, "max_depth": None, "run_name": "rf_large_deep"},
    ),
]

for banner, settings in experiments:
    print(banner)
    train_and_log_model(**settings)


=== Run 1: small forest, shallow depth ===




Classification report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.82      0.93      0.88        15
   virginica       0.92      0.80      0.86        15

    accuracy                           0.91        45
   macro avg       0.92      0.91      0.91        45
weighted avg       0.92      0.91      0.91        45

Logged to run_id: 5975bd5c0de344aeb522dc156fc0a3ff
Accuracy: 0.9111 | F1-weighted: 0.9107

=== Run 2: medium forest, deeper ===




Classification report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.78      0.93      0.85        15
   virginica       0.92      0.73      0.81        15

    accuracy                           0.89        45
   macro avg       0.90      0.89      0.89        45
weighted avg       0.90      0.89      0.89        45

Logged to run_id: d937e1000f9f47208a506de7b12ca16a
Accuracy: 0.8889 | F1-weighted: 0.8878

=== Run 3: large forest, unlimited depth ===




Classification report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.82      0.93      0.88        15
   virginica       0.92      0.80      0.86        15

    accuracy                           0.91        45
   macro avg       0.92      0.91      0.91        45
weighted avg       0.92      0.91      0.91        45

Logged to run_id: d7ed3894ee914ab2b46d5ff815edad52
Accuracy: 0.9111 | F1-weighted: 0.9107


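To compare the runs, start the MLflow UI from a terminal in the directory that contains the `mlruns` folder, then open http://127.0.0.1:5000 in a browser:

```bash
mlflow ui --host 127.0.0.1 --port 5000
```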
