# ITU 2025/2026 MLOps Presentation

**Core demo focus:**
- Logging multiple runs across models
- Logging hyperparameters via `mlflow.log_params`
- Logging metrics via `mlflow.log_metric`
- Logging models via `mlflow.sklearn.log_model`
- **All visuals are reviewed in MLflow UI using Compare & Metrics panels, not plotted in the notebook**

**Models demonstrated:**
- Logistic Regression (baseline run plus a sweep over `C`)
- Random Forest (sweep over `n_estimators`)
- K-Nearest Neighbors (sweep over `k`)

After running all cells, start the UI with `mlflow ui --host 127.0.0.1 --port 5000`, open http://127.0.0.1:5000 in a browser, and use the **Compare view** for the presentation.


In [11]:
import numpy as np
import pandas as pd

import mlflow
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Select the experiment that the runs started later in this notebook are
# recorded under, so they can be compared side by side in the MLflow UI.
mlflow.set_experiment("MLflow Class Demo")

# Digits dataset (10 classes, 64 features), unpacked as features X and labels y.
X, y = datasets.load_digits(return_X_y=True)

# Hold out 30% of the samples for evaluation. Stratifying on y keeps the class
# balance the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)


2025/12/01 12:08:20 INFO mlflow.tracking.fluent: Experiment with name 'MLflow Class Demo' does not exist. Creating a new experiment.


In [12]:
# Baseline Logistic Regression: a single reference run with default regularization.

# `multi_class` is deliberately not set. scikit-learn deprecated the parameter
# in 1.5 (passing it explicitly emits a FutureWarning) and the previous value,
# "auto", was already the effective default, so the fitted model is unchanged.
params = {
    "solver": "lbfgs",
    "max_iter": 500,
    "random_state": 8888,
}

with mlflow.start_run(run_name="LR_baseline"):
    # Log hyperparameters
    mlflow.log_params(params)

    # Train the model
    lr = LogisticRegression(**params)
    lr.fit(X_train, y_train)

    # Log the fitted model under an explicit name; the one-row input example
    # is passed along so it is stored with the model.
    mlflow.sklearn.log_model(
        sk_model=lr,
        name="digits_lr_baseline",
        input_example=X_test[:1],
    )

    # Predict on the held-out test set
    y_pred = lr.predict(X_test)

    # Compute metrics; precision/recall/F1 are macro-averaged over the classes.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="macro")
    recall = recall_score(y_test, y_pred, average="macro")
    f1 = f1_score(y_test, y_pred, average="macro")

    # Log each metric under the same key used by the other runs in this
    # notebook so they line up when runs are compared.
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision_macro", precision)
    mlflow.log_metric("recall_macro", recall)
    mlflow.log_metric("f1_macro", f1)

    # Free-text description attached to the run.
    mlflow.set_tag("Training Info", "Baseline Logistic Regression on Digits dataset")

    print(f"LR_baseline -> accuracy={accuracy:.3f}, f1_macro={f1:.3f}")




LR_baseline -> accuracy=0.957, f1_macro=0.957


In [16]:
# Logistic Regression experiments with different C values (one MLflow run per value)

C_real = [0.01, 0.1, 1.0, 5.0, 10.0]   # actual hyperparameters

# Name-safe labels derived from the values themselves ("0.01" -> "0_01"), so
# the two lists cannot drift out of sync when a value is added or changed.
# NOTE(review): the "." is presumably replaced because it is not accepted in
# MLflow model names -- confirm against the MLflow version in use.
C_values = [str(value).replace(".", "_") for value in C_real]

for C_name, C in zip(C_values, C_real):
    # `multi_class` is deliberately not set. scikit-learn deprecated the
    # parameter in 1.5 (passing it explicitly emits a FutureWarning) and the
    # previous value, "auto", was already the effective default.
    params_lr = {
        "solver": "lbfgs",
        "max_iter": 500,
        "random_state": 8888,
        "C": C,
    }

    # The sanitized label C_name is used for the run name, the model name,
    # the tag text and the printed summary; the real value C is what is
    # logged as a parameter and used for training.
    with mlflow.start_run(run_name=f"LR_C={C_name}"):
        mlflow.log_params(params_lr)

        model = LogisticRegression(**params_lr)
        model.fit(X_train, y_train)

        # Log the fitted model together with a one-row input example.
        mlflow.sklearn.log_model(
            sk_model=model,
            name=f"digits_lr_C_{C_name}",
            input_example=X_test[:1],
        )

        y_pred = model.predict(X_test)

        # Precision/recall/F1 are macro-averaged over the classes.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average="macro")
        recall = recall_score(y_test, y_pred, average="macro")
        f1 = f1_score(y_test, y_pred, average="macro")

        # Same metric keys as the other runs in this notebook so they line up
        # when runs are compared.
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision_macro", precision)
        mlflow.log_metric("recall_macro", recall)
        mlflow.log_metric("f1_macro", f1)

        mlflow.set_tag(
            "Training Info",
            f"Logistic Regression with C={C_name} on Digits dataset",
        )

        print(f"LR_C={C_name} -> accuracy={accuracy:.3f}, f1_macro={f1:.3f}")




LR_C=0_01 -> accuracy=0.967, f1_macro=0.967




LR_C=0_1 -> accuracy=0.961, f1_macro=0.961




LR_C=1_0 -> accuracy=0.957, f1_macro=0.957




LR_C=5_0 -> accuracy=0.954, f1_macro=0.953




LR_C=10_0 -> accuracy=0.954, f1_macro=0.954


In [17]:
import mlflow
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Random Forest sweep: one MLflow run per forest size.
n_estimators_list = [10, 50, 100, 200]

for n_estimators in n_estimators_list:
    # String form of the tree count, reused in the run name and the model name.
    n_name = str(n_estimators)

    # max_depth=None is passed through unchanged and logged with the rest.
    params_rf = dict(n_estimators=n_estimators, max_depth=None, random_state=8888)

    with mlflow.start_run(run_name=f"RF_n_estimators={n_name}"):
        # Record the hyperparameters on the run before fitting.
        mlflow.log_params(params_rf)

        # Fit the forest on the training split.
        model = RandomForestClassifier(**params_rf)
        model.fit(X_train, y_train)

        # Log the fitted model under a per-run name, with a one-row input example.
        mlflow.sklearn.log_model(
            sk_model=model,
            name=f"digits_rf_{n_name}_trees",
            input_example=X_test[:1],
        )

        # Score the held-out test split.
        y_pred = model.predict(X_test)

        # Precision/recall/F1 are macro-averaged over the classes.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average="macro")
        recall = recall_score(y_test, y_pred, average="macro")
        f1 = f1_score(y_test, y_pred, average="macro")

        # Log the metrics one by one, in the same order and under the same
        # keys as the other runs, so they can be compared in the MLflow UI.
        for metric_name, metric_value in (
            ("accuracy", accuracy),
            ("precision_macro", precision),
            ("recall_macro", recall),
            ("f1_macro", f1),
        ):
            mlflow.log_metric(metric_name, metric_value)

        # Free-text description attached to the run.
        tag_text = f"Random Forest with n_estimators={n_estimators} on digits data"
        mlflow.set_tag("Training Info", tag_text)

        # One-line summary in the notebook output.
        print(f"RF_n_estimators={n_estimators} -> accuracy={accuracy:.3f}, f1_macro={f1:.3f}")


RF_n_estimators=10 -> accuracy=0.928, f1_macro=0.927
RF_n_estimators=50 -> accuracy=0.970, f1_macro=0.970
RF_n_estimators=100 -> accuracy=0.972, f1_macro=0.972
RF_n_estimators=200 -> accuracy=0.970, f1_macro=0.970


In [19]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import mlflow

# K-Nearest Neighbors sweep: one MLflow run per neighbor count.
k_values = [1, 3, 5, 9, 15]

for k in k_values:
    # String form of k, reused in the run name and the model name.
    k_name = str(k)

    params_knn = dict(n_neighbors=k)

    with mlflow.start_run(run_name=f"KNN_k={k_name}"):
        # Record the hyperparameters on the run.
        mlflow.log_params(params_knn)

        # Fit the classifier on the training split.
        model = KNeighborsClassifier(**params_knn)
        model.fit(X_train, y_train)

        # Log the fitted model under a per-run name, with a one-row input example.
        mlflow.sklearn.log_model(
            sk_model=model,
            name=f"digits_knn_k_{k_name}",
            input_example=X_test[:1],
        )

        # Score the held-out test split.
        y_pred = model.predict(X_test)

        # Precision/recall/F1 are macro-averaged over the classes.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average="macro")
        recall = recall_score(y_test, y_pred, average="macro")
        f1 = f1_score(y_test, y_pred, average="macro")

        # Log the metrics under the same keys as the other runs so the MLflow
        # UI can compare and plot them; dict order fixes the logging order.
        knn_scores = {
            "accuracy": accuracy,
            "precision_macro": precision,
            "recall_macro": recall,
            "f1_macro": f1,
        }
        for score_key, score_value in knn_scores.items():
            mlflow.log_metric(score_key, score_value)

        # Free-text description attached to the run.
        mlflow.set_tag("Training Info", f"KNN with k={k} on digits data")

        # One-line summary in the notebook output.
        print(f"KNN_k={k} -> accuracy={accuracy:.3f}, f1_macro={f1:.3f}")


KNN_k=1 -> accuracy=0.987, f1_macro=0.987
KNN_k=3 -> accuracy=0.987, f1_macro=0.987
KNN_k=5 -> accuracy=0.987, f1_macro=0.987
KNN_k=9 -> accuracy=0.978, f1_macro=0.978
KNN_k=15 -> accuracy=0.972, f1_macro=0.972
