## Quickstart for MLFlow in Deep Learning Models. I will compare runs, choose a model, and deploy in a REST API.

Objectives:
- run a hyperparam sweep on a training script
- compare the results of runs in MLFlow UI
- Choose the best run and register the model
- Deploy to a REST API
- Build a container imagesuitable for deployment to a cloud platform
  

## Import, Version and SEED

In [1]:
import os, random
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

from keras import layers

import mlflow
from mlflow.models import infer_signature

import keras
print(f"Keras version: {keras.__version__}")
import tensorflow as tf
print(f"TensorFlow version: {tf.__version__}")

from datetime import datetime

# Making sure of reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

Keras version: 3.11.1
TensorFlow version: 2.19.0


## Config

In [2]:
TRACKING_URI = "http://127.0.0.1:5000"
EXPERIMENT_NAME = "Wine Quality REDO"
EPOCHS = 3
BATCH_SIZE = 64
MAX_EVALS = 4

HYPERPARAM_SPACE = {
    "lr": hp.loguniform("lr", np.log(1e-5), np.log(1e-1)),
    "momentum": hp.uniform("momentum", 0.0, 1.0)
}

## Wire MLFlow

In [3]:
# set the tracking uri
mlflow.set_tracking_uri(TRACKING_URI)

# set the experiment name
mlflow.set_experiment(EXPERIMENT_NAME)

# set the global tags for the parent session
mlflow.set_tags({
    "project": "wine-quality-redo",
    "framework": "keras",
    "orchestrator": "hyperopt",
})

## load and split the data

In [4]:
# read the dataset
data=pd.read_csv(
    "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv",
    sep=";",
)

# get the dependent and independent part
X = data.drop(columns=["quality"])
y = data.quality

# though keras accepts DataFrames, still converting to numpy arrays
X = X.values
y = y.values

# split into train test validation
X_train_val, X_test, y_train_val, y_test = train_test_split(X,y, random_state=42, test_size=0.20, stratify=y)
X_train,X_val, y_train, y_val = train_test_split(X_train_val,y_train_val, random_state=42, test_size=0.20, stratify=y_train_val)

print(f"shape of the training data: row-> {X_train.shape[0]}, columns-> {X_train.shape[1]}")
print(f"shape of the validation data: row-> {X_val.shape[0]}, columns-> {X_val.shape[1]}")
print(f"shape of the test data: row-> {X_test.shape[0]}, columns-> {X_test.shape[1]}")

shape of the training data: row-> 3134, columns-> 11
shape of the validation data: row-> 784, columns-> 11
shape of the test data: row-> 980, columns-> 11


## Build the model

In [5]:
def build_model(X_train):
    mean=np.mean(X_train, axis=0)
    variance=np.var(X_train, axis=0)

    model=keras.Sequential([
        keras.Input([X_train.shape[1]]),
        layers.Normalization(mean=mean, variance=variance),
        layers.Dense(64, activation="relu"),
        layers.Dense(1)
    ])

    optimizer = keras.optimizers.SGD()

    model.compile(
        optimizer=optimizer,
        loss="mse",
        metrics=[keras.metrics.RootMeanSquaredError()]
    )

    return model

## One trial block

In [6]:
def trial_one_trial(params, X_train, X_val, y_train, y_val):
    """
    Starts a child run → build → set hyperparam → fit → eval → log → return loss + return id
    """
    with mlflow.start_run(nested=True) as run:
        run_id = run.info.run_id

        ## build and set hyperparams
        model=build_model(X_train)
        ## set the learning rate
        model.optimizer.learning_rate = params["lr"]
        if hasattr(model.optimizer, "momentum"):
            model.optimizer.momentum = params["momentum"]

        ## fit the model
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs= EPOCHS,
            batch_size=BATCH_SIZE,
            verbose=1
        )

        # evaluate on validation data
        val_loss, val_rmse = model.evaluate(
            X_val,
            y_val,
            batch_size=BATCH_SIZE,
            verbose=1
        )

        # log params with MLFLOW
        mlflow.log_params({
            "lr": float(params["lr"]),
            "momentum": float(params["momentum"]),
            "epochs": EPOCHS,
            "batch_size": BATCH_SIZE,
            "seed": SEED
        })

        # log metric
        mlflow.log_metric("eval_rmse", val_rmse)

        # log the learning curve
        for step, rmse in enumerate(history.history["val_root_mean_squared_error"]):
            mlflow.log_metric("val_rmse_epochs", float(val_rmse), step=step)

        # signature
        signature = infer_signature(X_train, y_train)

        # log model
        mlflow.tensorflow.log_model(
            model=model,
            name="model",
            signature=signature,
            input_example=X_train
        )

        return {"loss": val_rmse, "status": STATUS_OK, "run_id": run_id}

## objective for hyperopt

In [7]:
def objective(hparams):
    return trial_one_trial(hparams, X_train, X_val, y_train, y_val)

## Orchestration of the entire study

In [8]:
def run_study(max_evals=MAX_EVALS, register_name=None):
    mlflow.end_run()

    # dynamic run names
    run_name = f"wine_white__keras_sgd__tpe__{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    
    with mlflow.start_run(run_name=run_name) as parent_run:
        trials=Trials()
        best_params = fmin(
            fn=objective,
            space=HYPERPARAM_SPACE,
            algo=tpe.suggest,
            max_evals=MAX_EVALS,
            trials=trials,
            rstate=np.random.default_rng(SEED)
        )
        # pick the best child by loss
        best_result = min(trials.results, key=lambda r:r["loss"])
        best_run_id = best_result["run_id"]
        best_loss = best_result["loss"]

        # summarize at parent level
        mlflow.log_metric("best_eval_rmse", best_loss)
        mlflow.set_tag("best_child_run_id", best_run_id)

        # model registry
        if register_name:
            model_uri=f"runs:/{best_run_id}/model"
            registered = mlflow.register_model(model_uri, register_name)
            print(f"Registered '{register_name}' v{registered.version} from run {best_run_id}")

        return best_params, best_run_id, best_loss

## LAUNCH!!!!

In [9]:
best_params, best_run_id, best_loss = run_study(register_name="best wine model")
print("Best params:", best_params)
print("Best child run_id:", best_run_id)
print("Best eval RMSE:", best_loss)

🏃 View run bright-rat-181 at: http://127.0.0.1:5000/#/experiments/250973868349298093/runs/1f043cbfcce54a7cb1544cef2daaa88e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/250973868349298093
Epoch 1/3                                                                                                                                       

[1m 1/49[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5s[0m 110ms/step - loss: 35.8807 - root_mean_squared_error: 5.9901                             
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 6.0195 - root_mean_squared_error: 2.4535 - val_loss: 1.7436 - val_root_mean_squared_error: 1.3205

Epoch 2/3                                                                                                                                       

[1m 1/49[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 7ms/step - loss: 1.4685 - root_mean_squared_error: 1.2118                                
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━

Registered model 'best wine model' already exists. Creating a new version of this model...
2025/08/10 19:00:39 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: best wine model, version 2



Registered 'best wine model' v2 from run d47a750a0372480485bd80011df30151
🏃 View run wine_white__keras_sgd__tpe__20250810_190025 at: http://127.0.0.1:5000/#/experiments/250973868349298093/runs/ce77a86d19a74090b27d7d98ab280953
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/250973868349298093
Best params: {'lr': np.float64(0.0030232634294854017), 'momentum': np.float64(0.8136554076402878)}
Best child run_id: d47a750a0372480485bd80011df30151
Best eval RMSE: 0.9642291069030762


Created version '2' of model 'best wine model'.


## load the model - best one

In [11]:
from mlflow import tensorflow as mlflow_tf

best_model = mlflow_tf.load_model('models:/best wine model/2')

Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

In [12]:
y_pred_best = best_model.predict(X_test)

from sklearn.metrics import mean_squared_error

test_rmse = mean_squared_error(y_test, y_pred_best)
print(f"Test RMSE: {test_rmse:.4f}")

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 701us/step
Test RMSE: 0.7805


In [13]:
## log the best model's test score
with mlflow.start_run(run_id="d47a750a0372480485bd80011df30151"):
    mlflow.log_metric("test_rmse", test_rmse)
    mlflow.set_tag("run_id", "d47a750a0372480485bd80011df30151")
    mlflow.set_tag("run_name", "caring-squirrel-151")
    mlflow.set_tag("best_model_eval","yes")

🏃 View run caring-squirrel-151 at: http://127.0.0.1:5000/#/experiments/250973868349298093/runs/d47a750a0372480485bd80011df30151
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/250973868349298093
