## Quickstart: Compare runs, choose a model, and deploy it to a REST API

In this quickstart, you will:

* Run a hyperparameter sweep on a training script
* Compare the results of the runs in the MLflow UI
* Choose the best run and register it as a model
* Deploy the model to a REST API
* Build a container image suitable for deployment to a cloud platform

In [1]:
# !pip install hyperopt

In [2]:
import keras
import numpy as np 
import pandas as pd 
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import mlflow 
from mlflow.models import infer_signature

In [3]:
# Download the white-wine quality table; the file is semicolon-delimited.
data = pd.read_csv(
    "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv",
    delimiter=";",
)

# Hold out 25% of the rows as the test set. "quality" is the target column;
# every other column is used as a feature.
train, test = train_test_split(data, test_size=0.25, random_state=42)
train_x = train.drop(columns="quality").to_numpy()
train_y = train["quality"].to_numpy()
test_x = test.drop(columns="quality").to_numpy()
test_y = test["quality"].to_numpy()

# Carve a validation set (20% of the remaining training rows) out of the
# training data; the fixed seed keeps the split reproducible.
train_x, valid_x, train_y, valid_y = train_test_split(
    train_x, train_y, test_size=0.2, random_state=42
)

In MLflow, infer_signature is a function used to automatically generate a model signature, which defines the schema of inputs and outputs for a machine learning model. This signature is helpful for tracking and deploying models, as it ensures that input data types and shapes are compatible with the model's requirements during both training and inference.

**Usage**
The function infer_signature is typically imported from mlflow.models and takes in:

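* `model_input`: sample input data for the model (for example, a pandas DataFrame or, as in this notebook, a NumPy array).
* `model_output`: sample output data for the model given that input (for example, a DataFrame, Series, or NumPy array). This notebook passes the training targets as a stand-in for the model's output.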

In [4]:
# Derive the model signature (input/output schema) from the training arrays.
# The same signature is attached to each model logged later in the notebook.
signature = infer_signature(model_input=train_x, model_output=train_y)

In [None]:
def train_model(params, epochs, train_x, train_y, valid_x, valid_y, test_x, test_y):
    """Train a small regression network and log it to a nested MLflow run.

    Args:
        params: Mapping with the keys ``"lr"`` and ``"momentum"`` used to
            configure the SGD optimizer; logged in full as run parameters.
        epochs: Number of training epochs.
        train_x: Training features; per-column mean and variance of this
            array parameterize the normalization layer.
        train_y: Training targets.
        valid_x: Validation features, used during fitting and for the
            reported evaluation metric.
        valid_y: Validation targets.
        test_x: Accepted for interface compatibility; not used here.
        test_y: Accepted for interface compatibility; not used here.

    Returns:
        A dict with ``"loss"`` (RMSE on the validation data), ``"status"``
        (``STATUS_OK``) and ``"model"`` (the fitted Keras model).
    """
    # Per-feature statistics of the training data feed the normalization layer.
    feature_mean = np.mean(train_x, axis=0)
    feature_var = np.var(train_x, axis=0)

    layers = [
        keras.Input([train_x.shape[1]]),
        keras.layers.Normalization(mean=feature_mean, variance=feature_var),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(1),
    ]
    model = keras.Sequential(layers)

    optimizer = keras.optimizers.SGD(
        learning_rate=params["lr"],
        momentum=params["momentum"],
    )
    model.compile(
        optimizer=optimizer,
        loss="mean_squared_error",
        metrics=[keras.metrics.RootMeanSquaredError()],
    )

    # nested=True records this run as a child of whichever run is active.
    with mlflow.start_run(nested=True):
        model.fit(
            train_x,
            train_y,
            validation_data=(valid_x, valid_y),
            epochs=epochs,
            batch_size=64,
        )

        # Index 1 of the evaluate() result is taken as the RMSE, the only
        # metric passed to compile() above.
        eval_rmse = model.evaluate(valid_x, valid_y, batch_size=64)[1]

        mlflow.log_params(params)
        mlflow.log_metric("eval_rmse", eval_rmse)

        # `signature` is read from module scope, not passed in as an argument.
        mlflow.tensorflow.log_model(model, "model", signature=signature)

        return {"loss": eval_rmse, "status": STATUS_OK, "model": model}

* The objective function takes in the hyperparameters and returns the results of the **train_model** function for that set of hyperparameters.

In [6]:
def objective(params):
    """Evaluate one hyperparameter set by delegating to ``train_model``.

    Args:
        params: Hyperparameter mapping forwarded unchanged to ``train_model``.

    Returns:
        Whatever ``train_model`` returns for a 3-epoch training run on the
        module-level train/validation/test arrays.
    """
    # The data splits come from module scope; only `params` varies per call.
    splits = {
        "train_x": train_x,
        "train_y": train_y,
        "valid_x": valid_x,
        "valid_y": valid_y,
        "test_x": test_x,
        "test_y": test_y,
    }
    return train_model(params, epochs=3, **splits)

In [7]:
# Hyperopt search space: the learning rate is sampled log-uniformly between
# 1e-5 and 1e-1 (bounds are given in log space), momentum uniformly in [0, 1].
lr_log_low = np.log(1e-5)
lr_log_high = np.log(1e-1)
space = dict(
    lr=hp.loguniform("lr", lr_log_low, lr_log_high),
    momentum=hp.uniform("momentum", 0.0, 1.0),
)

In [8]:
mlflow.set_experiment("/wine-quality_new")
with mlflow.start_run():
    # Run the Hyperopt search: up to 8 evaluations of `objective`, with the
    # per-evaluation results collected in `trials`.
    trials = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=8,
        trials=trials,
    )

    # The winning trial is the one that reported the lowest loss.
    best_run = min(trials.results, key=lambda trial: trial["loss"])

    # Record the winning hyperparameters, metric, and model on this run.
    mlflow.log_params(best)
    mlflow.log_metric("eval_rmse", best_run["loss"])
    mlflow.tensorflow.log_model(best_run["model"], "model", signature=signature)

    # Summarize the outcome in the cell output.
    print(f"Best parameters: {best}")
    print(f"Best eval rmse: {best_run['loss']}")

2024/11/14 15:26:33 INFO mlflow.tracking.fluent: Experiment with name '/wine-quality_new' does not exist. Creating a new experiment.


Epoch 1/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m27s[0m 605ms/step - loss: 36.7254 - root_mean_squared_error: 6.0601
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 32.4579 - root_mean_squared_error: 5.6926 - val_loss: 19.8713 - val_root_mean_squared_error: 4.4577

Epoch 2/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 21ms/step - loss: 20.0832 - root_mean_squared_error: 4.4814
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 16.7474 - root_mean_squared_error: 4.0886 - val_loss: 9.8799 - val_root_mean_squared_error: 3.1432

Epoch 3/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 17ms/step - loss: 9.1809 - root_mean_squared_error: 3.0300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 8.3195 - root_mean_squared_error: 2.8819 - va

In [9]:
import mlflow
# Point the MLflow client at a tracking server on localhost, port 8080.
# NOTE(review): this cell comes after the sweep cell; presumably the URI only
# affects MLflow calls made after it is set. Confirm the intended cell order.
tracking_uri = "http://127.0.0.1:8080/"
mlflow.set_tracking_uri(uri=tracking_uri)