In [1]:
import sys

import pandas as pd
import tensorflow as tf
import numpy as np
import hyperopt #STATUS_OK, Trials, fmin, hp, tpe
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import mlflow

sys.path.append("../functions")
from mlflow_utils import start_mlflow_server, mlflow_train_keras_model

In [2]:
#fetch the white-wine quality toy dataset (semicolon-separated CSV)
data = pd.read_csv(
    "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv",
    sep=";",
)

#hold out 25% of the rows as the test set; "quality" is the regression target
train, test = train_test_split(data, test_size=0.25, random_state=42)
X_tv, y_tv = train.drop(columns="quality").to_numpy(), train["quality"].to_numpy()
X_test, y_test = test.drop(columns="quality").to_numpy(), test["quality"].to_numpy()

#carve 20% of the remaining rows off as the validation set
X_train, X_valid, y_train, y_valid = train_test_split(
    X_tv,
    y_tv,
    test_size=0.2,
    random_state=42,
)

In [3]:
#start the MLflow server (the helper reports that it runs in the background)
#its UI can be accessed in your browser at http://127.0.0.1:5000
start_mlflow_server(experiment_name="example_experiment") #take care to choose the proper experiment name; runs in this notebook are tracked under it

Tracking URI set to: file:///c:/Users/Eugen/neuefische/Project__Wildlife_Images/mlflow
MLflow server started in the background


2024/05/16 14:19:24 INFO mlflow.tracking.fluent: Experiment with name 'example_experiment' does not exist. Creating a new experiment.


MLflow server is running


In [4]:
#define a function similar to this one (should have the same parameter list and return value)
#supply your own search_params (see below) and don't worry about the components variable

def train_model_sample(search_params, components, X_train, y_train, X_valid, y_valid):
    """Build, train, and evaluate one candidate model for a single search trial.

    Args:
        search_params: dict of sampled hyperparameters. The keys read here are
            "n_dense_neurons", "lr", and "momentum".
        components: dict supplied by the caller. "mlflow_logger" is passed to
            Keras as a training callback and "signature" is passed to
            mlflow.tensorflow.log_model.
        X_train, y_train: training features and targets.
        X_valid, y_valid: validation features and targets.

    Returns:
        dict with the keys
            "loss": validation loss (mean squared error, the compiled loss),
            "status": hyperopt.STATUS_OK,
            "model": the trained Keras model.
    """

    #define some model
    #normalization statistics come from the training split only
    mean = np.mean(X_train, axis=0)
    var = np.var(X_train, axis=0)
    model = tf.keras.Sequential(
        [
            tf.keras.Input([X_train.shape[1]]),
            tf.keras.layers.Normalization(mean=mean, variance=var),
            #wherever applicable, insert hyperparameters from the search_params dict (your search space)
            #int() guards against the sampler handing back a float / numpy scalar
            tf.keras.layers.Dense(int(search_params["n_dense_neurons"]), activation="relu"),
            tf.keras.layers.Dense(1),
        ]
    )

    #compile model
    model.compile(
        optimizer=tf.keras.optimizers.SGD(
            learning_rate=search_params["lr"], momentum=search_params["momentum"]
        ),
        loss="mean_squared_error",
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )

    #train model with MLflow tracking
    with mlflow.start_run(nested=True):
        model.fit(
            X_train,
            y_train,
            validation_data=(X_valid, y_valid),
            epochs=10,
            batch_size=64,
            callbacks=[components["mlflow_logger"]] #please include this callback so epoch-wise performance gets logged by mlflow
        )

        #evaluate the model
        #evaluate() returns [loss, *metrics] in compile order, i.e. [MSE, RMSE];
        #index 0 is the loss -- index 1 would be the RMSE metric
        val_loss, val_rmse = model.evaluate(X_valid, y_valid, batch_size=64)
        val_loss = float(val_loss)
        val_rmse = float(val_rmse)

        #log parameters and results
        mlflow.log_params(search_params)
        mlflow.log_metric("final_val_loss", val_loss)
        mlflow.log_metric("final_val_rmse", val_rmse)

        #log model
        mlflow.tensorflow.log_model(model, "model", signature=components["signature"])

        #MUST return "loss" and "status" in this dictionary
        return {"loss": val_loss, "status": hyperopt.STATUS_OK, "model": model}


#hyperopt search space: one entry per key that train_model_sample reads from search_params
#further parameter expressions: https://github.com/hyperopt/hyperopt/wiki/FMin#21-parameter-expressions
search_space = dict(
    lr=hyperopt.hp.loguniform("lr", np.log(1e-5), np.log(1e-1)),
    momentum=hyperopt.hp.uniform("momentum", 0.0, 1.0),
    n_dense_neurons=hyperopt.hp.uniformint("n_dense_neurons", 16, 512),
)

#tags attached to the current run (must be a dictionary; key-value pairs can be named freely)
#we should probably decide on a set of tags that need to be included
tags = dict(model_type="Sequential", data_amount="all", optimizer="SGD")

#fit the model with hyperparameter search (5 evaluations)
#the returned run object can be used to extract the best model of this run
my_run = mlflow_train_keras_model(
    train_fn=train_model_sample,
    search_space=search_space,
    n_evals=5,
    mlflow_tags=tags,
    X_train=X_train,
    y_train=y_train,
    X_valid=X_valid,
    y_valid=y_valid,
)

Epoch 1/10                                           

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m29s[0m 665ms/step - loss: 32.7646 - root_mean_squared_error: 5.7240
[1m39/46[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 1ms/step - loss: 33.4588 - root_mean_squared_error: 5.7842   
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 33.3471 - root_mean_squared_error: 5.7745 - val_loss: 31.1689 - val_root_mean_squared_error: 5.5829

Epoch 2/10                                           

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 37ms/step - loss: 32.3375 - root_mean_squared_error: 5.6866
[1m45/46[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - loss: 30.4238 - root_mean_squared_error: 5.5154 
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 30.3761 - root_mean_squared_error: 5.5110 - val_loss: 27.8697 - val_root_mean_squared_error: 5.2792

Epoch 3/10                                  




Epoch 1/10                                                                     

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m27s[0m 620ms/step - loss: 35.4775 - root_mean_squared_error: 5.9563
[1m40/46[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 1ms/step - loss: 35.7278 - root_mean_squared_error: 5.9773   
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 35.7190 - root_mean_squared_error: 5.9765 - val_loss: 35.3971 - val_root_mean_squared_error: 5.9495

Epoch 2/10                                                                     

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 34ms/step - loss: 37.3285 - root_mean_squared_error: 6.1097
[1m40/46[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 1ms/step - loss: 34.9792 - root_mean_squared_error: 5.9141 
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 34.9208 - root_mean_squared_error: 5.9092 - val_loss: 34.3281 - val_root_mean_squared_error: 

In [5]:
#saving and loading models

#register model from current run
#the registry assigns the next free version number on every call, so keep the
#returned ModelVersion instead of assuming the version is 1
run_id = my_run.info.run_id
model_uri = f"runs:/{run_id}/model"
model_name = "my_best_model"
registered_model = mlflow.register_model(model_uri=model_uri, name=model_name)

#load model from run
loaded_model_from_run = mlflow.tensorflow.load_model(model_uri)

#load registered model (the exact version that was just created)
model_version = registered_model.version
model_uri2 = f"models:/{model_name}/{model_version}"
loaded_model_from_registry = mlflow.tensorflow.load_model(model_uri2)

#use model as desired
y_test_pred1 = loaded_model_from_run.predict(X_test)
y_test_pred2 = loaded_model_from_registry.predict(X_test)
#both URIs should point at the same artifact, so predictions must match exactly
np.testing.assert_array_equal(y_test_pred1, y_test_pred2)
mean_squared_error(y_test, y_test_pred1)

Successfully registered model 'my_best_model'.
Created version '1' of model 'my_best_model'.


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


11.208863091728901