In [25]:
import keras
import numpy as np
import pandas as pd
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models import infer_signature
from mlflow.models import validate_serving_input
from mlflow.models import convert_input_example_to_serving_input
import warnings as wrn
wrn.filterwarnings(action="ignore")

In [2]:
# load the dataset
data=pd.read_csv(
    "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv",
    sep=";",
)
data.head(3)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6


In [7]:
train, test = train_test_split(data, test_size=.25, random_state=42)

train_x = train.drop(['quality'], axis=1).values
train_y = train[['quality']].values.ravel()

test_x = test.drop(['quality'], axis=1).values
test_y = test[['quality']].values.ravel()

train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=.2,random_state=42)

print(train_x.shape, train_y.shape)
print(valid_x.shape, valid_y.shape)
print(test_x.shape, test_y.shape)

signature = infer_signature(train_x, train_y)
print("---------")
print(signature)

(2938, 11) (2938,)
(735, 11) (735,)
(1225, 11) (1225,)
---------
inputs: 
  [Tensor('float64', (-1, 11))]
outputs: 
  [Tensor('int64', (-1,))]
params: 
  None



In [20]:
def train_model(params, epochs, train_x, train_y, valid_x, valid_y):
    """Trains a TensorFlow model using the provided training data and logs the results using MLflow.

    Args:
        params (dict): Dictionary containing hyperparameters: 
            - lr (float) 
            - momentum (float)
        epochs (int): Number of epochs
        train_x (ndarray): Training feature set (shape: [num_samples, num_features])
        train_y (ndarray): Training labels (shape: [num_samples, 1])
        valid_x (ndarray): Validation feature set (shape: [num_samples, num_features]).
        valid_y (ndarray): Validation labels (shape: [num_samples, 1])

    Returns:
        dict: A dictionary containing:
            - loss (float)
            - status (hyperopt.STATUS_OK)
            - model (keras.Sequential)
    """
    
    mean = np.mean(train_x, axis=0)
    var = np.var(train_x, axis=0)
    
    model = keras.Sequential(
        [
            keras.Input([train_x.shape[1]]),
            keras.layers.Normalization(mean=mean, variance=var),
            keras.layers.Dense(64, activation="relu"),
            keras.layers.Dense(1)
        ]
    )
    
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=params["lr"], momentum=params["momentum"]),
        loss="mean_squared_error",
        metrics=[keras.metrics.RootMeanSquaredError()]
    )
    
    with mlflow.start_run(nested=True):
        model.fit(
            train_x, train_y,
            validation_data=(valid_x, valid_y),
            epochs=epochs,
            batch_size=64
        )
        
        eval_result = model.evaluate(valid_x, valid_y, batch_size=64)
        eval_rmse = eval_result[1]
        
        mlflow.log_params(params)
        
        mlflow.log_metric("eval_rmse", eval_rmse)
        
        mlflow.tensorflow.log_model(model=model, artifact_path="model", signature=signature)
        
        return {
            "loss": eval_rmse,
            "status": STATUS_OK,
            "model": model
        }
        

In [21]:
def objective(params):
    results = train_model(
        params=params, epochs=5,
        train_x=train_x, train_y=train_y,
        valid_x=valid_x, valid_y=valid_y
    )
    return results

In [22]:
space={
    "lr": hp.loguniform("lr", np.log(1e-5), np.log(1e-1)),
    "momentum": hp.uniform("momentum", 0.0, 1.0)
}

In [23]:
mlflow.set_experiment("wine-quality")

with mlflow.start_run():
    
    # Conduct the hyperparameter search using Hyperopt
    trials = Trials()
    
    best = fmin(
        fn = objective, 
        space=space,
        algo=tpe.suggest, 
        max_evals=6, 
        trials=trials
    )
    
    # Fetch the details of the best run
    best_run = sorted(trials.results, key=lambda x: x["loss"])[0]
    
    # Log the best parameters, loss, and model
    mlflow.log_params(best)
    
    mlflow.log_metric("eval_rmse", best_run["loss"])
    
    mlflow.tensorflow.log_model(best_run["model"], "model", signature=signature)

    # Print out the best parameters and corresponding loss
    print(f"Best parameters: {best}")
    print(f"Best eval rmse: {best_run['loss']}")

Epoch 1/5                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m22s[0m 502ms/step - loss: 36.1855 - root_mean_squared_error: 6.0154
[1m40/46[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 1ms/step - loss: 38.0548 - root_mean_squared_error: 6.1688   
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 38.0736 - root_mean_squared_error: 6.1703 - val_loss: 38.1775 - val_root_mean_squared_error: 6.1788

Epoch 2/5                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 31ms/step - loss: 38.8460 - root_mean_squared_error: 6.2327
[1m44/46[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - loss: 37.8076 - root_mean_squared_error: 6.1487 
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 37.7776 - root_mean_squared_error: 6.1463 - val_loss: 37.3742 - val_root_mean_squared_error: 6.1134

Epoch 3/5                                   

In [26]:
# Inferencing
model_uri = 'runs:/2d55b10dac5a4c7eba5dc8c2ceec7d81/model'

# Define INPUT_EXAMPLE via assignment with your own input example to the model
# A valid input example is a data instance suitable for pyfunc prediction
serving_payload = convert_input_example_to_serving_input(test_x)

# Validate the serving payload works on the model
validate_serving_input(model_uri, serving_payload)

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 495.10it/s] 


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


array([[5.695498 ],
       [7.0315905],
       [6.3901463],
       ...,
       [6.5168996],
       [6.644665 ],
       [5.5790186]], dtype=float32)

In [27]:
# Load model as a PyFuncModel.
model_uri = 'runs:/2d55b10dac5a4c7eba5dc8c2ceec7d81/model'
loaded_model = mlflow.pyfunc.load_model(model_uri)

loaded_model.predict(pd.DataFrame(test_x))

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


array([[5.695498 ],
       [7.0315905],
       [6.3901463],
       ...,
       [6.5168996],
       [6.644665 ],
       [5.5790186]], dtype=float32)

In [28]:
# Register in the model registry
mlflow.register_model(model_uri, "wine-quality")

Successfully registered model 'wine-quality'.
Created version '1' of model 'wine-quality'.


<ModelVersion: aliases=[], creation_timestamp=1741427149490, current_stage='None', description=None, last_updated_timestamp=1741427149490, name='wine-quality', run_id='2d55b10dac5a4c7eba5dc8c2ceec7d81', run_link=None, source='file:///d:/MLOpsProjects/MLflow%20keras/mlruns/315369149838321762/2d55b10dac5a4c7eba5dc8c2ceec7d81/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>