Compare runs, choose a model, and deploy it to a REST API

In [1]:
###
# Run a hyperparameter sweep on a training script
# Compare the results of the runs in the MLflow UI
# Choose the best run and register it as a model
# Deploy the model to a REST API
# Build a container image suitable for deployment to a cloud platform

In [7]:
import keras

In [8]:
import numpy as np
import pandas as pd
from hyperopt import STATUS_OK,Trials,fmin,hp,tpe
from sklearn.model_selection import train_test_split

import mlflow 
from mlflow.models import infer_signature

In [9]:
import pandas as pd

# Load the white-wine quality dataset; the CSV is semicolon-delimited
wine_csv_url = "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv"
data = pd.read_csv(wine_csv_url, sep=";")

# Show the first few rows as a quick sanity check of the parsed columns
print(data.head())


   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.0              0.27         0.36            20.7      0.045   
1            6.3              0.30         0.34             1.6      0.049   
2            8.1              0.28         0.40             6.9      0.050   
3            7.2              0.23         0.32             8.5      0.058   
4            7.2              0.23         0.32             8.5      0.058   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 45.0                 170.0   1.0010  3.00       0.45   
1                 14.0                 132.0   0.9940  3.30       0.49   
2                 30.0                  97.0   0.9951  3.26       0.44   
3                 47.0                 186.0   0.9956  3.19       0.40   
4                 47.0                 186.0   0.9956  3.19       0.40   

   alcohol  quality  
0      8.8        6  
1      9.5        6  
2     10.1        6 

In [10]:
## Hold out 25% of the rows as the test set.
## The validation set is carved out of `train` in a later cell.

train, test = train_test_split(data, test_size=0.25, random_state=42)

In [18]:
# Separate the predictors from the target column ("quality")
X = train.drop(columns=["quality"]).to_numpy()
y = train["quality"].to_numpy()

# Reserve 20% of the training rows for validation
train_x, valid_x, train_y, valid_y = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

# Infer the MLflow model signature from the training inputs and targets
signature = infer_signature(train_x, train_y)


In [19]:
# Per-feature mean of the training inputs (one value per column)
train_x.mean(axis=0)

array([6.86621852e+00, 2.80377808e-01, 3.32597005e-01, 6.42164738e+00,
       4.55513955e-02, 3.53556841e+01, 1.38792376e+02, 9.94074221e-01,
       3.18919333e+00, 4.88396869e-01, 1.05005673e+01])

In [20]:
import numpy as np
import mlflow
import keras
from keras import layers
from keras.models import Sequential
from keras.optimizers import SGD
from keras.metrics import RootMeanSquaredError
from hyperopt import STATUS_OK

def train_model(params, epochs, train_x, train_y, valid_x, valid_y, test_x, test_y):
    """
    Train a small feed-forward regression network and record the trial in MLflow.

    The network is: input -> fixed normalization -> Dense(64, relu) -> Dense(1).
    It is optimised with SGD on mean squared error, and the validation RMSE is
    what gets reported back to the hyperparameter search.

    Args:
        params (dict): Hyperparameters; must contain "lr" (learning rate) and
            "momentum" (SGD momentum).
        epochs (int): Number of epochs to train.
        train_x, train_y: Training features and targets.
        valid_x, valid_y: Validation features and targets, used both during
            fitting and for the final score.
        test_x, test_y: Test features and targets. Accepted for interface
            compatibility but currently unused by this function.

    Returns:
        dict: {"loss": validation RMSE, "status": STATUS_OK, "model": fitted model},
        the shape Hyperopt expects from an objective.
    """

    ## Normalization statistics come from the training split only, so no
    ## information from the validation data reaches the model.
    mean = np.mean(train_x, axis=0)
    var = np.var(train_x, axis=0)

    ## Define ANN model architecture
    model = Sequential([
        keras.Input(shape=[train_x.shape[1]]),  # One input per feature column
        layers.Normalization(mean=mean, variance=var),  # Standardise with the fixed training statistics
        layers.Dense(64, activation="relu"),  # Hidden layer with 64 neurons
        layers.Dense(1)  # Single output neuron (regression)
    ])

    ## Compile the model with SGD optimizer
    model.compile(
        optimizer=SGD(learning_rate=params["lr"], momentum=params["momentum"]),
        loss="mean_squared_error",
        metrics=[RootMeanSquaredError()]
    )

    ## Describe the model's input/output schema so the logged artifact can be
    ## validated when it is later loaded or served.
    model_signature = infer_signature(train_x, train_y)

    ## Each trial is a child run of whatever MLflow run is currently active
    with mlflow.start_run(nested=True):
        print("Training model with parameters:", params)

        ## Train the model with the given parameters
        model.fit(
            train_x, train_y,
            validation_data=(valid_x, valid_y),
            epochs=epochs,
            batch_size=64,
            verbose=1
        )

        ## evaluate() returns [loss, *metrics]; index 1 is the RMSE metric
        ## registered in compile() above.
        eval_result = model.evaluate(valid_x, valid_y, batch_size=64, verbose=0)
        eval_rmse = eval_result[1]

        ## Log hyperparameters and evaluation results to MLflow
        mlflow.log_params(params)
        mlflow.log_metric("eval_rmse", eval_rmse)

        ## Log the trained model together with its signature
        mlflow.tensorflow.log_model(model, "model", signature=model_signature)

        ## Return the evaluation loss, status, and trained model
        return {"loss": eval_rmse, "status": STATUS_OK, "model": model}


In [21]:
# Separate the held-out test split into predictors and the "quality" target
test_x = test.drop(columns=["quality"]).to_numpy()
test_y = test["quality"].to_numpy()


In [22]:
def objective(params):
    """
    Hyperopt objective: train one model with ``params`` and return its result.

    Uses the notebook-level train/validation/test arrays and a fixed budget of
    3 epochs. MLflow tracking of parameters and metrics for the trial happens
    inside ``train_model``.

    Args:
        params (dict): Hyperparameters sampled from the search space.

    Returns:
        dict: The result dictionary produced by ``train_model``.
    """
    return train_model(
        params,
        epochs=3,
        train_x=train_x,
        train_y=train_y,
        valid_x=valid_x,
        valid_y=valid_y,
        test_x=test_x,
        test_y=test_y,
    )

In [23]:
# Hyperopt search space: learning rate drawn log-uniformly between 1e-5 and 1e-1,
# momentum drawn uniformly between 0 and 1.
space = dict(
    lr=hp.loguniform("lr", np.log(1e-5), np.log(1e-1)),
    momentum=hp.uniform("momentum", 0.0, 1.0),
)

In [None]:
import mlflow
from hyperopt import fmin, tpe, Trials

# Group every run of this sweep under one MLflow experiment
mlflow.set_experiment("/wine-quality")

# The outer run is the parent; each trial opens its own nested child run
# inside train_model.
with mlflow.start_run():
    # Trials keeps the full result dict of every evaluation, including the
    # fitted model returned by the objective.
    trials = Trials()

    best = fmin(
        fn=objective,       # Objective function
        space=space,        # Hyperparameter search space
        algo=tpe.suggest,   # Tree-structured Parzen Estimator
        max_evals=4,        # Number of evaluations
        trials=trials       # Store trial results
    )

    # Pick the trial with the lowest validation RMSE
    best_run = min(trials.trials, key=lambda trial: trial["result"]["loss"])["result"]

    # Log the best parameters and loss on the parent run
    mlflow.log_params(best)
    mlflow.log_metric("eval_rmse", best_run["loss"])

    # Log the best model with the signature inferred from the training data,
    # so the artifact carries its input/output schema.
    mlflow.tensorflow.log_model(best_run["model"], "model", signature=signature)

    # Print out best parameters and corresponding loss
    print(f"Best parameters: {best}")
    print(f"Best eval RMSE: {best_run['loss']}")


  0%|          | 0/4 [00:00<?, ?trial/s, best loss=?]

Training model with parameters:                      
{'lr': 0.007160554161515186, 'momentum': 0.17352399298540233}
Epoch 1/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m40s[0m 906ms/step - loss: 39.7480 - root_mean_squared_error: 6.3046
[1m11/46[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 5ms/step - loss: 29.1798 - root_mean_squared_error: 5.3728   
[1m20/46[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m0s[0m 6ms/step - loss: 23.1528 - root_mean_squared_error: 4.7391
[1m34/46[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 5ms/step - loss: 17.8000 - root_mean_squared_error: 4.0975
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 14.9019 - root_mean_squared_error: 3.7121 - val_loss: 1.5372 - val_root_mean_squared_error: 1.2398

Epoch 2/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 49ms/step - loss: 1.4912 - root_mean_squared_err


