In [1]:
import keras
import mlflow
import pandas as pd
import numpy as np
import tensorflow as tf
from mlflow.models import infer_signature
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error
from hyperopt import fmin,hp,Trials,tpe,STATUS_OK


In [2]:
data = pd.read_csv("https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv", sep=";")
# X,y = data.drop(columns=["quality"]),data["quality"]
X,y = data.drop(columns=["quality"]).values,data["quality"].values.ravel()
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)
X_train,X_valid,y_train,y_valid = train_test_split(X_train,y_train,test_size=0.2)
signature = infer_signature(X_train,y_train)

In [3]:
def train(X_train,X_valid,y_train,y_valid,params,epochs):
    mean = np.mean(X_train,axis=0)
    var = np.var(X_train,axis=0)
    model = keras.Sequential([
                            keras.Input([X_train.shape[1]]),
                            keras.layers.Normalization(mean=mean,variance=var),
                            keras.layers.Dense(64,activation="relu"),
                            keras.layers.Dense(1)])
    model.compile(
                  optimizer= keras.optimizers.SGD(
                                                  learning_rate=params["lr"],
                                                  momentum=params["momentum"]),
                  loss="mean_squared_error",
                  metrics=[keras.metrics.RootMeanSquaredError()])
    with mlflow.start_run(nested=True):
        model.fit(
                  X_train,
                  y_train,
                  validation_data=(X_valid,y_valid),
                  epochs=epochs,
                  batch_size=64)
        eval_rmse = model.evaluate(
                                X_valid,
                                y_valid,
                                batch_size=64)[1]
        mlflow.log_params(params)
        mlflow.log_metric("Validation RMSE", eval_rmse)
        mlflow.tensorflow.log_model(
                                    model,
                                    "model",
                                    signature=signature)
        return {
            "loss":eval_rmse,
            "status":STATUS_OK,
            "model":model}
        

In [4]:
def objective(params):
    return train(X_train,X_valid,y_train,y_valid,params,epochs=3)    

In [5]:
params = {
    "lr":hp.loguniform("lr",np.log(1e-5),np.log(1e-1)),
    "momentum":hp.uniform("momentum",0.0,1.0)
}


In [6]:
mlflow.set_experiment("/wine-quality")
with mlflow.start_run():
    trials=Trials()
    best_parameters = fmin(
                fn=objective,
                space=params,
                trials=trials,
                algo=tpe.suggest,
                max_evals=4)
    best_run = sorted(trials.results, key=lambda x: x["loss"])[0]
    mlflow.log_params(best_parameters)
    mlflow.log_metric("Validation RMSE",best_run["loss"])
    mlflow.tensorflow.log_model(
                                best_run["model"],
                                "model",
                                signature=signature)
    print("Best params",best_parameters)
    print("Best RMSE",best_run["loss"])

Epoch 1/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m16s[0m 360ms/step - loss: 33.5079 - root_mean_squared_error: 5.7886
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 32.9033 - root_mean_squared_error: 5.7361 - val_loss: 31.8233 - val_root_mean_squared_error: 5.6412

Epoch 2/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 19ms/step - loss: 32.4550 - root_mean_squared_error: 5.6969
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 32.1165 - root_mean_squared_error: 5.6671 - val_loss: 31.0723 - val_root_mean_squared_error: 5.5743

Epoch 3/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 19ms/step - loss: 32.4168 - root_mean_squared_error: 5.6936
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 31.1631 - root_mean_squared_error: 5.5824 -

In [1]:
import keras
import mlflow
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from hyperopt import fmin, hp, Trials, tpe, STATUS_OK
from mlflow.models import infer_signature
from keras.callbacks import EarlyStopping

# Step 1: Data Preprocessing
def preprocess_data(url):
    data = pd.read_csv(url, sep=";")
    X, y = data.drop(columns=["quality"]).values, data["quality"].values.ravel()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
    return X_train, X_valid, X_test, y_train, y_valid, y_test

# Step 2: Dynamically Design Model
def create_model(input_shape, params):
    model = keras.Sequential()
    model.add(keras.Input(shape=input_shape))
    for i in range(params['num_layers']):
        model.add(keras.layers.Dense(params['units_per_layer'], activation=params['activation']))
    model.add(keras.layers.Dense(1))  # Output layer for regression
    model.compile(optimizer=params['optimizer'], loss='mse', metrics=['mae'])
    return model

# Step 3: Train Model with Early Stopping and Logging
def train_model(X_train, X_valid, y_train, y_valid, params, epochs=100):
    # Normalize Data
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)
    X_train = (X_train - mean) / std
    X_valid = (X_valid - mean) / std

    # Create Model
    model = create_model(X_train.shape[1:], params)

    # Setup Early Stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Start MLflow Logging
    mlflow.start_run()
    mlflow.log_params(params)

    # Train Model
    history = model.fit(
        X_train, y_train,
        validation_data=(X_valid, y_valid),
        epochs=epochs,
        batch_size=params.get('batch_size', 32),
        callbacks=[early_stopping],
        verbose=0
    )

    # Log Metrics
    for epoch, (train_loss, val_loss) in enumerate(zip(history.history['loss'], history.history['val_loss'])):
        mlflow.log_metric('train_loss', train_loss, step=epoch)
        mlflow.log_metric('val_loss', val_loss, step=epoch)

    # Log Model
    mlflow.keras.log_model(model, "model")
    mlflow.end_run()

    return model

# Step 4: Evaluate Model
def evaluate_model(model, X_test, y_test):
    mean = np.mean(X_test, axis=0)
    std = np.std(X_test, axis=0)
    X_test = (X_test - mean) / std
    predictions = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    return rmse

# Step 5: Define Hyperparameter Space for Optimization
hyperparameter_space = {
    'num_layers': hp.choice('num_layers', [1, 2, 3]),
    'units_per_layer': hp.choice('units_per_layer', [32, 64, 128]),
    'activation': hp.choice('activation', ['relu', 'tanh']),
    'optimizer': hp.choice('optimizer', ['adam', 'sgd']),
    'batch_size': hp.choice('batch_size', [16, 32, 64])
}

# Step 6: Objective Function for Hyperparameter Optimization
def objective(params):
    X_train, X_valid, X_test, y_train, y_valid, y_test = preprocess_data(
        "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv"
    )
    model = train_model(X_train, X_valid, y_train, y_valid, params, epochs=100)
    rmse = evaluate_model(model, X_valid, y_valid)
    return {'loss': rmse, 'status': STATUS_OK}

# Run Hyperparameter Optimization
trials = Trials()
best_params = fmin(
    fn=objective,
    space=hyperparameter_space,
    algo=tpe.suggest,
    max_evals=2,
    trials=trials
)

print("Best Parameters:", best_params)


  0%|          | 0/2 [00:00<?, ?trial/s, best loss=?]





[1m 1/23[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 36ms/step
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 

 50%|█████     | 1/2 [00:12<00:12, 12.95s/trial, best loss: 0.6990685852723562]





[1m 1/23[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 39ms/step               
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step       

100%|██████████| 2/2 [00:24<00:00, 12.45s/trial, best loss: 0.6990685852723562]
Best Parameters: {'activation': np.int64(1), 'batch_size': np.int64(1), 'num_layers': np.int64(1), 'optimizer': np.int64(1), 'units_per_layer': np.int64(2)}
