In [83]:
import pandas as pd
import numpy as np
from hyperopt import STATUS_OK , Trials , hp ,fmin , Trials , tpe
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models import infer_signature
import keras

In [None]:
# White-wine quality CSV from the MLflow repository; its fields are ';'-separated.
WINE_QUALITY_URL = "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv"

data = pd.read_csv(WINE_QUALITY_URL, sep=";")
data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.30,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.28,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
4893,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
4894,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
4895,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
4896,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [49]:
# Target is the `quality` column; the features are every other column.
y = data['quality']
X = data.drop(columns=['quality'])

In [109]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)


In [110]:
# Carve a validation set (20%) out of the current training portion.
# NOTE: this overwrites X_train / y_train, so re-running this cell on its own
# splits the already-reduced training set again.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=101,
)

In [111]:
# Display the training targets.
y_train

1866    6
1366    6
253     3
4217    4
1066    6
       ..
3568    7
3829    7
4831    6
3191    6
4849    5
Name: quality, Length: 3134, dtype: int64

In [112]:
# Display the shape of the training targets.
y_train.shape

(3134,)

In [113]:
# Convert every split to a plain NumPy array (features first, then targets).
X_train, X_valid, X_test = (np.array(part) for part in (X_train, X_valid, X_test))
y_train, y_valid, y_test = (np.array(part) for part in (y_train, y_valid, y_test))

In [114]:
# Ensure each target vector is a flat 1-D copy.
y_train = y_train.flatten()
y_valid = y_valid.flatten()
y_test = y_test.flatten()

In [115]:
# Report the feature/target shapes of each split, one line per split.
shape_report = [
    f"X_train and y_train shape are {X_train.shape} and {y_train.shape}",
    f"X_valid and y_valid shape are {X_valid.shape} and {y_valid.shape}",
    f"X_test and y_test shape are {X_test.shape} and {y_test.shape}",
]
print("\n".join(shape_report))

X_train and y_train shape are (3134, 11) and (3134,)
X_valid and y_valid shape are (784, 11) and (784,)
X_test and y_test shape are (980, 11) and (980,)


In [57]:
# Display the test targets.
y_test

array([6, 6, 3, ..., 6, 6, 5])

In [116]:
# Model signature inferred from the training features and targets; it is passed
# to every log_model call below.
signature = infer_signature(model_input=X_train, model_output=y_train)

In [117]:
from keras.layers import Input , Normalization , Dense
from keras.optimizers import Adam
from keras.metrics import RootMeanSquaredError

In [128]:
def train_model(X_train , y_train , X_valid , y_valid , X_test , y_test , params , epochs):
    """Train one regression network inside a nested MLflow run and score it.

    The network is Normalization -> Dense(64, relu) -> Dense(1), compiled with
    Adam at ``params["lr"]`` and a mean-squared-error loss. The normalisation
    statistics are computed from ``X_train`` only.

    Args:
        X_train, y_train: Training features / targets passed to ``model.fit``.
        X_valid, y_valid: Validation split. Monitored during fitting and used
            to compute the loss returned to the optimiser.
        X_test, y_test: Held-out split. Scored once for reporting only; it is
            deliberately NOT used as the tuning objective, otherwise the
            hyperparameter search would be selecting on the test data.
        params: Hyperparameter dict; must contain ``"lr"``. Logged as-is.
        epochs: Number of training epochs.

    Returns:
        dict with ``"loss"`` (validation RMSE), ``"status"`` (``STATUS_OK``),
        ``"model"`` (the fitted Keras model) and ``"test_rmse"`` (RMSE on the
        held-out split).

    Note:
        Uses the module-level ``signature`` when logging the model.
    """
    # Per-feature statistics from the training split only.
    mean = np.mean(X_train, axis=0)
    var = np.var(X_train, axis=0)

    model = keras.Sequential()
    model.add(Input([X_train.shape[1]]))
    model.add(Normalization(mean=mean, variance=var))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1))

    metric = RootMeanSquaredError()
    model.compile(
        optimizer=Adam(learning_rate=params["lr"]),
        loss="mean_squared_error",
        metrics=[metric],
    )

    with mlflow.start_run(nested=True):
        model.fit(
            X_train,
            y_train,
            validation_data=(X_valid, y_valid),
            epochs=epochs,
            batch_size=32,
        )

        # evaluate() returns [loss, *metrics]; index 1 is the RMSE metric.
        # The tuning objective comes from the validation split so that the
        # test split stays untouched by hyperparameter selection.
        valid_rmse = model.evaluate(X_valid, y_valid)[1]
        test_rmse = model.evaluate(X_test, y_test)[1]

        mlflow.log_params(params)
        mlflow.log_metric("eval_rmse", valid_rmse)
        mlflow.log_metric("test_rmse", test_rmse)

        mlflow.tensorflow.log_model(model, "model", signature=signature)

        return {
            "loss": valid_rmse,
            "status": STATUS_OK,
            "model": model,
            "test_rmse": test_rmse,
        }

In [129]:
def objective(params):
    """Hyperopt objective: train one model for 3 epochs with ``params``.

    Forwards the notebook-level data splits to ``train_model`` and returns its
    result dict unchanged; MLflow tracking happens inside ``train_model``.
    """
    return train_model(
        X_train, y_train,
        X_valid, y_valid,
        X_test, y_test,
        params=params,
        epochs=3,
    )

In [130]:
# Search space: the learning rate is sampled log-uniformly between 1e-5 and 1e-2.
space = dict(lr=hp.loguniform("lr", np.log(1e-5), np.log(1e-2)))

In [131]:
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Wine Quality")

# Parent run: each objective evaluation opens its own nested run inside it.
with mlflow.start_run():
    trial = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=3,
        trials=trial,
    )

    # Pick the recorded evaluation with the lowest loss.
    ranked_results = sorted(trial.results, key=lambda result: result["loss"])
    best_run = ranked_results[0]

    # Record the winning hyperparameters, score and model on the parent run.
    mlflow.log_params(best)
    mlflow.log_metric("eval_rmse", best_run["loss"])
    mlflow.tensorflow.log_model(best_run["model"], "model", signature=signature)

    print(f"Best parameters: {best}")
    print(f"Best eval rmse: {best_run['loss']}")


2025/05/20 22:38:07 INFO mlflow.tracking.fluent: Experiment with name 'Wine Quality' does not exist. Creating a new experiment.


Epoch 1/3                                            

[1m 1/98[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:03[0m 658ms/step - loss: 37.8328 - root_mean_squared_error: 6.1508
[1m22/98[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 2ms/step - loss: 38.6682 - root_mean_squared_error: 6.2183    
[1m45/98[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m0s[0m 2ms/step - loss: 38.1210 - root_mean_squared_error: 6.1740
[1m71/98[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 2ms/step - loss: 37.4164 - root_mean_squared_error: 6.1162
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 36.7086 - root_mean_squared_error: 6.0575
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 36.6837 - root_mean_squared_error: 6.0554 - val_loss: 29.3793 - val_root_mean_squared_error: 5.4203

Epoch 2/3                                            

[1m 1/98[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 51ms/step - loss: 28.5233 - root_mean_s

In [None]:
import mlflow

# URI of a model logged under a specific run id; update it when the model is
# re-logged, because the id is hard-coded here.
model_uri = 'runs:/53bd5d1d15d74d49b87cfebc084baa30/model'

# The test-split features serve as the prediction input.
input_data = X_test

# Validate the logged model by predicting with its logged dependencies in the
# local environment. See:
# https://mlflow.org/docs/latest/models.html#validate-models-before-deployment
mlflow.models.predict(model_uri=model_uri, input_data=input_data, env_manager="local")


2025/05/20 23:05:15 INFO mlflow.models.python_api: It is highly recommended to use `uv` as the environment manager for predicting with MLflow models as its performance is significantly better than other environment managers. Run `pip install uv` to install uv. See https://docs.astral.sh/uv/getting-started/installation for other installation methods.
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 60.79it/s]  
2025/05/20 23:05:15 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 54.94it/s] 


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
{"predictions": [[5.469059467315674], [6.422222137451172], [6.413414001464844], [5.503286361694336], [5.267496109008789], [6.1405229568481445], [5.412426948547363], [5.712819576263428], [5.832468032836914], [5.370978355407715], [5.638043403625488], [5.744564056396484], [6.315342426300049], [6.073831558227539], [5.363818168640137], [8.270758628845215], [5.764993667602539], [6.188596248626709], [5.603005409240723], [6.166471481323242], [5.441128730773926], [5.6060709953308105], [4.633454322814941], [5.079880714416504], [7.158854007720947], [6.52211332321167], [6.371703147888184], [5.99822998046875], [5.943861961364746], [5.508855819702148], [5.80950927734375], [5.958525657653809], [5.673552513122559], [5.527407646179199], [5.448401927947998], [5.347448348999023], [5.5475945472717285], [6.595968246459961], [6.070286750793457], [5.946592807769775], [4.935441493988037], [6.7395100593566895], [6.7451019287109375], [5.47