In [1]:
import numpy as np

In [2]:
import tensorflow as tf 
import keras 
import pandas as pd 
from hyperopt import STATUS_OK,fmin,hp,tpe,Trials
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models import infer_signature

In [3]:
# Wine-quality dataset published in the MLflow example repository (comma-separated).
WINE_QUALITY_CSV_URL = "https://raw.githubusercontent.com/mlflow/mlflow-example/refs/heads/master/wine-quality.csv"
data = pd.read_csv(WINE_QUALITY_CSV_URL, sep=",")

In [4]:
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [5]:
# Hold out a test partition (scikit-learn's default split ratio).
# random_state is pinned so a fresh kernel reproduces the same partition,
# consistent with the seeded validation split further down.
train,test = train_test_split(data,random_state=42)

In [6]:
len(train)

3673

In [7]:
len(test)

1225

In [8]:
train.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
2120,6.8,0.25,0.27,10.7,0.076,47.0,154.0,0.9967,3.05,0.38,9.0,5
730,6.5,0.23,0.33,13.8,0.042,25.0,139.0,0.99695,3.35,0.56,10.4,6
4586,5.4,0.29,0.38,1.2,0.029,31.0,132.0,0.98895,3.28,0.36,12.4,6
427,6.8,0.29,0.5,13.3,0.053,48.0,194.0,0.9974,3.09,0.45,9.4,5
90,7.2,0.39,0.63,11.0,0.044,55.0,156.0,0.9974,3.09,0.44,8.7,6


In [9]:
test.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
3532,7.2,0.23,0.46,6.4,0.036,17.0,85.0,0.99279,3.1,0.78,11.7,6
3043,7.2,0.21,1.0,1.1,0.154,46.0,114.0,0.9931,2.95,0.43,9.2,6
1622,6.5,0.44,0.49,7.7,0.045,16.0,169.0,0.9957,3.11,0.37,8.7,6
2184,6.4,0.33,0.24,9.8,0.041,29.0,109.0,0.9956,3.29,0.47,10.1,6
2780,6.2,0.235,0.34,1.9,0.036,4.0,117.0,0.99032,3.4,0.44,12.2,5


In [10]:
train.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

In [11]:
train.describe()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
count,3673.0,3673.0,3673.0,3673.0,3673.0,3673.0,3673.0,3673.0,3673.0,3673.0,3673.0,3673.0
mean,6.84936,0.278008,0.333496,6.413232,0.045953,35.681323,138.767901,0.994041,3.188364,0.49006,10.503668,5.873128
std,0.851329,0.10012,0.123817,5.104719,0.022783,17.514286,42.858007,0.003013,0.154779,0.113475,1.230381,0.886672
min,3.8,0.08,0.0,0.6,0.012,2.0,9.0,0.98711,2.77,0.22,8.0,3.0
25%,6.3,0.21,0.27,1.7,0.036,24.0,108.0,0.99176,3.08,0.41,9.5,5.0
50%,6.8,0.26,0.32,5.2,0.043,34.0,135.0,0.9938,3.18,0.47,10.4,6.0
75%,7.3,0.32,0.39,10.0,0.05,46.0,168.0,0.9961,3.28,0.55,11.4,6.0
max,14.2,1.1,1.66,65.8,0.346,289.0,440.0,1.03898,3.82,1.06,14.2,9.0


In [12]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3673 entries, 2120 to 3677
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         3673 non-null   float64
 1   volatile acidity      3673 non-null   float64
 2   citric acid           3673 non-null   float64
 3   residual sugar        3673 non-null   float64
 4   chlorides             3673 non-null   float64
 5   free sulfur dioxide   3673 non-null   float64
 6   total sulfur dioxide  3673 non-null   float64
 7   density               3673 non-null   float64
 8   pH                    3673 non-null   float64
 9   sulphates             3673 non-null   float64
 10  alcohol               3673 non-null   float64
 11  quality               3673 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 373.0 KB


In [13]:
train.sample()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
3861,6.2,0.22,0.3,12.4,0.054,108.0,152.0,0.99728,3.1,0.47,9.5,6


In [14]:
# `quality` is the dependent variable (the target).
# All other columns are independent variables (the features).

In [15]:
# Feature matrix: every column except the target, as a NumPy array.
train_x = train.drop(columns=['quality']).to_numpy()

In [16]:
train[['quality']].values ## 2d array so use ravel 

array([[5],
       [6],
       [6],
       ...,
       [5],
       [6],
       [6]])

In [17]:
# Target vector: selecting the single column as a Series yields a 1-D array directly.
train_y = train['quality'].to_numpy()

In [18]:
# Test feature matrix: every column except the target, as a NumPy array.
test_x = test.drop(columns=['quality']).to_numpy()

In [19]:
test_y = train[['quality']].values.ravel() # ravel single dimension array , 

In [20]:
### test_x, test_y is test data 

In [21]:
train_x,valid_x,train_y,valid_y=train_test_split(train_x,test_y,test_size=0.20,random_state=42)

In [22]:
# Infer the MLflow model signature from the training features and targets;
# this object is passed to log_model when models are logged further down.
signature = infer_signature(train_x,train_y)

In [23]:
## ANN (artificial neural network) model

In [54]:
def train_model(params,epochs,train_x,train_y,valid_x,valid_y,test_x,test_y):
    """Fit a small feed-forward regressor and record the trial in a nested MLflow run.

    Args:
        params: Mapping with the SGD settings under the keys ``'lr'`` and
            ``'momentum'``; it is also logged to MLflow unchanged.
        epochs: Number of epochs passed to ``model.fit``.
        train_x: Training features; their per-column mean and variance
            parameterise the normalization layer.
        train_y: Training targets.
        valid_x: Validation features, used during fitting and for the final
            evaluation.
        valid_y: Validation targets.
        test_x: Accepted for interface compatibility; not used here.
        test_y: Accepted for interface compatibility; not used here.

    Returns:
        A dict with ``"loss"`` (RMSE on the validation data), ``"status"``
        (``STATUS_OK``) and ``'model'`` (the fitted Keras model).

    Note:
        Reads the module-level ``signature`` when logging the model.
    """
    # Normalization statistics come from the training features only.
    feature_mean = np.mean(train_x, axis=0)
    feature_variance = np.var(train_x, axis=0)

    # Architecture: input -> normalization -> one hidden ReLU layer -> scalar output.
    layer_stack = [
        keras.Input([train_x.shape[1]]),
        keras.layers.Normalization(mean=feature_mean, variance=feature_variance),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(1),
    ]
    model = keras.Sequential(layer_stack)

    sgd = keras.optimizers.SGD(
        learning_rate=params['lr'],
        momentum=params['momentum'],
    )
    model.compile(
        optimizer=sgd,
        loss='mean_squared_error',
        metrics=[keras.metrics.RootMeanSquaredError()],
    )

    # Each trial gets its own child run beneath whichever run is currently active.
    with mlflow.start_run(nested=True):
        model.fit(
            train_x,
            train_y,
            validation_data=(valid_x, valid_y),
            epochs=epochs,
            batch_size=64,
        )

        # evaluate() returns [loss, rmse] because exactly one metric was compiled in.
        _, eval_rmse = model.evaluate(valid_x, valid_y, batch_size=64)

        # Record the hyperparameters, the score and the fitted model for this trial.
        mlflow.log_params(params)
        mlflow.log_metric("eval_rmse", eval_rmse)
        mlflow.tensorflow.log_model(model, "model", signature=signature)

    return {"loss": eval_rmse, "status": STATUS_OK, 'model': model}
    



In [55]:
def objective(params):
    """Hyperopt objective: train one model for the sampled ``params``.

    The data splits are read from the notebook's module-level arrays and the
    model is trained for a fixed 3 epochs.

    Args:
        params: Hyperparameter sample forwarded unchanged to ``train_model``.

    Returns:
        The result dict produced by ``train_model``.
    """
    data_splits = dict(
        train_x=train_x,
        train_y=train_y,
        valid_x=valid_x,
        valid_y=valid_y,
        test_x=test_x,
        test_y=test_y,
    )
    return train_model(params, epochs=3, **data_splits)

In [56]:
# Hyperopt search space:
#   lr       - log-uniform between 1e-5 and 1e-1
#   momentum - uniform between 0.0 and 1.0
lr_low, lr_high = np.log(1e-5), np.log(1e-1)
space = dict(
    lr=hp.loguniform("lr", lr_low, lr_high),
    momentum=hp.uniform('momentum', 0.0, 1.0),
)

In [57]:
# Make "/wine-quality" the active MLflow experiment for the runs started below.
mlflow.set_experiment("/wine-quality")

<Experiment: artifact_location='file:///d:/mlflow/deep_learning%20project/mlruns/164065983286707495', creation_time=1733849611023, experiment_id='164065983286707495', last_update_time=1733849611023, lifecycle_stage='active', name='/wine-quality', tags={}>

In [58]:
# Parent run: each objective() call opens its own nested child run beneath it.
with mlflow.start_run():
    # Trials keeps every result dict returned by objective(), fitted model included.
    trials = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=4,
        trials=trials,
    )

    # The trial with the lowest validation RMSE wins; min() avoids sorting
    # the whole list and returns the same entry as sorted(...)[0].
    best_run = min(trials.results, key=lambda result: result['loss'])

    # Record the winning hyperparameters on the parent run as well, so the
    # logged metric and model can be traced back to the settings behind them.
    mlflow.log_params(best)
    mlflow.log_metric('eval_rmse', best_run['loss'])
    mlflow.tensorflow.log_model(best_run['model'], 'model', signature=signature)

    print(f"best parameters {best} ")
    print(f"best eval rmse {best_run['loss']}")

Epoch 1/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:38[0m 5s/step - loss: 35.9813 - root_mean_squared_error: 5.9984
[1m 2/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 63ms/step - loss: 34.9909 - root_mean_squared_error: 5.9147
[1m33/46[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 4ms/step - loss: 32.7371 - root_mean_squared_error: 5.7214 
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - loss: 32.6071 - root_mean_squared_error: 5.7100 - val_loss: 31.3800 - val_root_mean_squared_error: 5.6018

Epoch 2/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 30ms/step - loss: 33.3994 - root_mean_squared_error: 5.7792
[1m33/46[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 2ms/step - loss: 31.8631 - root_mean_squared_error: 5.6446 
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 31.6994 - root_mean_squared_error:

In [59]:
from mlflow.models import convert_input_example_to_serving_input, validate_serving_input

# NOTE(review): this run ID is hard-coded from one recorded execution; a fresh
# Restart & Run All creates new runs, so confirm the ID points at the intended model.
model_uri = 'runs:/a5512b71ac9c4ccfae061269d9fdff8c/model'

# The logged model does not contain an input_example, so a serving payload is
# generated manually from the test features to verify the model before deployment.
serving_payload = convert_input_example_to_serving_input(test_x)

# Validate that the serving payload works on the model.
validate_serving_input(model_uri, serving_payload)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 1166.33it/s]


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


array([[6.2001705],
       [6.43152  ],
       [5.247196 ],
       ...,
       [5.7217097],
       [5.2694497],
       [6.7257595]], dtype=float32)

In [60]:
# Register the model stored at `model_uri` under the registry name 'wine_model'.
mlflow.register_model(model_uri,'wine_model')

Successfully registered model 'wine_model'.
Created version '1' of model 'wine_model'.


<ModelVersion: aliases=[], creation_timestamp=1733851108236, current_stage='None', description=None, last_updated_timestamp=1733851108236, name='wine_model', run_id='a5512b71ac9c4ccfae061269d9fdff8c', run_link=None, source='file:///d:/mlflow/deep_learning%20project/mlruns/164065983286707495/a5512b71ac9c4ccfae061269d9fdff8c/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>