In [2]:
import os

import keras
import mlflow
import numpy as np
import pandas as pd
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from mlflow.models import infer_signature
from sklearn.model_selection import train_test_split

In [3]:
# Location of the white-wine quality CSV (semicolon-separated).
# Set the WINE_DATA_PATH environment variable to point at another copy of the file;
# when it is unset, the original local path is used.
DATA_PATH = os.environ.get('WINE_DATA_PATH', r'C:\Users\nico_\Desktop\MLOPS\data\winequality-white.csv')
data = pd.read_csv(DATA_PATH, sep=';')

In [4]:
# Preview the first rows to check that the columns were parsed correctly.
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [5]:
# Dimensions of the loaded table as (rows, columns).
data.shape

(4898, 12)

In [6]:
# Count missing values in each column.
data.isna().sum()

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64

In [7]:
# Split the data: hold out 25% of the rows as the test set (fixed seed so the split is repeatable).
train, test = train_test_split(data, test_size=0.25, random_state=42)

In [8]:
# Look at the training split as a raw NumPy array.
train.values

array([[ 6.3 ,  0.25,  0.22, ...,  0.5 , 10.5 ,  6.  ],
       [ 7.8 ,  0.3 ,  0.29, ...,  0.38,  9.  ,  6.  ],
       [ 7.4 ,  0.38,  0.27, ...,  0.43, 10.  ,  5.  ],
       ...,
       [ 7.6 ,  0.27,  0.52, ...,  0.53, 11.4 ,  6.  ],
       [ 6.3 ,  0.24,  0.29, ...,  0.38, 10.6 ,  6.  ],
       [ 8.1 ,  0.27,  0.35, ...,  0.63, 10.4 ,  8.  ]])

In [9]:
# Training split: the `quality` column is the target, every other column is a feature.
y_train = train['quality'].values
X_train = train.drop(columns=['quality']).values

# Test split, separated the same way.
y_test = test['quality'].values
X_test = test.drop(columns=['quality']).values

# Carve a validation set (20%) out of the training arrays.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Model signature inferred from the training inputs and targets; reused when logging models.
signature = infer_signature(X_train, y_train)

In [10]:
# Per-column mean of the training features.
np.mean(X_train, axis=0)

array([6.86621852e+00, 2.80377808e-01, 3.32597005e-01, 6.42164738e+00,
       4.55513955e-02, 3.53556841e+01, 1.38792376e+02, 9.94074221e-01,
       3.18919333e+00, 4.88396869e-01, 1.05005673e+01])

In [None]:
# ANN Model

def train_model(params, epochs, X_train, y_train, X_valid, y_valid, X_test, y_test):
    """Fit a small regression network and record the trial in a nested MLflow run.

    Args:
        params: Mapping providing 'lr' and 'momentum' for the SGD optimizer.
        epochs: Number of epochs passed to ``model.fit``.
        X_train: Training inputs; also the source of the normalization statistics.
        y_train: Training targets.
        X_valid: Validation inputs used while fitting and for the reported metric.
        y_valid: Validation targets.
        X_test: Accepted but not used in this function.
        y_test: Accepted but not used in this function.

    Returns:
        A dict with 'loss' (RMSE on the validation data), 'status' (STATUS_OK)
        and 'model' (the fitted Keras model).

    Note:
        The model is logged with the notebook-level ``signature`` variable, which
        must be defined before this function is called.
    """
    # Per-feature statistics of the training inputs feed the normalization layer.
    feature_mean = np.mean(X_train, axis=0)
    feature_var = np.var(X_train, axis=0)

    # Architecture: normalization -> one hidden ReLU layer -> single linear output.
    layers = [
        keras.Input([X_train.shape[1]]),
        keras.layers.Normalization(mean=feature_mean, variance=feature_var),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(1),
    ]
    model = keras.Sequential(layers)

    # SGD configured from the sampled hyperparameters; RMSE is tracked next to the MSE loss.
    sgd = keras.optimizers.SGD(learning_rate=params['lr'], momentum=params['momentum'])
    rmse_metric = keras.metrics.RootMeanSquaredError()
    model.compile(optimizer=sgd, loss='mean_squared_error', metrics=[rmse_metric])

    with mlflow.start_run(nested=True):
        # Train the model
        model.fit(
            X_train,
            y_train,
            epochs=epochs,
            batch_size=64,
            validation_data=(X_valid, y_valid),
        )

        # evaluate() returns [loss, rmse]; index 1 is the RMSE metric.
        eval_rmse = model.evaluate(X_valid, y_valid, batch_size=64)[1]

        # Record the hyperparameters, the validation metric and the model itself.
        mlflow.log_params(params)
        mlflow.log_metric('eval_rmse', eval_rmse)
        mlflow.tensorflow.log_model(model, 'model', signature=signature)

        outcome = dict(loss=eval_rmse, status=STATUS_OK, model=model)
        return outcome


# Each trial is logged as a nested run (a "sub-experiment") inside a parent run.
# The parent run (the outer mlflow.start_run()) then groups all of the nested runs together.
# This keeps the logs organized and makes each trial easy to analyze.
# nested=True is required: without it, calling mlflow.start_run() while the parent run is still active raises an error.



In [None]:
def objective(params):
    """Objective function for Hyperopt: train once with `params` and return the trial result.

    Args:
        params: Hyperparameter mapping forwarded to ``train_model``.

    Returns:
        The dict returned by ``train_model``.
    """
    # The notebook-level data splits are passed through unchanged on every trial.
    datasets = dict(
        X_train=X_train,
        y_train=y_train,
        X_valid=X_valid,
        y_valid=y_valid,
        X_test=X_test,
        y_test=y_test,
    )
    # MLflow will track the parameters and results for each run
    return train_model(params, epochs=3, **datasets)

# Hyperopt calls this function to try different hyperparameters and select the best ones.

In [None]:
# Search space handed to Hyperopt: one entry per tunable hyperparameter.
space = dict(
    # Learning rate drawn log-uniformly between 1e-5 and 1e-1.
    lr=hp.loguniform('lr', np.log(1e-5), np.log(1e-1)),
    # Momentum drawn uniformly between 0 and 1.
    momentum=hp.uniform('momentum', 0.0, 1.0),
)

# The bounds are given as logs because hp.loguniform samples uniformly in log space, so very small values in the interval are explored too.

params contains something like:
params = {
    'lr': 0.00123,  # A value chosen by Hyperopt
    'momentum': 0.75
}

In [None]:
mlflow.set_experiment('/wine-quality')
with mlflow.start_run():
    # Conduct the hyperparameter search using Hyperopt
    trials = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,  # Tree-structured Parzen Estimator: proposes each new (lr, momentum) pair based on the previous trials
        max_evals=4,       # Hyperopt evaluates 4 different hyperparameter combinations
        trials=trials,
    )

    # Fetch the details of the best run.
    # `best` describes the trial Hyperopt itself considers best (lowest non-NaN loss).
    # Reading the result from that same trial keeps the logged parameters, metric and
    # model consistent; sorting the raw results could put a NaN loss (diverged training)
    # first and pair the wrong model with `best`.
    best_run = trials.best_trial['result']

    # Log the best parameters, loss, and model
    mlflow.log_params(best)
    mlflow.log_metric('eval_rmse', best_run['loss'])
    mlflow.tensorflow.log_model(best_run['model'], 'model', signature=signature)

    # Print out the best parameters and corresponding loss
    print(f'Best parameters: {best}')
    print(f"Best eval rmse: {best_run['loss']}")
    

Epoch 1/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19s[0m 433ms/step - loss: 35.9184 - root_mean_squared_error: 5.9932
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 32.5909 - root_mean_squared_error: 5.7077 - val_loss: 27.7882 - val_root_mean_squared_error: 5.2715

Epoch 2/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 28.3634 - root_mean_squared_error: 5.3257
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 26.4240 - root_mean_squared_error: 5.1399 - val_loss: 22.6039 - val_root_mean_squared_error: 4.7544

Epoch 3/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 21.3557 - root_mean_squared_error: 4.6212
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 21.2869 - root_mean_squared_error: 4.6134 -

In [12]:
## Inferencing

# Data to score. Swap in your own input example if needed; it must be a data
# instance suitable for pyfunc prediction.
input_data = X_test

# URI of a logged model, addressed by its run ID.
model_uri = 'runs:/f65dbf360b1a4a73ae7730f164ba2096/model'

# Check that the model can predict on the input using the dependencies logged with it.
# Background: https://mlflow.org/docs/latest/models.html#validate-models-before-deployment
mlflow.models.predict(model_uri=model_uri, input_data=input_data, env_manager="local")

2025/03/18 12:24:54 INFO mlflow.models.python_api: It is highly recommended to use `uv` as the environment manager for predicting with MLflow models as its performance is significantly better than other environment managers. Run `pip install uv` to install uv. See https://docs.astral.sh/uv/getting-started/installation for other installation methods.
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 1030.51it/s]
2025/03/18 12:24:54 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
{"predictions": [[4.479798316955566], [6.618078231811523], [6.442264556884766], [4.634179592132568], [4.935753345489502], [7.184676647186279], [5.5371270179748535], [5.8611650466918945], [6.590724945068359], [6.238991737365723], [7.162424564361572], [4.432512283325195], [6.945852756500244], [4.436302661895752], [6.385372161865234], [4.773663520812988], [7.149472236633301], [6.696700572967529], [6.256653785705566], [4.8512043952941895], [4.8480119705200195], [6.290306568145752], [3.9851937294006348], [4.543614864349365], [4.412691593170166], [4.271538257598877], [4.040979385375977], [5.473069667816162], [5.743854999542236], [4.180917263031006], [5.323483943939209], [4.8478827476501465], [6.494192600250244], [4.590468406677246], [5.444142818450928], [6.346088886260986], [7.522966384887695], [4.624701499938965], [4.6655378341674805], [6.389747142791748], [5.342738628387451], [5.682969570159912], [5.3607401847839355]

In [11]:
# Load model as a PyFuncModel.
model_uri = 'runs:/f65dbf360b1a4a73ae7730f164ba2096/model'
loaded_model = mlflow.pyfunc.load_model(model_uri)

# Predict on a Pandas DataFrame (`pd` comes from the notebook's import cell).
loaded_model.predict(pd.DataFrame(X_test))

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


array([[4.4797983],
       [6.618078 ],
       [6.4422646],
       ...,
       [6.926213 ],
       [6.375011 ],
       [4.9501357]], dtype=float32)

In [None]:
## Register the logged model in the MLflow Model Registry under the name "wine-quality"
mlflow.register_model(model_uri=model_uri, name="wine-quality")

Successfully registered model 'wine-quality'.
Created version '1' of model 'wine-quality'.


<ModelVersion: aliases=[], creation_timestamp=1742297853255, current_stage='None', description=None, last_updated_timestamp=1742297853255, name='wine-quality', run_id='f65dbf360b1a4a73ae7730f164ba2096', run_link=None, source='file:///c:/Users/nico_/Desktop/MLOPS/MLFlowStarter/DLMLFLOW/mlruns/110240270250196691/f65dbf360b1a4a73ae7730f164ba2096/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>