## In this quickstart, we will:
- Run a hyperparameter sweep on a training script
- Compare the results of the runs in the MLflow UI
- Choose the best run and register it as a model
- Deploy the model to a REST API
- Build a container image suitable for deployment to a cloud platform

![Sample Image](./agenda.png)

In [1]:
import pandas as pd
import numpy as np
import mlflow
from mlflow.models import infer_signature # i repeatedly forget .models in this line
from urllib.parse import urlparse
import keras
import tensorflow as tf
from hyperopt import STATUS_OK, Trials, fmin,hp,tpe
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


### Preprocessing

In [2]:
# Load the white-wine quality dataset (semicolon-separated CSV).
data = pd.read_csv(
    "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv",
    sep=";",
)

# Features are every column except "quality"; "quality" is the target.
X = data.drop(columns=["quality"]).values
y = data["quality"].values.ravel()

# Hold out 25% as the test set, then carve 20% of the remainder off as
# the validation set. Fixed seeds keep the splits reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)
train_x, valid_x, train_y, valid_y = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Model signature inferred from the training inputs and targets; reused
# whenever a model is logged to MLflow.
signature = infer_signature(train_x, train_y)

### ANN Model

In [3]:
def train_model(train_x, train_y, valid_x, valid_y, X_test, y_test, params, epochs):
    """Fit one Keras regression network and record it in a nested MLflow run.

    The network normalizes its inputs with the per-feature mean and variance
    of ``train_x``, feeds them through a 64-unit ReLU layer and emits a
    single value. It is optimized with SGD on mean squared error.

    Args:
        train_x: Training features; ``train_x.shape[1]`` sets the input width.
        train_y: Training targets.
        valid_x: Validation features, used while fitting and for the final
            evaluation.
        valid_y: Validation targets.
        X_test: Accepted but not used by this function.
        y_test: Accepted but not used by this function.
        params: Mapping providing ``"lr"`` and ``"momentum"`` for the SGD
            optimizer; the whole mapping is logged to MLflow.
        epochs: Number of epochs passed to ``model.fit``.

    Returns:
        A dict with ``"loss"`` (the validation RMSE), ``"status"``
        (``STATUS_OK``) and ``"model"`` (the fitted Keras model).
    """
    # Per-feature statistics handed to the Normalization layer.
    feature_mean = np.mean(train_x, axis=0)
    feature_var = np.var(train_x, axis=0)

    layers = [
        keras.Input([train_x.shape[1]]),
        keras.layers.Normalization(mean=feature_mean, variance=feature_var),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(1),
    ]
    model = keras.Sequential(layers)

    optimizer = keras.optimizers.SGD(
        learning_rate=params["lr"],
        momentum=params["momentum"],
    )
    rmse_metric = keras.metrics.RootMeanSquaredError()
    model.compile(
        optimizer=optimizer,
        loss="mean_squared_error",
        metrics=[rmse_metric],
    )

    # nested=True attaches this run to whichever MLflow run is active.
    with mlflow.start_run(nested=True):
        model.fit(
            train_x,
            train_y,
            validation_data=(valid_x, valid_y),
            epochs=epochs,
            batch_size=64,
        )

        # Index 1 of the evaluation result is taken as the RMSE metric
        # (index 0 is presumably the loss).
        eval_rmse = model.evaluate(valid_x, valid_y, batch_size=64)[1]

        mlflow.log_params(params)
        mlflow.log_metric("eval_rmse", eval_rmse)
        # `signature` is the module-level signature built during preprocessing.
        mlflow.tensorflow.log_model(model, "model", signature=signature)

        return {"loss": eval_rmse, "status": STATUS_OK, "model": model}

In [4]:
def objective(params):
    """Hyperopt objective: train with ``params`` for 3 epochs.

    MLflow will track the parameters and results for each run. The data
    splits are read from module-level variables, and the dict returned by
    ``train_model`` is passed straight back to the caller.
    """
    return train_model(
        train_x=train_x,
        train_y=train_y,
        valid_x=valid_x,
        valid_y=valid_y,
        X_test=X_test,
        y_test=y_test,
        params=params,
        epochs=3,
    )

In [5]:
# Hyperopt search space: learning rate drawn log-uniformly from
# [1e-5, 1e-1], momentum drawn uniformly from [0.0, 1.0].
space = dict(
    lr=hp.loguniform("lr", np.log(1e-5), np.log(1e-1)),
    momentum=hp.uniform("momentum", 0.0, 1.0),
)

In [6]:
mlflow.set_experiment("/wine-quality")

# Parent run for the sweep; each trial evaluated by fmin goes through
# objective -> train_model, which opens its own nested run.
with mlflow.start_run():
    trials = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=4,
        trials=trials,
    )

    # Use the result of the very trial that fmin reported as `best`.
    # Sorting trials.results by loss is not NaN-safe (a diverged SGD trial
    # can produce a NaN loss), so it could select a different trial than
    # `best` and leave the logged params and the logged model out of sync.
    # Trials.best_trial skips NaN losses and failed trials.
    best_run = trials.best_trial["result"]

    # Record the winning hyperparameters, metric and model on the parent run.
    mlflow.log_params(best)
    mlflow.log_metric("eval_rmse", best_run["loss"])
    mlflow.tensorflow.log_model(best_run["model"], "model", signature=signature)
    print(f"Best parameters: {best}")
    print(f"Best eval rmse: {best_run['loss']}")

Epoch 1/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m16s[0m 356ms/step - loss: 33.2692 - root_mean_squared_error: 5.7679
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 22.8137 - root_mean_squared_error: 4.7475 - val_loss: 5.6831 - val_root_mean_squared_error: 2.3839

Epoch 2/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 19ms/step - loss: 6.1731 - root_mean_squared_error: 2.4846
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4.1413 - root_mean_squared_error: 2.0277 - val_loss: 2.2998 - val_root_mean_squared_error: 1.5165

Epoch 3/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 20ms/step - loss: 1.9682 - root_mean_squared_error: 1.4029
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.2017 - root_mean_squared_error: 1.4823 - val_l

In [None]:
#### ANN cheat sheet

##### Data Prep 
- load
- divide into train validation and test sets
- signature
##### Model Definition
- create an objective function that takes one parameter, params, and returns {"loss": eval_rmse, "status": STATUS_OK, "model": model}
- create training function that returns {"loss": eval_rmse, "status": STATUS_OK, "model": model}
    - define model with layers like input(size) normalization(mean,var) dense(size,activation) etc
    - compile configuration including optimizer(and its lr,momentum), loss, metrics
    - start nested mlflow experiment
    - call model.fit, specifying the training data and target, validation_data, epochs, and batch size
    - get validation results
    - log params, metrics, model and return 

##### Hyperparameter Tuning
- specify hyperparameter space
- start mlflow experiment
- find the best values using the fmin function, where we need to specify fn, space, algo, max_evals, trials
- log params, metric,model of best trial

