# MLflow usage example


This Jupyter notebook predicts wine quality using `sklearn.linear_model.ElasticNet`, a regularized linear regression that combines $L_1$ and $L_2$ penalties, and logs the parameters, metrics, and model of each run to MLflow.

In [None]:
import mlflow
import mlflow.sklearn

import pandas as pd
import os
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

from dotenv import load_dotenv
load_dotenv()

In [None]:
# The dataset directory is read from the DATA_DIR environment variable; a
# KeyError is raised here if it is not set. load_dotenv() has already been
# called in the imports cell, so the value may come from a .env file.
data_dir = os.environ["DATA_DIR"]
# Full path to the red-wine quality CSV inside that directory.
wine_quality_path = os.path.join(data_dir, "winequality-red.csv")

In [None]:
# Load the dataset into a DataFrame; the file uses ';' as its field separator.
data = pd.read_csv(wine_quality_path, sep=';')

In [None]:
# Preview the first rows of the dataset (displayed as the cell output).
data.head()

In [None]:
# Show summary statistics of the dataset (displayed as the cell output).
data.describe()

In [None]:
def eval_metrics(actual, pred):
    """Compute regression error metrics for a set of predictions.

    Args:
        actual: Ground-truth target values.
        pred: Predicted values to compare against ``actual``.

    Returns:
        A ``(rmse, mae)`` tuple: the square root of the mean squared error
        and the mean absolute error.
    """
    mean_sq_err = mean_squared_error(actual, pred)
    return np.sqrt(mean_sq_err), mean_absolute_error(actual, pred)

In [None]:
# Split the data into training and test sets (0.75 / 0.25).
# The test size is spelled out instead of relying on the library default, and
# the seed is fixed (same value the model uses for its random_state) so the
# partition is reproducible across kernel restarts; otherwise metrics logged
# in different sessions would be computed on different splits.
train_set, test_set = train_test_split(data, test_size=0.25, random_state=42)

In [None]:
# Separate the input features from the target variable ("quality").
# Double brackets keep the targets as single-column DataFrames.
train_y = train_set[["quality"]]
train_x = train_set.drop(columns=["quality"])
test_y = test_set[["quality"]]
test_x = test_set.drop(columns=["quality"])

In [None]:
# define train and validate functions
def train(train_x, train_y, alpha, l1_ratio):
    """Fit an ElasticNet regressor on the training data.

    Args:
        train_x: Training input features.
        train_y: Training target values.
        alpha: Value forwarded to ElasticNet's ``alpha`` argument.
        l1_ratio: Value forwarded to ElasticNet's ``l1_ratio`` argument.

    Returns:
        The fitted ``ElasticNet`` estimator.
    """
    # NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed
    # in 1.2 -- confirm the pinned scikit-learn version, or move feature
    # scaling into an explicit preprocessing step.
    hyperparams = {
        "alpha": alpha,
        "l1_ratio": l1_ratio,
        "normalize": True,
        "max_iter": 10000,
        "random_state": 42,  # fixed seed for the estimator
    }
    model = ElasticNet(**hyperparams)
    model.fit(train_x, train_y)
    return model

def validate(elastic_net, x, y):
    """Evaluate a fitted model on the given inputs and targets.

    Args:
        elastic_net: Fitted estimator exposing ``predict``.
        x: Input features to predict on.
        y: True target values corresponding to ``x``.

    Returns:
        The ``(rmse, mae)`` tuple produced by ``eval_metrics``.
    """
    # Score the data that was passed in. Previously this read the notebook
    # globals test_x / test_y, so every call (including the "train"
    # evaluation) reported test-set metrics.
    predicted_qualities = elastic_net.predict(x)
    return eval_metrics(y, predicted_qualities)

In [None]:
def run_experiment(data, alpha, l1_ratio):
    """Train, evaluate and log a single ElasticNet run to MLflow.

    Args:
        data: 4-tuple ``(train_x, train_y, test_x, test_y)``.
        alpha: ElasticNet ``alpha`` value, passed to ``train`` and logged.
        l1_ratio: ElasticNet ``l1_ratio`` value, passed to ``train`` and logged.

    Side effects:
        Prints the train/test RMSE and MAE, and logs both parameters, the
        four metrics and the fitted model inside one ``mlflow.start_run()``
        context.
    """
    # Unpack the data supplied by the caller. Previously the assignment was
    # reversed (`data = train_x, ...`), which discarded the argument and
    # silently used the notebook's global splits instead.
    train_x, train_y, test_x, test_y = data
    with mlflow.start_run():
        elastic_net = train(train_x, train_y, alpha, l1_ratio)
        train_rmse, train_mae = validate(elastic_net, train_x, train_y)
        test_rmse, test_mae = validate(elastic_net, test_x, test_y)
        # Print out metrics
        print("Train  RMSE: %s" % train_rmse)
        print("Test  RMSE: %s" % test_rmse)
        print("Train  MAE: %s" % train_mae)
        print("Test  MAE: %s" % test_mae)

        # Log parameter, metrics, and model to MLflow
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_param("alpha", alpha)
        mlflow.log_metric("train_rmse", train_rmse)
        mlflow.log_metric("test_rmse", test_rmse)
        mlflow.log_metric("train_mae", train_mae)
        mlflow.log_metric("test_mae", test_mae)

        mlflow.sklearn.log_model(elastic_net, "model")

In [None]:
# Bundle the four splits in the order run_experiment expects and launch one
# run with alpha = 0.5 and l1_ratio = 0.5.
# NOTE(review): this rebinds `data`, which previously held the raw DataFrame
# read from the CSV.
data = (train_x, train_y, test_x, test_y)
run_experiment(data, alpha=0.5, l1_ratio=0.5)