In [None]:
## FILL IN YOUR NAME
# NAME is interpolated into the MLflow experiment name ("sklearn-<NAME>") later in this notebook.
NAME = "example"

In [None]:
import os
import warnings
import sys

import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn

import logging

%load_ext dotenv
%dotenv

logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

## Load Dataset

We will use an example from the official documentation of MLflow. The goal is to model wine quality based on physicochemical tests (see more [here](http://www3.dsi.uminho.pt/pcortez/wine/)). 

In [None]:
# Silence library warnings so the cell outputs stay readable, and seed NumPy's
# global RNG: train_test_split below is called without random_state, so it
# draws from this global state and the split is reproducible.
warnings.filterwarnings("ignore")
np.random.seed(40)

# Read the wine-quality csv file from the URL
csv_url = (
    "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
)
try:
    data = pd.read_csv(csv_url, sep=";")
except Exception as e:
    logger.exception(
        "Unable to download training & test CSV, check your internet connection. Error: %s", e
    )
    # Stop here: if the download failed, `data` is undefined and the split
    # below would otherwise die with an unrelated NameError.
    raise

# Split the data into training and test sets. (0.75, 0.25) split.
train_data, test_data = train_test_split(data)

# The predicted column is "quality" which is a scalar from [3, 9]
# Features: every column except the target.
train_x = train_data.drop(["quality"], axis=1)
test_x = test_data.drop(["quality"], axis=1)
# Targets are kept as single-column DataFrames (double brackets).
train_y = train_data[["quality"]]
test_y = test_data[["quality"]]

## Training

We will define a simple method to train the model. This method takes as inputs two of the hyperparameters of the model, namely `alpha` and `l1_ratio`. These parameters control the regularization of the model, so they will affect the complexity and the generalization power of the model (more [details](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html)). 

*Disclaimer: we will use the test set to evaluate the model's performance multiple times while changing its hyperparameters. This is not good practice, because the hyperparameters end up tuned to the test set; we are doing it here just for the sake of simplicity.*

In [None]:
def train(alpha=0.5, l1_ratio=0.5):
    """Fit an ElasticNet on the wine-quality training split and print test metrics.

    `alpha` and `l1_ratio` are forwarded unchanged to `ElasticNet`. The model is
    fitted on the module-level `train_x`/`train_y`, scored on `test_x`/`test_y`,
    and RMSE, MAE and R2 are printed to stdout. Nothing is returned.
    """
    # Fit the regressor
    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    model.fit(train_x, train_y)

    # Score the test split
    predictions = model.predict(test_x)
    rmse = np.sqrt(mean_squared_error(test_y, predictions))
    mae = mean_absolute_error(test_y, predictions)
    r2 = r2_score(test_y, predictions)

    # Report the hyperparameters followed by one line per metric
    report_lines = [
        "Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio),
        "  RMSE: %s" % rmse,
        "  MAE: %s" % mae,
        "  R2: %s" % r2,
    ]
    print("\n".join(report_lines))

Now we can train and evaluate the model by just calling the method `train`

In [None]:
# Baseline: train with the function's default hyperparameters.
train()

We can see that the `r2` is quite low; let's see if we can improve it by playing a bit with the hyperparameters.

In [None]:
# Weaker regularization than the defaults
train(alpha=0.1, l1_ratio=0.1)

In [None]:
# Two orders of magnitude smaller again
train(alpha=0.001, l1_ratio=0.001)

In [None]:
# Smallest values tried by hand
train(alpha=1e-5, l1_ratio=1e-5)

Okay, we have found a better performance by playing a bit with the hyperparameters :)

On the other hand, printing the performance to stdout doesn't seem like the best way to track progress. Let's see what we can do using the MLflow Tracking module.

# Tracking Experiments with MLflow

Next, let's check how much effort it will take to use the MLflow Tracking module in order to keep track of our experiments. The method `train_mlflow` below trains and evaluates an ElasticNet model in exactly the same way we did with the method `train` but it also sends the run details to MLflow so we can later visualize them.

Here are a few tips in case you want to try writing the method `train_mlflow` on your own:
* Check the **template** below (we already imported the libraries and set the experiment name for you)
* You can copy and paste the code in the method `train` above and then add the MLflow logging.
* You should log parameters (like `alpha` and `l1_ratio`), metrics (`rmse`, `mae`, etc.), the model and optionally some tags and artifacts of your choice.
* Check the [official documentation](https://www.mlflow.org/docs/latest/index.html) for more information.

### Template

<img src="experiment_tracking_template.png" alt="Drawing" style="width: 600px;"/>

In [None]:
# Configure experiment
# The experiment name embeds NAME, so set NAME in the first cell before running this one.
experiment_name = f"sklearn-{NAME}"
mlflow.set_experiment(experiment_name)

def train_mlflow(alpha=0.5, l1_ratio=0.5):
    """Train an ElasticNet on the Wine Quality Dataset and Log Experiment to MLflow.

    Fits the model on the module-level `train_x`/`train_y`, evaluates it on
    `test_x`/`test_y`, prints the metrics to stdout and then records a single
    MLflow run containing a `developer` tag, the hyperparameters, the metrics
    and the fitted model.

    Args:
        alpha: Forwarded to `ElasticNet(alpha=...)`.
        l1_ratio: Forwarded to `ElasticNet(l1_ratio=...)`.

    Returns:
        None.
    """

    # Train model
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    # Make predictions
    predicted_qualities = lr.predict(test_x)

    def eval_metrics(actual, pred):
        """Return (rmse, mae, r2) for the given targets and predictions."""
        rmse = np.sqrt(mean_squared_error(actual, pred))
        mae = mean_absolute_error(actual, pred)
        r2 = r2_score(actual, pred)
        return rmse, mae, r2

    # Evaluate trained model
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)
    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

    # MLflow logging: training and evaluation ran above, so the run opened
    # here only covers the logging calls.
    with mlflow.start_run():

        # Tag the run with the notebook user (NAME from the first cell) rather
        # than a hard-coded person, so runs are attributed to whoever ran them.
        mlflow.set_tag('developer', NAME)

        # Log params
        # NOTE(review): the key 'l1-ratio' is hyphenated, unlike the `l1_ratio`
        # argument; kept unchanged so it matches runs that were already logged.
        mlflow.log_params({
            'alpha': alpha,
            'l1-ratio': l1_ratio
        })

        # Log metrics
        mlflow.log_metrics({
            'rmse': rmse,
            'mae': mae,
            'r2': r2
        })

        # Log model
        mlflow.sklearn.log_model(lr, artifact_path='model')

In [None]:
# First tracked run
train_mlflow(alpha=0.1, l1_ratio=0.1)

In [None]:
# Second tracked run
train_mlflow(alpha=0.5, l1_ratio=0.3)

In [None]:
# Third tracked run
train_mlflow(alpha=0.1, l1_ratio=0.01)

In [None]:
# Grid search: log one run per (alpha, l1_ratio) pair on a 5x5 grid of
# log-spaced values between 1e-10 and 1e-1.
search_values = np.logspace(-10, -1, 5)
for alpha in search_values:
    for l1_ratio in search_values:
        train_mlflow(alpha, l1_ratio)