In [1]:
import os
import warnings
import sys

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import mlflow
import mlflow.sklearn

In [2]:
# Load the wine-quality CSV (path is relative to the notebook's working
# directory) and preview ten randomly sampled rows.
data_path = "data/wine-quality.csv"
data = pd.read_csv(data_path)

data.sample(10)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
3532,7.2,0.23,0.46,6.4,0.036,17.0,85.0,0.99279,3.1,0.78,11.7,6
1722,7.5,0.4,1.0,19.5,0.041,33.0,148.0,0.9977,3.24,0.38,12.0,6
409,8.2,0.28,0.42,1.8,0.031,30.0,93.0,0.9917,3.09,0.39,11.4,5
2110,6.5,0.22,0.25,17.1,0.05,44.0,138.0,1.0001,3.3,0.37,8.8,5
2374,7.2,0.2,0.3,2.0,0.039,43.0,188.0,0.9911,3.3,0.41,12.0,6
67,6.7,0.25,0.13,1.2,0.041,81.0,174.0,0.992,3.14,0.42,9.8,5
848,8.7,0.23,0.32,13.4,0.044,35.0,169.0,0.99975,3.12,0.47,8.8,7
4105,6.9,0.3,0.45,1.4,0.039,36.0,122.0,0.99059,3.07,0.47,11.1,7
3119,5.9,0.19,0.37,0.8,0.027,3.0,21.0,0.9897,3.09,0.31,10.8,5
3912,6.5,0.33,0.3,3.8,0.036,34.0,88.0,0.99028,3.25,0.63,12.5,7


In [3]:
# Point the MLflow client at the tracking server that should record the runs.
# NOTE(review): 0.0.0.0 is normally a listen/bind address -- confirm clients
# can reach the server through it on your platform, otherwise use the
# server's hostname or localhost.
remote_server_uri = "http://0.0.0.0:5000" # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

In [4]:
# Sanity check: echo the tracking URI the MLflow client is currently using.
mlflow.tracking.get_tracking_uri()

'http://0.0.0.0:5000'

In [5]:
# Select the experiment by name; the Experiment object returned by
# set_experiment is displayed as this cell's output.
exp_name = "ElasticNet_wine"
mlflow.set_experiment(exp_name)

<Experiment: artifact_location='/vagrant/data/mlflow-walkthrough/mlruns/313964387828406350', creation_time=1712598351500, experiment_id='313964387828406350', last_update_time=1712598351500, lifecycle_stage='active', name='ElasticNet_wine', tags={}>

In [6]:
def eval_metrics(actual, pred):
    """Score predictions against the true target values.

    Returns a ``(rmse, mae, r2)`` tuple: root-mean-squared error, mean
    absolute error and the R^2 score, in that order.
    """
    return (
        np.sqrt(mean_squared_error(actual, pred)),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )


def load_data(data_path):
    """Read the CSV at ``data_path`` and split it into train/test features and targets.

    The split uses ``train_test_split`` with its default proportions
    (75% train / 25% test). The target is the "quality" column (a score in
    [3, 9]); every other column is treated as a feature.

    Returns ``(train_x, train_y, test_x, test_y)``. The ``*_y`` values are
    single-column DataFrames, not Series.
    """
    target_cols = ["quality"]

    train_part, test_part = train_test_split(pd.read_csv(data_path))

    return (
        train_part.drop(columns=target_cols),
        train_part[target_cols],
        test_part.drop(columns=target_cols),
        test_part[target_cols],
    )

def train(alpha=0.5, l1_ratio=0.5, data_path="data/wine-quality.csv"):
    """Fit an ElasticNet model on the wine-quality data and log the run to MLflow.

    Parameters
    ----------
    alpha : float
        Passed to ``ElasticNet(alpha=...)``.
    l1_ratio : float
        Passed to ``ElasticNet(l1_ratio=...)``.
    data_path : str
        CSV file to train on, resolved relative to the current working
        directory. The same file is uploaded as a run artifact.

    Side effects
    ------------
    * Seeds NumPy's global RNG with 40.
    * Starts one MLflow run and logs the two parameters, the rmse/mae/r2
      metrics, the dataset file and the fitted model (under "model").
    * Prints the metrics and the run's artifact URI.

    Warnings are suppressed only for the duration of this call; the
    kernel's warning filters are restored on exit.
    """
    # Scope the suppression to this call: a bare filterwarnings("ignore")
    # would silence every later warning in the notebook session as well.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")

        # Seed before load_data: its train_test_split call passes no
        # random_state, so the split follows NumPy's global RNG state.
        np.random.seed(40)
        train_x, train_y, test_x, test_y = load_data(data_path)

        # One MLflow run per call, so repeated calls can be compared in the UI.
        with mlflow.start_run():
            # Execute ElasticNet
            lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
            lr.fit(train_x, train_y)

            # Evaluate on the held-out split
            predicted_qualities = lr.predict(test_x)
            rmse, mae, r2 = eval_metrics(test_y, predicted_qualities)

            # Print out metrics
            print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
            print("  RMSE: %s" % rmse)
            print("  MAE: %s" % mae)
            print("  R2: %s" % r2)

            # Log parameters, metrics, the dataset and the model to MLflow
            mlflow.log_params({"alpha": alpha, "l1_ratio": l1_ratio})
            mlflow.log_metrics({"rmse": rmse, "mae": mae, "r2": r2})
            mlflow.log_artifact(data_path)
            print("Save to: {}".format(mlflow.get_artifact_uri()))

            mlflow.sklearn.log_model(lr, "model")

In [7]:
# Run 1: alpha=0.5, l1_ratio=0.5 (the same values as train()'s defaults).
train(0.5, 0.5)

Elasticnet model (alpha=0.500000, l1_ratio=0.500000):
  RMSE: 0.8222428497595401
  MAE: 0.6278761410160693
  R2: 0.12678721972772666
Save to: /vagrant/data/mlflow-walkthrough/mlruns/313964387828406350/ce72ccd3a57f40d29c948b934c79ce01/artifacts


In [8]:
# Run 2: alpha=0.2, l1_ratio=0.2.
train(0.2, 0.2)

Elasticnet model (alpha=0.200000, l1_ratio=0.200000):
  RMSE: 0.7859129997062341
  MAE: 0.6155290394093894
  R2: 0.20224631822892103
Save to: /vagrant/data/mlflow-walkthrough/mlruns/313964387828406350/d8bb112efa09440b8d5afcda2896a8eb/artifacts


In [9]:
# Run 3: alpha=0.1, l1_ratio=0.1 (the smallest values tried in this notebook).
train(0.1, 0.1)

Elasticnet model (alpha=0.100000, l1_ratio=0.100000):
  RMSE: 0.7792546522251949
  MAE: 0.6112547988118586
  R2: 0.2157063843066196
Save to: /vagrant/data/mlflow-walkthrough/mlruns/313964387828406350/adabe97615d447dfacc2e144b954138d/artifacts


In [11]:
# 1.1 Comparing runs
# Run `mlflow ui` in a terminal, or open http://your-tracking-server-host:5000 in a browser, to view the experiment log and to visualize and compare different runs and experiments. The logs and the model artifacts are saved in the mlruns directory (or wherever you specified).

In [12]:
# 2. Packaging the experiment as an MLflow project with a conda environment
# Specify the entry point for this project by creating an MLproject file, and define its conda environment in a conda.yaml file. You can copy the YAML file from the experiment logs.

# To run this project, invoke `mlflow run . -P alpha=0.42`. After running this command, MLflow runs your training code in a new conda environment with the dependencies specified in conda.yaml.

In [13]:
# 3. Deploy the model
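# Deploy the model locally by running the command below (replace the experiment ID and run ID in the path with those of the run you want to serve):

# mlflow models serve -m mlruns/0/f5f7c052ddc5469a852aa52c14cabdf1/artifacts/model/ -h 0.0.0.0 -p 1234 --env-manager=local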

# Test the endpoint:

# curl -d '{"dataframe_split": {"columns": ["fixed acidity","volatile acidity","citric acid","residual sugar","chlorides","free sulfur dioxide","total sulfur dioxide","density","pH","sulphates","alcohol"],"data": [[7,0.27,0.36,20.7,0.045,45,170,1.001,3,0.45,8.8]]}}' -H 'Content-Type: application/json' -X POST http://0.0.0.0:1234/invocations