In [66]:
import os

import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from datetime import datetime
from mlflow.tracking import MlflowClient

In [55]:
# Address of the MLflow tracking server. Defaults to the local server this
# notebook was written against; set MLFLOW_TRACKING_URI before starting the
# kernel to point the notebook somewhere else without editing it.
remote_server_uri = os.environ.get('MLFLOW_TRACKING_URI', 'http://127.0.0.1:5000')
mlflow.set_tracking_uri(remote_server_uri)

In [56]:
# Echo the URI MLflow will actually use, as a sanity check on the cell above.
mlflow.get_tracking_uri()

'http://127.0.0.1:5000'

In [57]:
# Make this experiment the active one; every run started below is filed under it.
exp_name = "ElasticNet_wine"
mlflow.set_experiment(experiment_name=exp_name)

2022/06/28 12:41:59 INFO mlflow.tracking.fluent: Experiment with name 'ElasticNet_wine' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlruns/1', experiment_id='1', lifecycle_stage='active', name='ElasticNet_wine', tags={}>

In [58]:
# Directory holding the input CSVs, relative to the notebook's working
# directory. Built with os.path.join so the separator is right on every OS
# (a literal '..\inputs' carries an invalid "\i" escape and is only a
# directory path on Windows).
DATAPATH = os.path.join(os.pardir, 'inputs')
FILEPATH = os.path.join(DATAPATH, 'wine-quality.csv')

In [59]:
# Load the raw CSV once for inspection and show its first five rows.
data = pd.read_csv(FILEPATH)
data.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [60]:
def eval_metrics(actual, pred):
    """Score predictions against the true target values.

    Parameters
    ----------
    actual : array-like
        Ground-truth target values.
    pred : array-like
        Model predictions, aligned with ``actual``.

    Returns
    -------
    tuple
        ``(rmse, mae, r2)`` — root mean squared error, mean absolute error
        and the R² score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    return (
        np.sqrt(mse),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )

def load_data(file_path, random_state=42):
    """Read the CSV at ``file_path`` and split it into train/test sets.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file. It must contain a ``quality`` column,
        which is used as the prediction target.
    random_state : int or None, default 42
        Seed forwarded to ``train_test_split`` so repeated calls produce the
        same split and runs can be compared fairly. Pass ``None`` to get a
        different random split on every call.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)`` where the ``x_*`` frames hold
        every remaining column except ``quality`` and the ``y_*`` series hold
        ``quality``.

    Raises
    ------
    KeyError
        If the file has no ``quality`` column.
    """
    data = pd.read_csv(file_path)

    # A CSV saved together with its index comes back with an "Unnamed: N"
    # column. That is a row counter, not a property of the wine, so it must
    # not be handed to the model as a feature.
    index_like = [col for col in data.columns if str(col).startswith("Unnamed:")]
    data = data.drop(columns=index_like)

    # Local names deliberately avoid "train", which is also the name of the
    # training function defined in this notebook.
    train_part, test_part = train_test_split(data, random_state=random_state)

    x_train = train_part.drop(columns=['quality'])
    x_test = test_part.drop(columns=['quality'])
    y_train = train_part['quality']
    y_test = test_part['quality']

    return x_train, y_train, x_test, y_test

def train(file_path, alpha=0.5, l1_ratio=0.5):
    """Fit an ElasticNet on the data at ``file_path`` and record it in MLflow.

    Parameters
    ----------
    file_path : str or path-like
        CSV passed to ``load_data``; the same file is also uploaded as a run
        artifact.
    alpha : float, default 0.5
        Forwarded to ``ElasticNet(alpha=...)``.
    l1_ratio : float, default 0.5
        Forwarded to ``ElasticNet(l1_ratio=...)``.

    Returns
    -------
    None
        Everything is reported through side effects: a new MLflow run is
        started, the metrics and artifact URI are printed, and the run
        receives the two parameters, the ``rmse``/``mae``/``r2`` metrics,
        the input file and the fitted model (under ``"model"``).
    """
    x_train, y_train, x_test, y_test = load_data(file_path)

    with mlflow.start_run():

        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(x_train, y_train)

        predictions = lr.predict(x_test)
        rmse, mae, r2 = eval_metrics(y_test, predictions)

        # Plain %-formatting throughout; none of these strings has f-string
        # placeholders, so they carry no "f" prefix.
        print("Elasticnet model (alpha= %f, l1_ratio= %f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        # Log parameters, metrics, the input data and the model to MLflow.
        mlflow.log_param(key="alpha", value=alpha)
        mlflow.log_param(key="l1_ratio", value=l1_ratio)
        mlflow.log_metric(key="rmse", value=rmse)
        mlflow.log_metrics({"mae": mae, "r2": r2})
        mlflow.log_artifact(file_path)
        print("Save to: {}".format(mlflow.get_artifact_uri()))

        mlflow.sklearn.log_model(lr, "model")

In [61]:
# Baseline run with the function's default hyperparameters spelled out.
train(FILEPATH, alpha=0.5, l1_ratio=0.5)

Elasticnet model (alpha= 0.500000, l1_ratio= 0.500000):
  RMSE: 0.8533119307263476
  MAE: 0.6534020840142702
  R2: 0.12186494219914068
Save to: mlruns/1/5845ff5da8524e40bdac9708bb3f3d10/artifacts


In [62]:
# Weaker regularisation, mostly L2.
train(FILEPATH, alpha=0.2, l1_ratio=0.2)

Elasticnet model (alpha= 0.200000, l1_ratio= 0.200000):
  RMSE: 0.7878067561212199
  MAE: 0.6235629245679702
  R2: 0.21942159228640334
Save to: mlruns/1/d2e600ebd28d475487bad8930d719bc8/artifacts


In [63]:
# Lower alpha again, keeping the same L1/L2 mix as the previous run.
train(FILEPATH, alpha=0.1, l1_ratio=0.2)

Elasticnet model (alpha= 0.100000, l1_ratio= 0.200000):
  RMSE: 0.8021425675795936
  MAE: 0.6340046497786144
  R2: 0.23176895132953312
Save to: mlruns/1/dbef64969ed044bea54ffa9291236624/artifacts


In [64]:
# Same alpha, with the L1 share reduced further.
train(FILEPATH, alpha=0.1, l1_ratio=0.1)

Elasticnet model (alpha= 0.100000, l1_ratio= 0.100000):
  RMSE: 0.7938245341259235
  MAE: 0.6196836496728543
  R2: 0.19069713309108483
Save to: mlruns/1/278f48f54a6947afa13d076381c7d1a7/artifacts


Tagging Runs

In [67]:
client = MlflowClient()

# MlflowClient.list_experiments() was removed in MLflow 2.0 in favour of
# search_experiments(). Use the newer call when this installation has it and
# fall back to the older one otherwise, so the cell runs on either version.
if hasattr(client, "search_experiments"):
    experiments = client.search_experiments()
else:
    experiments = client.list_experiments()
experiments

[<Experiment: artifact_location='mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='mlruns/1', experiment_id='1', lifecycle_stage='active', name='ElasticNet_wine', tags={}>]

In [69]:
# Look up the run produced by the first training call (alpha=0.5,
# l1_ratio=0.5); its ID is the one printed in that cell's "Save to" line.
_run = client.get_run("5845ff5da8524e40bdac9708bb3f3d10")
_run

<Run: data=<RunData: metrics={'mae': 0.6534020840142702,
 'r2': 0.12186494219914068,
 'rmse': 0.8533119307263476}, params={'alpha': '0.5', 'l1_ratio': '0.5'}, tags={'mlflow.log-model.history': '[{"run_id": "5845ff5da8524e40bdac9708bb3f3d10", '
                             '"artifact_path": "model", "utc_time_created": '
                             '"2022-06-28 07:12:10.081168", "flavors": '
                             '{"python_function": {"model_path": "model.pkl", '
                             '"loader_module": "mlflow.sklearn", '
                             '"python_version": "3.9.12", "env": '
                             '"conda.yaml"}, "sklearn": {"pickled_model": '
                             '"model.pkl", "sklearn_version": "1.0.2", '
                             '"serialization_format": "cloudpickle", "code": '
                             'null}}, "model_uuid": '
                             '"7fa15e0f92434181a50a3bcef84c98a6", '
                             '"mlflow_ver

In [70]:
# Mark the run as deployed: the "deployed" tag stores the current local
# time formatted as day-month-year (hour:minute:second:microsecond).
dt = format(datetime.now(), "%d-%m-%Y (%H:%M:%S:%f)")
client.set_tag(run_id=_run.info.run_id, key="deployed", value=dt)