In [12]:
import os
import warnings
import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

import mlflow
import mlflow.sklearn

In [9]:
data_path = "winequality-white.csv"
data = pd.read_csv(data_path, sep=';')

data.sample(10)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
819,6.6,0.56,0.22,8.9,0.034,27.0,133.0,0.99675,3.2,0.51,9.1,5
2977,6.6,0.23,0.2,11.4,0.044,45.0,131.0,0.99604,2.96,0.51,9.7,6
955,7.6,0.345,0.26,1.9,0.043,15.0,134.0,0.9936,3.08,0.38,9.5,5
3085,8.5,0.23,0.4,9.9,0.036,24.0,88.0,0.9951,3.02,0.42,10.5,6
834,7.3,0.205,0.31,1.7,0.06,34.0,110.0,0.9963,3.72,0.69,10.5,6
2254,6.2,0.26,0.19,3.4,0.049,47.0,172.0,0.9924,3.14,0.43,10.4,6
2339,7.0,0.23,0.26,7.2,0.041,21.0,90.0,0.99509,3.22,0.55,9.5,6
4330,6.4,0.24,0.26,8.2,0.054,47.0,182.0,0.99538,3.12,0.5,9.5,5
2898,8.6,0.18,0.28,0.8,0.032,25.0,78.0,0.99104,2.99,0.38,11.1,5
167,6.7,0.46,0.18,2.4,0.034,25.0,98.0,0.9896,3.08,0.44,12.6,7


1. Tracking experiments

In [3]:
remote_server_uri = "http://127.0.0.1:5000" # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)  # or set the MLFLOW_TRACKING_URI in the env

In [4]:
mlflow.tracking.get_tracking_uri()

'http://127.0.0.1:5000'

In [31]:
exp_name = "ElasticNet_wine"
mlflow.set_experiment(exp_name)

INFO: 'ElasticNet_wine' does not exist. Creating a new experiment


In [32]:
def eval_metrics(actual, pred):
    # compute relevant metrics
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2

In [33]:
def load_data(data_path):
    data = pd.read_csv(data_path, sep=';')

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)
    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]
    return train_x, train_y, test_x, test_y

In [38]:
def train(alpha=0.5, l1_ratio=0.5):
    # train a model with given parameters
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    train_x, train_y, test_x, test_y = load_data("winequality-white.csv")

    # Useful for multiple runs (only doing one run in this sample notebook)    
    with mlflow.start_run():
        # Execute ElasticNet
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        # Evaluate Metrics
        predicted_qualities = lr.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        # Print out metrics
        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        # Log parameter, metrics, and model to MLflow
        # Log params, metrics and model to MLflow
        mlflow.log_param(key="alpha", value=alpha)
        mlflow.log_param(key="l1_ratio", value=l1_ratio)
        mlflow.log_param(key="rmse", value=rmse)
        mlflow.log_metrics({"mae": mae, "r2": r2})
        print("Save to: {}".format(mlflow.get_artifact_uri()))
        
        mlflow.sklearn.log_model(lr, "model")

In [39]:
train(0.5, 0.5)

Elasticnet model (alpha=0.500000, l1_ratio=0.500000):
  RMSE: 0.82224284975954
  MAE: 0.6278761410160693
  R2: 0.12678721972772689
Save to: mlruns/1/101509e72b914b4197bf7c9ca6f0f596/artifacts


**2. Packaging the experiment as a MLflow project as conda env**

Specify the entrypoint for this project by creating a MLproject file and 
adding an conda environment with a conda.yaml. You can copy the yaml file from the experiment logs.

To run this project, invoke mlflow run . -P alpha=0.42. 
After running this command, MLflow runs your training code in a new Conda environment
with the dependencies specified in conda.yaml.

**3. Deploy the model**

mlflow models serve -m mlruns/0/f5f7c052ddc5469a852aa52c14cabdf1/artifacts/model/ -h 0.0.0.0 -p 1234


#Test the endpoint:

curl -X POST -H "Content-Type:application/json; format=pandas-split" \
                --data '{"columns":["alcohol", "chlorides", "citric acid", "density", "fixed acidity", "free sulfur dioxide", "pH", "residual sugar", "sulphates", "total sulfur dioxide", "volatile acidity"],"data":[[12.8, 0.029, 0.48, 0.98, 6.2, 29, 3.33, 1.2, 0.39, 75, 0.66]]}' \
                http://0.0.0.0:1234/invocations

#You can also simply build a docker image from your model
mlflow models build-docker -m mlruns/1/d671f37a9c7f478989e67eb4ff4d1dac/artifacts/model/ -n elastic_net_wine

#and run the container with

docker run -p 8080:8080 elastic_net_wine.

#Or you can directly deploy to AWS sagemaker or Microsoft Azure ML.3. Deploy the model


**4. Tagging runs**

In [44]:
from datetime import datetime
from mlflow.tracking import MlflowClient
client = MlflowClient()
experiments = client.list_experiments() # returns a list of mlflow.entities.Experiment
print(experiments)

[<Experiment: artifact_location='mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>, <Experiment: artifact_location='mlruns/1', experiment_id='1', lifecycle_stage='active', name='ElasticNet_wine', tags={}>]


In [47]:
# get the run
run = client.get_run(run_id="101509e72b914b4197bf7c9ca6f0f596")
print(run)

<Run: data=<RunData: metrics={'mae': 0.6278761410160693, 'r2': 0.12678721972772689}, params={'alpha': '0.5', 'l1_ratio': '0.5', 'rmse': '0.82224284975954'}, tags={'mlflow.log-model.history': '[{"run_id": "101509e72b914b4197bf7c9ca6f0f596", '
                             '"artifact_path": "model", "utc_time_created": '
                             '"2021-03-17 09:42:22.804196", "flavors": '
                             '{"python_function": {"model_path": "model.pkl", '
                             '"loader_module": "mlflow.sklearn", '
                             '"python_version": "3.8.5", "env": "conda.yaml"}, '
                             '"sklearn": {"pickled_model": "model.pkl", '
                             '"sklearn_version": "0.24.1", '
                             '"serialization_format": "cloudpickle"}}}]',
 'mlflow.source.name': 'C:\\Users\\mohammadabbas.me\\Miniconda3\\lib\\site-packages\\ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'mohammadabba

In [49]:
# add a tag to the run
dt = datetime.now().strftime("%d-%m-%Y (%H:%M:%S.%f)")
client.set_tag(run.info.run_id, "deployed", dt)

**Connect to a postgesql db**


sudo -u postgres psql

CREATE DATABASE mlflow;
CREATE USER mlflow WITH ENCRYPTED PASSWORD 'mlflow';
GRANT ALL PRIVILEGES ON DATABASE mlflow TO mlflow;
mlflow server --backend-store-uri postgresql://mlflow:mlflow@localhost/mlflow \
              --default-artifact-root file: C:/Users/mohammadabbas.me/assignments/module3/assignment2_MLFlow//mlruns \
              -h 0.0.0.0 -p 8000

Look at the db:

psql mlflow

SELECT * FROM experiments;

SELECT * FROM runs;