# Get started with Metrics Tracking and Monitoring

This notebook demonstrates how to use Evidently to:
* Generate a model performance report and calculate associated metrics.
* Log model metrics to MLFlow.
* Store the model in MLFlow as an artifact.
* Store the model performance report in MLFlow as an artifact.

In [2]:
%load_ext autoreload
%autoreload 2

import joblib
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
import pandas as pd
from pathlib import Path
from sklearn import ensemble, model_selection
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

from config import MLFLOW_TRACKING_URI

## Load Data

More information about the dataset can be found in UCI machine learning repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset

Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg

In [3]:
# Download original dataset with: python src/pipelines/load_data.py 

raw_data = pd.read_csv(f"../data/raw_data.csv")
raw_data.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


## Define column mapping

In [4]:
target = 'cnt'
prediction = 'prediction'
numerical_features = ['temp', 'atemp', 'hum', 'windspeed', 'mnth', 'hr', 'weekday']
categorical_features = ['season', 'holiday', 'workingday', ]

In [5]:
sample_data = raw_data.set_index('dteday').loc['2011-01-01 00:00:00':'2011-01-28 23:00:00'].reset_index()

print(sample_data.shape)

(594, 17)


In [6]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    sample_data[numerical_features + categorical_features],
    sample_data[target],
    test_size=0.3
)

print(X_train.shape)
print(X_test.shape)

(415, 10)
(179, 10)


## Train a  Linear Regression Model

In [7]:
model_lr = LinearRegression()
model_lr.fit(X_train, y_train) 

model_lr_path = Path('../models/model_lr.joblib')
joblib.dump(model_lr, model_lr_path)

['../models/model_lr.joblib']

In [8]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

preds_lr = model_lr.predict(X_test)

me = mean_squared_error(y_test, preds_lr)
mae = mean_absolute_error(y_test, preds_lr)

print(me, mae)

1417.1294535748548 30.587932545201273


## Train a RandomForestRegressor Model

In [9]:
model_rf = ensemble.RandomForestRegressor(random_state = 0, n_estimators = 50)
model_rf.fit(X_train, y_train) 

model_path = Path('../models/model_rf.joblib')
joblib.dump(model_rf, model_path)

['../models/model_rf.joblib']

In [10]:

preds_rf = model_rf.predict(X_test)

me = mean_squared_error(y_test, preds_rf)
mae = mean_absolute_error(y_test, preds_rf)

print(me, mae)

411.9559240223464 13.096759776536313


## Build the model validation report

In [11]:
report = X_test.copy()
report['target'] = y_test
report['prediction_lr'] = preds_lr
report['prediction_rf'] = preds_rf

report.head()

Unnamed: 0,temp,atemp,hum,windspeed,mnth,hr,weekday,season,holiday,workingday,target,prediction_lr,prediction_rf
189,0.1,0.1212,0.54,0.1343,1,4,1,1,0,1,3,42.600416,1.82
553,0.22,0.2121,0.87,0.2985,1,8,3,1,0,1,72,34.885639,197.08
77,0.22,0.197,0.37,0.3284,1,10,3,1,0,1,57,78.716375,60.34
417,0.26,0.2273,0.48,0.2985,1,10,4,1,0,1,57,75.267107,71.08
518,0.14,0.1364,0.59,0.194,1,18,1,1,0,1,153,64.467916,127.52


# Model Registry

## Set up MLFlow

In [12]:
# Set up MLFlow 
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

# Set up MLFlow Client
client = MlflowClient()
print(f"Client tracking uri: {client.tracking_uri}")

# Set experiment name
mlflow.set_experiment('Model Registy')


2023/09/19 10:04:06 INFO mlflow.tracking.fluent: Experiment with name 'Model Registy' does not exist. Creating a new experiment.


Client tracking uri: http://localhost:5000


<Experiment: artifact_location='mlflow-artifacts:/308995660978723051', creation_time=1695110646323, experiment_id='308995660978723051', last_update_time=1695110646323, lifecycle_stage='active', name='Model Registy', tags={}>

## Registering a Model

- Docs on [mlflow.sklearn.log_model](https://www.mlflow.org/docs/latest/python_api/mlflow.sklearn.html?highlight=save_model#mlflow.sklearn.log_model)

### Log the `model_lr` model

In [13]:
with mlflow.start_run() as run: 
    
    # Log the sklearn model and register as version 1
    mlflow.sklearn.log_model(
        sk_model=model_lr,
        artifact_path="LinearRegression"
    )

### Log and Register the `model_rf` model

- Use `registered_model_name` to register a model automatically.
- If a registered model with the name doesn’t exist, the method registers a new model and creates `Version 1`.
- If a registered model with the name exists, the method creates a new model version.

INSTRUCTION: 
- Run the cell below 3 time to register 3 versions of the "bike-sharing-RandomForestRegressor" model

In [14]:
from mlflow.models import infer_signature

with mlflow.start_run() as run: 

    # Show newly created run metadata info
    print("Experiment id: {}".format(run.info.experiment_id))
    print("Run id: {}".format(run.info.run_id))
    print("Run name: {}".format(run.info.run_name))
    print('MLFlow tracking uri:', mlflow.get_tracking_uri())
    print('MLFlow artifact uri:', mlflow.get_artifact_uri())
    run_id = run.info.run_id

    # Infer the model signature
    signature = infer_signature(y_test, preds_rf)

    # Log the sklearn model and register as version 1
    mlflow.sklearn.log_model(
        sk_model=model_rf,
        artifact_path="RandomForest",
        signature=signature,
        registered_model_name="RandomForest",
    )

Experiment id: 308995660978723051
Run id: f31f66f15fbe47c2a382086b272177ff
Run name: polite-rook-20
MLFlow tracking uri: http://localhost:5000
MLFlow artifact uri: mlflow-artifacts:/308995660978723051/f31f66f15fbe47c2a382086b272177ff/artifacts


  inputs = _infer_schema(model_input)
Registered model 'RandomForest' already exists. Creating a new version of this model...
2023/09/19 10:04:24 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForest, version 4
Created version '4' of model 'RandomForest'.


In [15]:
# Adding or Updating an MLflow Model Descriptions

client.update_model_version(
    name="RandomForest",
    version=3,
    description="This a model version 3 description added with update_model_version() method",
)

# Note: if you got an error, make sure you run the previous cell 3 times and save 3 versions of the model! 

<ModelVersion: aliases=[], creation_timestamp=1695110162888, current_stage='Production', description='This a model version 3 description added with update_model_version() method', last_updated_timestamp=1695110669522, name='RandomForest', run_id='1c05fa31f83440829d746e7cd7d1adb6', run_link='', source='./mlruns/0/1c05fa31f83440829d746e7cd7d1adb6/artifacts/RandomForest', status='READY', status_message='', tags={}, user_id='', version='3'>

## Discover models and their stages

In [16]:
from pprint import pprint

# Listing and Searching MLflow Models

for rm in client.search_registered_models():
    pprint(dict(rm), indent=4)

{   'aliases': {},
    'creation_timestamp': 1695110137966,
    'description': '',
    'last_updated_timestamp': 1695110664319,
    'latest_versions': [   <ModelVersion: aliases=[], creation_timestamp=1695110162888, current_stage='Production', description='This a model version 3 description added with update_model_version() method', last_updated_timestamp=1695110669522, name='RandomForest', run_id='1c05fa31f83440829d746e7cd7d1adb6', run_link='', source='./mlruns/0/1c05fa31f83440829d746e7cd7d1adb6/artifacts/RandomForest', status='READY', status_message='', tags={}, user_id='', version='3'>,
                           <ModelVersion: aliases=[], creation_timestamp=1695110664319, current_stage='None', description='', last_updated_timestamp=1695110664319, name='RandomForest', run_id='f31f66f15fbe47c2a382086b272177ff', run_link='', source='mlflow-artifacts:/308995660978723051/f31f66f15fbe47c2a382086b272177ff/artifacts/RandomForest', status='READY', status_message='', tags={}, user_id='', ver

In [17]:
# Search for a specific model name and list its version details 

for mv in client.search_model_versions("name='RandomForest'"):
    pprint(dict(mv), indent=4)

{   'aliases': [],
    'creation_timestamp': 1695110162888,
    'current_stage': 'Production',
    'description': 'This a model version 3 description added with '
                   'update_model_version() method',
    'last_updated_timestamp': 1695110669522,
    'name': 'RandomForest',
    'run_id': '1c05fa31f83440829d746e7cd7d1adb6',
    'run_link': '',
    'source': './mlruns/0/1c05fa31f83440829d746e7cd7d1adb6/artifacts/RandomForest',
    'status': 'READY',
    'status_message': '',
    'tags': {},
    'user_id': '',
    'version': '3'}
{   'aliases': [],
    'creation_timestamp': 1695110664319,
    'current_stage': 'None',
    'description': '',
    'last_updated_timestamp': 1695110664319,
    'name': 'RandomForest',
    'run_id': 'f31f66f15fbe47c2a382086b272177ff',
    'run_link': '',
    'source': 'mlflow-artifacts:/308995660978723051/f31f66f15fbe47c2a382086b272177ff/artifacts/RandomForest',
    'status': 'READY',
    'status_message': '',
    'tags': {},
    'user_id': '',
    '

## Transitioninig a model stage

In [18]:
# Over the course of the model’s lifecycle, a model evolves—from development to staging to production. 
# You can transition a registered model to one of the stages: Staging, Production or Archived.

client.transition_model_version_stage(
    name="RandomForest", version=3, stage="Production"
)

<ModelVersion: aliases=[], creation_timestamp=1695110162888, current_stage='Production', description='This a model version 3 description added with update_model_version() method', last_updated_timestamp=1695110686359, name='RandomForest', run_id='1c05fa31f83440829d746e7cd7d1adb6', run_link='', source='./mlruns/0/1c05fa31f83440829d746e7cd7d1adb6/artifacts/RandomForest', status='READY', status_message='', tags={}, user_id='', version='3'>

## Download and use models from the registry

In [19]:
model_version_uri = "models:/example-model@Champion"

In [25]:
# Load the model from the model registry and score
model_uri = f"models:/RandomForest/3"
loaded_model = mlflow.sklearn.load_model(model_uri)
loaded_model

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [26]:
loaded_model.predict(X_test)

array([  1.82, 197.08,  60.34,  71.08, 127.52,  57.88,  60.8 ,   5.38,
        62.1 ,  12.82,  60.18,  97.74,  52.22,  62.34,  84.42,  20.86,
        32.82,  36.74,   2.42,  83.86,  84.14, 144.72, 153.02, 130.56,
         6.94,  10.6 ,  84.04,  32.58,   3.78,   2.88,  10.42,   3.24,
       144.42, 208.24,   5.34,   2.24,  78.66,  64.24,  34.66, 154.28,
        63.96,   2.04, 157.74,  70.38,  83.4 , 105.26,  88.82,  90.56,
        61.28,   4.54,   4.88, 118.32,   6.96,  45.72,  85.4 ,  50.74,
         3.02,  82.22,   2.98,  75.62,  84.  ,  30.32,  64.58,  19.56,
        58.66,  33.8 ,  37.4 ,  10.82,  11.26, 125.88,  20.22,   5.06,
         3.8 ,   5.12,   3.72,  77.6 ,  99.9 ,  27.8 ,  48.96,  12.48,
         9.44,  77.2 ,   7.44,  59.76, 100.84,  76.8 , 144.92,   3.9 ,
         1.58,  12.86,  26.42,  47.56,  51.36, 105.46,  42.68, 134.02,
        49.84,  22.88,   3.98,  76.62,  46.5 ,  52.32,  53.62,  88.94,
       125.74, 142.2 ,  57.3 ,  68.28,  57.32,  79.96, 116.86,  54.56,
      

## Deregistering, Deleting and Archiving models 

In [27]:
# Archive models version 3 from Production into Archived

client = MlflowClient()
client.transition_model_version_stage(
    name="RandomForest", version=2, stage="Archived"
)

<ModelVersion: aliases=[], creation_timestamp=1695110155270, current_stage='Archived', description='', last_updated_timestamp=1695110779960, name='RandomForest', run_id='c830d17ae8704aef817989c7e19d0a59', run_link='', source='./mlruns/0/c830d17ae8704aef817989c7e19d0a59/artifacts/RandomForest', status='READY', status_message='', tags={}, user_id='', version='2'>

In [28]:
# Note: Deleting registered models or model versions is irrevocable, so use it judiciously.

# Delete version 1 of the model
client.delete_model_version(
        name="RandomForest", version=1,
    )

RestException: RESOURCE_DOES_NOT_EXIST: Model Version (name=RandomForest, version=1) not found