# Get started with Metrics Tracking and Monitoring

This notebook shows how you can use Evidently to:
* calculate the model performance report
* log the model validation metrics to MLFlow
* log the model to MLFlow as an artifact
* log the model performance report to MLFlow as an artifact

In [1]:
# Standard library
import datetime
import io
import json
import zipfile
from pathlib import Path

# Third-party
import joblib
import numpy as np
import pandas as pd
import requests
from sklearn import datasets, ensemble, model_selection

# Evidently reporting
from evidently.metrics import RegressionQualityMetric, RegressionErrorPlot, RegressionErrorDistribution
from evidently.metric_preset import DataDriftPreset, RegressionPreset
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.report import Report


In [2]:
import warnings

# Silence every warning category for the rest of the notebook so that
# library warnings do not clutter the cell outputs.
warnings.filterwarnings(action='ignore')
warnings.simplefilter(action='ignore')

## Bicycle Demand Data

More information about the dataset can be found in the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset

Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg

In [3]:
# Download the Bike Sharing archive and load the hourly table (`hour.csv`) from it.
response = requests.get(
    "https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip",
    timeout=60,  # do not hang the notebook forever if the server is unreachable
)
# Fail fast on an HTTP error instead of passing an error page to zipfile,
# which would surface as a confusing BadZipFile.
response.raise_for_status()
content = response.content

with zipfile.ZipFile(io.BytesIO(content)) as arc:
    # Open the archive member in its own context so the handle is closed
    # as soon as pandas has finished reading it.
    with arc.open("hour.csv") as hourly_file:
        raw_data = pd.read_csv(hourly_file, header=0, sep=',', parse_dates=['dteday'])

In [4]:
# Index every row by its full timestamp: the calendar date (`dteday`) plus the
# hour of day (`hr`). This is the vectorized equivalent of combining date and
# hour row by row with `DataFrame.apply(axis=1)`.
hourly_timestamps = raw_data['dteday'].dt.normalize() + pd.to_timedelta(raw_data['hr'], unit='h')

# `.to_numpy()` drops the Series name so the resulting index stays unnamed.
raw_data.index = pd.DatetimeIndex(hourly_timestamps.to_numpy())

In [5]:
# Preview the first five rows of the loaded table.
raw_data.head(n=5)

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
2011-01-01 00:00:00,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
2011-01-01 01:00:00,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2011-01-01 02:00:00,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
2011-01-01 03:00:00,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
2011-01-01 04:00:00,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [6]:
# Save raw data for further usage

raw_data_path = Path('../data/raw_data.csv')
# Create the target directory on first run so the export does not fail
# on a fresh checkout where `../data` does not exist yet.
raw_data_path.parent.mkdir(parents=True, exist_ok=True)
raw_data.to_csv(raw_data_path)

## Model training 

In [7]:
# Column roles used by the training and reporting cells.
target = 'cnt'
prediction = 'prediction'

numerical_features = [
    'temp', 'atemp', 'hum', 'windspeed',
    'mnth', 'hr', 'weekday',
]

# Note: 'weathersit' is currently excluded from the categorical features.
categorical_features = [
    'season',
    'holiday',
    'workingday',
]

In [8]:
# Two consecutive time windows selected by label on the timestamp index
# (both bounds inclusive): `reference` first, `current` right after it.
reference_window = slice('2011-01-01 00:00:00', '2011-01-28 23:00:00')
current_window = slice('2011-01-29 00:00:00', '2011-02-28 23:00:00')

reference = raw_data.loc[reference_window]
current = raw_data.loc[current_window]

In [9]:
# Hold out 30% of the reference window for validation.
# The fixed seed makes the split, and every metric derived from it,
# reproducible across "Restart & Run All".
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    reference[numerical_features + categorical_features],
    reference[target],
    test_size=0.3,
    random_state=0,
)

In [10]:
regressor = ensemble.RandomForestRegressor(random_state=0, n_estimators=50)

# Fit on the feature columns only. A later cell adds `target` and `prediction`
# columns to X_train; selecting the features explicitly keeps this cell safe to
# re-run without leaking the target into the model.
regressor.fit(X_train[numerical_features + categorical_features], y_train)


model_path = Path('../models/model_get_started.joblib')
# Create the models directory on first run so the dump does not fail.
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(regressor, model_path)

['../models/model_get_started.joblib']

## Model validation

In [11]:
# Predict from the feature columns only. The next cell adds `target` and
# `prediction` columns to X_train / X_test, so passing the frames unfiltered
# would break this cell on a re-run (feature mismatch).
feature_columns = numerical_features + categorical_features
preds_train = regressor.predict(X_train[feature_columns])
preds_test = regressor.predict(X_test[feature_columns])

In [12]:
# Attach the ground truth and the model output to each split under the
# column names the report's column mapping expects.
X_train = X_train.assign(target=y_train, prediction=preds_train)
X_test = X_test.assign(target=y_test, prediction=preds_test)

In [13]:
# Tell Evidently which columns hold the target, the prediction and the features.
column_mapping = ColumnMapping(
    target='target',
    prediction='prediction',
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

In [14]:
# Build the regression report: the train split is passed as the reference
# dataset and the test split as the current dataset, both sorted by index.
reference_frame = X_train.sort_index()
current_frame = X_test.sort_index()

regression_performance_report = Report(metrics=[RegressionPreset()])
regression_performance_report.run(
    reference_data=reference_frame,
    current_data=current_frame,
    column_mapping=column_mapping,
)

# Last expression of the cell: renders the report inline.
regression_performance_report

## Log Model Training and Validation Metrics

### Set up MLFlow

In [15]:
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

from config import MLFLOW_TRACKING_URI

# Set up MLFlow Client
# Runs are tracked under the local `../mlruns` directory. To use the URI from
# `config` instead, pass MLFLOW_TRACKING_URI to `mlflow.set_tracking_uri`.
local_tracking_uri = '../mlruns'
mlflow.set_tracking_uri(local_tracking_uri)

client = MlflowClient()
print(f"Client tracking uri: {client.tracking_uri}")



Client tracking uri: ../mlruns


In [16]:
# Extract the error metrics from the report.
# They are read from the `current` section of the first metric's result,
# i.e. they describe the dataset passed as `current_data` to the report.
train_report_metrics = regression_performance_report.as_dict()

current_result = train_report_metrics['metrics'][0]['result']['current']
me, mae = current_result['mean_error'], current_result["mean_abs_error"]

print(me, mae)

-2.119677419354839 13.624193548387096


In [17]:
with mlflow.start_run() as run:

    # Show newly created run metadata info
    run_info = run.info
    print("Experiment id: {}".format(run_info.experiment_id))
    print("Run id: {}".format(run_info.run_id))
    print("Run name: {}".format(run_info.run_name))
    print('MLFlow tracking uri:', mlflow.get_tracking_uri())
    print('MLFlow artifact uri:', mlflow.get_artifact_uri())

    # Keep the run id so a later cell can reopen this same run
    run_id = run_info.run_id

    # Log metrics, rounded to three decimals
    for metric_name, metric_value in (('me', me), ('mae', mae)):
        mlflow.log_metric(metric_name, round(metric_value, 3))

    # Log the serialized model file as a run artifact
    mlflow.log_artifact(model_path)

Experiment id: 0
Run id: d050875ed5f34182999c76a1dbcfe941
Run name: whimsical-bear-203
MLFlow tracking uri: ../mlruns
MLFlow artifact uri: /Users/mnrozhkov/dev/mlrepa/com/evidently/evidently-mlflow/notebooks/../mlruns/0/d050875ed5f34182999c76a1dbcfe941/artifacts


## Log Model Validation reports to MLFlow 

In [19]:
REPORTS_DIR = '../reports'

# Create the reports directory on first run so `save_html` does not fail.
Path(REPORTS_DIR).mkdir(parents=True, exist_ok=True)

report_path = f"{REPORTS_DIR}/get_started_regression_performance_report.html"
regression_performance_report.save_html(report_path)

# Reopen the run created earlier and bind it with `as run`, so the metadata
# printed below comes from the run that is active inside this block rather
# than from a stale object left over from a previous cell.
with mlflow.start_run(run_id=run_id) as run:

    # Show the run metadata info
    print("Experiment id: {}".format(run.info.experiment_id))
    print("Run id: {}".format(run.info.run_id))
    print("Run name: {}".format(run.info.run_name))

    # Log the regression_performance_report as an artifact
    mlflow.log_artifact(report_path)

Experiment id: 0
Run id: d050875ed5f34182999c76a1dbcfe941
Run name: whimsical-bear-203
