# Production model training with K-Folds cross-validation


This notebook shows how you can use the Evidently to:
* calculate prerformance and data drift for the model, performed as batch checks 
* log models quality & data drift using MLflow Tracking
* explore the result 

More examples are avaliable in the github: https://github.com/evidentlyai/evidently/tree/main/examples

Evidently docs: https://docs.evidentlyai.com/

Join our Discord: https://discord.com/invite/xZjKRaNp8b

In [2]:
%load_ext autoreload
%autoreload 2

import datetime
import joblib
import pandas as pd
import numpy as np
import requests
import zipfile
import io
import json

from pathlib import Path
from sklearn import datasets, ensemble, model_selection
from typing import List, Dict, Tuple

from evidently.metrics import RegressionQualityMetric, RegressionErrorPlot, RegressionErrorDistribution
from evidently.metric_preset import DataDriftPreset, RegressionPreset
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.report import Report

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Bicycle Demand Data

More information about the dataset can be found in UCI machine learning repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset

Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg

In [4]:
# content = requests.get("https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip").content
# with zipfile.ZipFile(io.BytesIO(content)) as arc:
#     raw_data = pd.read_csv(arc.open("hour.csv"), header=0, sep=',', parse_dates=['dteday']) 
    
# raw_data.index = raw_data.apply(lambda row: datetime.datetime.combine(row.dteday.date(), datetime.time(row.hr)), axis=1)

# Load data saved in the previous step (in train_model.ipynb)
raw_data = pd.read_csv('../data/raw_data.csv', index_col=0)

In [5]:
raw_data.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
2011-01-01 00:00:00,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
2011-01-01 01:00:00,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2011-01-01 02:00:00,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
2011-01-01 03:00:00,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
2011-01-01 04:00:00,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


# K-Folds split setup

## Model training 

In [6]:
target = 'cnt'
prediction = 'prediction'
datetime = 'dteday'
numerical_features = ['temp', 'atemp', 'hum', 'windspeed', 'mnth', 'hr', 'weekday']
categorical_features = ['season', 'holiday', 'workingday', ]#'weathersit']

column_mapping = ColumnMapping()
column_mapping.target = target
column_mapping.prediction = prediction
column_mapping.datetime = datetime
column_mapping.numerical_features = numerical_features
column_mapping.categorical_features = categorical_features

# Model Quality Evaluation (Prod)

In [7]:
import mlflow
#import mlflow.sklearn
from mlflow.tracking import MlflowClient

In [8]:
start_date_0 = '2011-01-02 00:00:00'
end_date_0 = '2011-01-30 23:00:00'

experiment_batches = [
    
    ('2011-01-31 00:00:00','2011-02-06 23:00:00'),
    ('2011-02-07 23:00:00','2011-02-13 23:00:00'),
    ('2011-02-14 23:00:00','2011-02-20 23:00:00'),
    ('2011-02-21 00:00:00','2011-02-27 23:00:00'),
    ('2011-02-28 00:00:00','2011-03-06 23:00:00'),  
]

In [9]:
reference = raw_data.loc[start_date_0:end_date_0]
print(reference.shape)

(640, 17)


In [10]:
REPORTS_DIR = '../reports'

model_path = Path('../models/model.joblib')

# Data Drift

In [11]:
from src.reports import (
    build_regression_quality_report,
    get_regression_quality_metrics,
    build_data_drift_report,
    get_data_drift_metrics,
)

from src.plots import (
    detect_dataset_drift,
    detect_features_drift,
    # plot_drifted_feature_scores,
    # plot_drifted_features, 
)

from src.reports import build_model_performance_test_report, get_test_status

In [12]:
from config import MLFLOW_TRACKING_URI


mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
client = MlflowClient()
print(f"Client tracking uri: {client.tracking_uri}")

# Get or Create an experiment by name 
EXPERIMENT_NAME = "Test Model"
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)

if experiment: 
    
    experiment_id = experiment.experiment_id if experiment else None
    # Show experiment info
    print("Name: {}".format(experiment.name))
    print("Experiment ID: {}".format(experiment.experiment_id))
    print("Experiment Name: {}".format(experiment.name))
    print("Artifact Location: {}".format(experiment.artifact_location))
    print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

else:
    
    # Create_experiment
    experiment_id = client.create_experiment(EXPERIMENT_NAME)
    print("Experiment ID: {}".format(experiment_id))

mlflow.set_experiment(EXPERIMENT_NAME)

Client tracking uri: http://localhost:5000
Experiment ID: 512570806058847602


<Experiment: artifact_location='mlflow-artifacts:/512570806058847602', creation_time=1688654691939, experiment_id='512570806058847602', last_update_time=1688654691939, lifecycle_stage='active', name='Test Model', tags={}>

In [14]:

ref_end_data = end_date_0
FEATURE_COLUMNS = numerical_features + categorical_features

# Start a new Run (Parent Run)
with mlflow.start_run() as run: 
    
    # Show newly created run metadata info
    print("Experiment id: {}".format(run.info.experiment_id))
    print("Run id: {}".format(run.info.run_id))
    print("Run name: {}".format(run.info.run_name))
    
    metrics_model = {}
    metrics_data = {}
    metrics_test = {}
    features_historical_drift = []
    features_historical_drift_pvalues = []
    test_status = 0

    #start new run
    for k, test_dates in enumerate(experiment_batches):
        
        print(f"Batch: {k}")
        
        train_dates = start_date_0, ref_end_data
        ref_end_data = test_dates[1] # Update reference end date for the next train batch 
        print(f"Train dates: {train_dates}") 
        print(f"Test (current) dates: {test_dates}") 
        
        train_data = raw_data.loc[train_dates[0]:train_dates[1]]
        X_train = train_data.loc[:, FEATURE_COLUMNS]
        y_train = train_data.loc[:, target]
        print(X_train.shape, y_train.shape)
        
        test_data = raw_data.loc[test_dates[0]:test_dates[1]]
        X_test = test_data.loc[:, FEATURE_COLUMNS]
        y_test = test_data[target]
        print(X_test.shape, y_test.shape)
        
        # Train model
        regressor = ensemble.RandomForestRegressor(random_state = 0, n_estimators = 50)
        regressor.fit(X_train, y_train)
        
        # Calculate predictions
        ref_prediction = regressor.predict(train_data[FEATURE_COLUMNS])
        train_data['prediction'] = ref_prediction
        cur_prediction = regressor.predict(test_data[FEATURE_COLUMNS])
        test_data['prediction'] = cur_prediction
        
        
        # Calculate Model Quality metrics
        regression_quality_report = build_regression_quality_report(
            reference_data=train_data, 
            current_data=test_data,
            column_mapping=column_mapping
        )
        train_metrics = get_regression_quality_metrics(regression_quality_report)
        metrics_model.update({test_dates[1]: train_metrics})
        
        # Calculate Data Drift metrics
        data_drift_report = build_data_drift_report(
            reference_data=X_train.reset_index(drop=True), 
            current_data=X_test.reset_index(drop=True),
            column_mapping=column_mapping,
            drift_share=0.4
        )
        data_drift_metrics: Dict = get_data_drift_metrics(data_drift_report)
        metrics_data.update({test_dates[1]: data_drift_metrics})
        
        model_quality_report_path = f"{REPORTS_DIR}/model_quality_report.html"
        regression_quality_report.save_html(model_quality_report_path)
        
        # Run a Child Run for each Fold 
        with mlflow.start_run(run_name=test_dates[1], 
                              nested=True,
                              ) as nested_run:
            
            # Show newly created run metadata info
            print("Run id: {}".format(nested_run.info.run_id))
            print("Run name: {}".format(nested_run.info.run_name))

            # Log parameters
            mlflow.log_param("begin", test_dates[0])
            mlflow.log_param("end", test_dates[1])
            
            # Log metrics
            mlflow.log_metrics(train_metrics)
            mlflow.log_metrics(data_drift_metrics)
            
            # Log the regression_quality_report as an artifact
            mlflow.log_artifact(model_quality_report_path)
            
            # Log Data Drift report ONLY if drift is detected
            if data_drift_metrics['dataset_drift'] is True:
                report_date = test_dates[1].split(' ')[0]
                data_drift_report_path = f"../reports/data_drift_report_{report_date}.html"
                data_drift_report.save_html(data_drift_report_path)
                mlflow.log_artifact(data_drift_report_path)
            
    
    # Save final  model
    joblib.dump(regressor, model_path)
    
    # Log the last batch model as the parent Run model
    mlflow.log_artifact(model_path)
    
    # Log metrics
    avg_model_metrics = pd.DataFrame.from_dict(metrics_model).T.mean().round(3).to_dict()
    mlflow.log_metrics(avg_model_metrics)
    
    avg_data_metrics = pd.DataFrame.from_dict(metrics_data).T.mean().round(3).to_dict()
    mlflow.log_metrics(avg_data_metrics)
    
    # Test the final model 
    model_test_report = build_model_performance_test_report(
        current_data=X_test,
        column_mapping=column_mapping,
    )
    test_status = get_test_status(model_test_report)
    if test_status == 0:
        print("Test failed")
        model_test_report_path = f"../reports/model_test_report_report.html"
        model_test_report.save_html(model_test_report_path)
        mlflow.log_artifact(model_test_report_path)
    mlflow.log_metric("test_status", test_status)
    

Experiment id: 512570806058847602
Run id: f23e2040ace24e20a937e9c116644292
Run name: auspicious-yak-26
Batch: 0
Train dates: ('2011-01-02 00:00:00', '2011-01-30 23:00:00')
Test (current) dates: ('2011-01-31 00:00:00', '2011-02-06 23:00:00')
(640, 10) (640,)
(165, 10) (165,)
Run id: a02fddd0e1d24ad290af9f005e25dc70
Run name: 2011-02-06 23:00:00
Batch: 1
Train dates: ('2011-01-02 00:00:00', '2011-02-06 23:00:00')
Test (current) dates: ('2011-02-07 23:00:00', '2011-02-13 23:00:00')
(805, 10) (805,)
(140, 10) (140,)
Run id: cbb1c663f2eb4557a8779a7999e843fa
Run name: 2011-02-13 23:00:00
Batch: 2
Train dates: ('2011-01-02 00:00:00', '2011-02-13 23:00:00')
Test (current) dates: ('2011-02-14 23:00:00', '2011-02-20 23:00:00')
(968, 10) (968,)
(142, 10) (142,)
Run id: 873b33f183e441deafd8d9f264d3ca08
Run name: 2011-02-20 23:00:00
Batch: 3
Train dates: ('2011-01-02 00:00:00', '2011-02-20 23:00:00')
Test (current) dates: ('2011-02-21 00:00:00', '2011-02-27 23:00:00')
(1133, 10) (1133,)
(158, 10) (