#### Model monitoring

In [None]:
# import required libraries
import os
from datetime import datetime, timedelta

import ibis
import numpy as np
import pandas as pd
import pins
import requests
import rsconnect
import vetiver
import xgboost as xgb
from dotenv import load_dotenv
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sqlalchemy import create_engine
import re

In [None]:
# Simulate multiple days of input data: start from a copy of X_test, attach
# y_test as the RESULTS column, and add an empty date_obs placeholder that is
# populated with simulated dates later in the notebook.
data = X_test.assign(RESULTS=y_test, date_obs="")
data

Unnamed: 0,BAKERY,GROCERY_STORE,RESTAURANT,HIGH_RISK,MEDIUM_RISK,LOW_RISK,CUM_VIOLATIONS,RESULTS,date_obs
249233,0,0,1,1,0,0,9.0,0,
252337,0,1,0,1,0,0,0.0,1,
249454,0,0,1,1,0,0,3.0,1,
255802,0,0,1,1,0,0,33.0,0,
251926,0,0,1,0,1,0,2.0,1,
...,...,...,...,...,...,...,...,...,...
243741,0,0,1,1,0,0,6.0,1,
246107,0,0,1,1,0,0,7.0,1,
249070,0,0,1,0,1,0,9.0,1,
253259,0,0,1,1,0,0,2.0,0,


In [None]:
# Score the simulated observations against the deployed model's /predict route.
endpoint = "https://connect.conf23workflows.training.posit.co/inspection_results/predict"

# Send only the predictor columns. At this point `data` also carries the true
# label (RESULTS) and the empty date placeholder (date_obs), and on a re-run of
# this cell a stale `preds` column as well; none of them should be posted to
# the prediction API. errors="ignore" keeps the first run (no `preds` yet) safe.
non_feature_cols = ["RESULTS", "date_obs", "preds"]
features = data.drop(columns=non_feature_cols, errors="ignore")

# The first column of the response frame is taken as the prediction; `.values`
# discards the response's own index so the assignment is positional.
data["preds"] = vetiver.predict(endpoint, features).iloc[:, 0].values

In [None]:
# Dates for the three most recent days (oldest first), formatted as YYYY-MM-DD.
# "Today" is read once so all three labels derive from the same clock reading;
# separate today() calls could straddle midnight and produce duplicate or
# non-consecutive dates.
today = pd.Timestamp.today()
day_list = [
    (today - timedelta(days=days_back)).strftime("%Y-%m-%d")
    for days_back in (2, 1, 0)
]
day_list

['2023-09-05', '2023-09-06', '2023-09-07']

In [None]:
# Tag every row with one of the simulated dates, drawn at random from day_list.
# The draw is unseeded, so the per-day row counts vary from run to run.
n_obs = data.shape[0]
data["date_obs"] = np.random.choice(day_list, size=n_obs)

In [None]:
# Inspect the frame now that each row carries a simulated date and a prediction.
data

Unnamed: 0,BAKERY,GROCERY_STORE,RESTAURANT,HIGH_RISK,MEDIUM_RISK,LOW_RISK,CUM_VIOLATIONS,RESULTS,date_obs,preds
249233,0,0,1,1,0,0,9.0,0,2023-09-07,0
252337,0,1,0,1,0,0,0.0,1,2023-09-06,1
249454,0,0,1,1,0,0,3.0,1,2023-09-07,1
255802,0,0,1,1,0,0,33.0,0,2023-09-05,1
251926,0,0,1,0,1,0,2.0,1,2023-09-05,1
...,...,...,...,...,...,...,...,...,...,...
243741,0,0,1,1,0,0,6.0,1,2023-09-07,0
246107,0,0,1,1,0,0,7.0,1,2023-09-05,0
249070,0,0,1,0,1,0,9.0,1,2023-09-05,1
253259,0,0,1,1,0,0,2.0,0,2023-09-06,1


In [None]:
# Pin the scored, dated observations (as parquet) for use in the model card.
monitoring_pin = f"{connect_username}/inspection_results_monitoring"
model_board.pin_write(data, monitoring_pin, type="parquet")

Writing pin:
Name: 'gagan/inspection_results_monitoring'
Version: 20230907T194850Z-601b8


Meta(title='inspection_results_monitoring: a pinned 2854 x 10 DataFrame', description=None, created='20230907T194850Z', pin_hash='601b8290d99cf506', file='inspection_results_monitoring.parquet', file_size=30292, type='parquet', api_version=1, version=VersionRaw(version='181'), tags=None, name='gagan/inspection_results_monitoring', user={}, local={})

In [None]:
# Time period to track over: metrics are computed per one-day window of date_obs.
td = timedelta(days=1)

# Metrics to track in each window.
# NOTE(review): these are regression metrics, while RESULTS and preds look like
# 0/1 labels in the displayed frame, in which case MAE and MSE coincide.
# Confirm this set is intended before changing it; the metric names end up in
# the pinned metrics table and may be consumed downstream.
metric_set = [
    metrics.mean_absolute_error,
    metrics.mean_squared_error,
    metrics.r2_score,
]

# Compare the true outcome (RESULTS) with the model output (preds) per window.
original_metrics = vetiver.compute_metrics(
    data,
    date_var="date_obs",
    period=td,
    metric_set=metric_set,
    truth="RESULTS",
    estimate="preds",
)

In [None]:
# Inspect the computed metrics: one row per (window start, metric) pair.
original_metrics

Unnamed: 0,index,n,metric,estimate
0,2023-09-05,974,mean_absolute_error,0.380903
1,2023-09-05,974,mean_squared_error,0.380903
2,2023-09-05,974,r2_score,-0.562679
3,2023-09-06,906,mean_absolute_error,0.38521
4,2023-09-06,906,mean_squared_error,0.38521
5,2023-09-06,906,r2_score,-0.585465


In [None]:
# Persist the monitoring metrics as a CSV pin on the board.
metrics_pin = f"{connect_username}/inspection_model_metrics"
model_board.pin_write(original_metrics, metrics_pin, type="csv")

Writing pin:
Name: 'gagan/inspection_model_metrics'
Version: 20230907T194900Z-8f199


Meta(title='inspection_model_metrics: a pinned 6 x 4 DataFrame', description=None, created='20230907T194900Z', pin_hash='8f1992cf760235fb', file='inspection_model_metrics.csv', file_size=326, type='csv', api_version=1, version=VersionRaw(version='182'), tags=None, name='gagan/inspection_model_metrics', user={}, local={})

In [None]:
# Model card: generate vetiver's model card template in the current directory
# (the cell output shows the resulting path, './model_card.qmd').
vetiver.model_card(path=".")

'./model_card.qmd'