forked from fastforwardlabs/cml_churn_demo_mlops
-
Notifications
You must be signed in to change notification settings - Fork 0
/
7b_ml_ops_visual.py
executable file
·93 lines (75 loc) · 3.3 KB
/
7b_ml_ops_visual.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
## Part 7b - Model Operations - Visualising Model Metrics
# This is a continuation of the previous process started in the
# `7a_ml_ops_simulations.py` script.
# Here we will load in the metrics saved to the model database in the previous step
# into a Pandas dataframe, and display different features as graphs.
#```python
# help(cdsw.read_metrics)
# Help on function read_metrics in module cdsw:
#
# read_metrics(model_deployment_crn=None, start_timestamp_ms=None, end_timestamp_ms=None, model_crn=None, model_build_crn=None)
# Description
# -----------
#
# Read metrics data for given Crn with start and end time stamp
#
# Parameters
# ----------
# model_deployment_crn: string
# model deployment Crn
# model_crn: string
# model Crn
# model_build_crn: string
# model build Crn
# start_timestamp_ms: int, optional
# metrics data start timestamp in milliseconds , if not passed
# default value 0 is used to fetch data
# end_timestamp_ms: int, optional
# metrics data end timestamp in milliseconds , if not passed
# current timestamp is used to fetch data
#
# Returns
# -------
# object
# metrics data
#```
import cdsw, time, os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from cmlbootstrap import CMLBootstrap
import seaborn as sns
import sqlite3
## Set the model ID
# Use the id of the model you deployed in step 5. Ids are unique to each
# model on CML, so change this value to match your own deployment.
model_id = "63"

# Fetch the model record, requesting its latest deployment and latest build,
# and pull out the CRNs that the metrics call below needs.
cml = CMLBootstrap()
latest_model = cml.get_model(
    {
        "id": model_id,
        "latestModelDeployment": True,
        "latestModelBuild": True,
    }
)
Deployment_CRN = latest_model["latestModelDeployment"]["crn"]
Model_CRN = latest_model["crn"]

# Read the model metrics dict. No start/end timestamps are passed, so the
# defaults of cdsw.read_metrics apply.
model_metrics = cdsw.read_metrics(
    model_deployment_crn=Deployment_CRN,
    model_crn=Model_CRN,
)
# Unravel the nested metrics records into one wide pandas DataFrame
# (nested keys become dotted column names such as "metrics.probability").
# `pd.io.json.json_normalize` has been deprecated since pandas 1.0 in favour
# of the top-level `pd.json_normalize`, so prefer the new name and only fall
# back to the old location on pandas versions that predate it.
try:
    _json_normalize = pd.json_normalize
except AttributeError:
    _json_normalize = pd.io.json.json_normalize
metrics_df = _json_normalize(model_metrics["metrics"])
# Show the last few records, transposed so each column reads as a row.
metrics_df.tail().T
# Write the data to SQLite for Viz Apps.
# The export only happens when model_metrics.db does not exist yet; an
# existing database file is left untouched.
if not os.path.exists("model_metrics.db"):
    conn = sqlite3.connect('model_metrics.db')
    try:
        metrics_df.to_sql(name='model_metrics', con=conn)
    finally:
        # Always release the database file handle, even if the write fails.
        conn.close()
# Do some conversions & calculations
# The start/end stamps arrive as epoch milliseconds; turn them into datetimes.
metrics_df['startTimeStampMs'] = pd.to_datetime(metrics_df['startTimeStampMs'], unit='ms')
metrics_df['endTimeStampMs'] = pd.to_datetime(metrics_df['endTimeStampMs'], unit='ms')
# Duration of each record. `.dt.microseconds` is only the sub-second component
# of a timedelta, so durations of one second or more were truncated; dividing
# by a one-microsecond Timedelta counts the whole duration instead.
# NOTE(review): microseconds * 1000 yields nanoseconds. The scale is kept as it
# was so existing values are unchanged; confirm whether milliseconds was meant.
metrics_df["processing_time"] = (
    (metrics_df["endTimeStampMs"] - metrics_df["startTimeStampMs"])
    // pd.Timedelta(microseconds=1)
) * 1000
# This shows how to plot specific metrics.
sns.set_style("whitegrid")
# NOTE(review): nothing has been plotted yet at this point; confirm despine()
# has the intended effect here rather than after the plots are drawn.
sns.despine(left=True,bottom=True)

# metrics.probability values, ordered by start timestamp; the x axis is simply
# the position of each record in that ordering.
prob_metrics = metrics_df.dropna(subset=['metrics.probability']).sort_values('startTimeStampMs')
sns.lineplot(x=range(len(prob_metrics)), y="metrics.probability", data=prob_metrics, color='grey')

# processing_time values, ordered by start timestamp. Plot from time_metrics:
# the earlier version computed it but then reused prob_metrics for both the
# x range and the data, so rows without a probability were silently dropped.
time_metrics = metrics_df.dropna(subset=['processing_time']).sort_values('startTimeStampMs')
sns.lineplot(x=range(len(time_metrics)), y="processing_time", data=time_metrics, color='grey')
# This shows how the model accuracy drops over time: one bar per record that
# carries a metrics.accuracy value, ordered by start timestamp and numbered
# from 1 along the x axis.
agg_metrics = (
    metrics_df
    .dropna(subset=["metrics.accuracy"])
    .sort_values('startTimeStampMs')
)
sns.barplot(
    data=agg_metrics,
    x=list(range(1, len(agg_metrics) + 1)),
    y="metrics.accuracy",
    color="grey",
)