### Tutorial based on: https://aws.amazon.com/getting-started/hands-on/build-train-deploy-monitor-machine-learning-model-sagemaker-studio/?trk=gs_card

In [101]:
%load_ext autoreload
%autoreload 2

import sys
import IPython
from IPython.display import clear_output
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from time import sleep, gmtime, strftime
import json
import time
import io
from dotenv import load_dotenv
from pathlib import Path

import boto3
import sagemaker
from sagemaker import get_execution_role

# !pip install sagemaker-experiments 
from sagemaker.analytics import ExperimentAnalytics
from smexperiments.experiment import Experiment
from smexperiments.tracker import Tracker

from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput

from urllib.parse import urlparse

from sklearn.metrics import confusion_matrix, accuracy_score

from sagemaker.model_monitor import DataCaptureConfig
from sagemaker import RealTimePredictor

from sagemaker.predictor import csv_serializer

from sagemaker.model_monitor import DefaultModelMonitor
from sagemaker.model_monitor.dataset_format import DatasetFormat
from sagemaker.model_monitor import CronExpressionGenerator

from threading import Thread

from modules import utilities, modeling, endpoints

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [65]:
env_path = Path('..') / '.env'
load_dotenv(dotenv_path=env_path)

account_num = os.environ.get('ACCOUNT_NUMBER')

### check and install correct version of sagemaker if necessary

In [66]:
def install_correct_sagemaker():
    if int(sagemaker.__version__.split('.')[0]) == 2:
        print("Installing previous SageMaker Version and restarting the kernel")
        !{sys.executable} -m pip install sagemaker==1.72.0
        IPython.Application.instance().kernel.do_shutdown(True)

    else:
        print("Version is good")

role = get_execution_role()
sess = sagemaker.Session()
region = boto3.session.Session().region_name
sm = boto3.Session().client('sagemaker')

#### Set up buckets and folders

In [67]:
rawbucket= sess.default_bucket() # Alternatively you can use our custom bucket here. 

prefix = 'sagemaker-modelmonitor' # use this prefix to store all files pertaining to this workshop.

dataprefix = prefix + '/data'
traindataprefix = prefix + '/train_data'
testdataprefix = prefix + '/test_data'
testdatanolabelprefix = prefix + '/test_data_no_label'
trainheaderprefix = prefix + '/train_headers'

#### Set up data

In [68]:
# ! wget https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls
data = pd.read_excel('default of credit card clients.xls', header=1)
data = data.drop(columns = ['ID'])
data.rename(columns={"default payment next month": "Label"}, inplace=True)
lbl = data.Label
data = pd.concat([lbl, data.drop(columns=['Label'])], axis = 1) # reorder
data.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,20000,2,2,1,24,2,2,-1,-1,-2,...,0,0,0,0,689,0,0,0,0,1
1,120000,2,2,2,26,-1,2,0,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,90000,2,2,2,34,0,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,50000,2,2,1,37,0,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,50000,1,2,1,57,-1,0,-1,0,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


In [71]:
if not os.path.exists('rawdata/rawdata.csv'):
    !mkdir rawdata
    data.to_csv('rawdata/rawdata.csv', index=None)
else:
    pass
# Upload the raw dataset
raw_data_location = sess.upload_data('rawdata', bucket=rawbucket, key_prefix=dataprefix)

## Preprocessing

##### Note: Amazon SageMaker Processing runs on separate compute instances from your notebook. This means you can continue to experiment and run code in your notebook while the processing job is under way. This will incur additional charges for the cost of the instance which is up and running for the duration of the processing job.

In [72]:
sklearn_processor = SKLearnProcessor(framework_version='0.20.0',
                                     role=role,
                                     instance_type='ml.c4.xlarge',
                                     instance_count=1)

In [73]:
# Copy the preprocessing code over to the s3 bucket
codeprefix = prefix + '/code'
codeupload = sess.upload_data('preprocessing.py', bucket=rawbucket, key_prefix=codeprefix)

In [74]:
train_data_location = rawbucket + '/' + traindataprefix
test_data_location = rawbucket+'/'+testdataprefix

#### Start the processing job

In [75]:
sklearn_processor.run(code=codeupload,
                      inputs=[ProcessingInput(source=raw_data_location,
                                              destination='/opt/ml/processing/input')],
                      outputs=[ProcessingOutput(output_name='train_data',
                                               source='/opt/ml/processing/train',
                                               destination='s3://' + train_data_location),
                               ProcessingOutput(output_name='test_data',
                                                source='/opt/ml/processing/test',
                                               destination="s3://"+test_data_location),
                               ProcessingOutput(output_name='train_data_headers',
                                                source='/opt/ml/processing/train_headers',
                                               destination="s3://" + rawbucket + '/' + prefix + '/train_headers')],
                      arguments=['--train-test-split-ratio', '0.2'],
                      wait=True,
                      logs=False
                     )
clear_output()

In [76]:
preprocessing_job_description = sklearn_processor.jobs[-1].describe()

output_config = preprocessing_job_description['ProcessingOutputConfig']
for output in output_config['Outputs']:
    if output['OutputName'] == 'train_data':
        preprocessed_training_data = output['S3Output']['S3Uri']
    if output['OutputName'] == 'test_data':
        preprocessed_test_data = output['S3Output']['S3Uri']

## Create an Amazon SageMaker Experiment

In [77]:
cc_experiment = Experiment.create(
    experiment_name=f"Build-train-deploy-{int(time.time())}", 
    description="Predict credit card default from payments data", 
    sagemaker_boto_client=sm)
clear_output()

In [78]:
# Start Tracking parameters used in the Pre-processing pipeline.
with Tracker.create(display_name="Preprocessing", sagemaker_boto_client=sm) as tracker:
    tracker.log_parameters({
        "train_test_split_ratio": 0.2,
        "random_state":0
    })
    # we can log the s3 uri to the dataset we just uploaded
    tracker.log_input(name="ccdefault-raw-dataset", media_type="s3/uri", value=raw_data_location)
    tracker.log_input(name="ccdefault-train-dataset", media_type="s3/uri", value=train_data_location)
    tracker.log_input(name="ccdefault-test-dataset", media_type="s3/uri", value=test_data_location)

Navigate to SageMaker resources on the LH panel & click 'Experiments and trials'

#### Set up estimator

In [103]:
xgb = modeling.create_estimator(role, rawbucket, prefix, sess, train_data_location,
                       tracker, cc_experiment, sm)
time.sleep(2)

INFO:sagemaker:Creating training-job with name: cc-training-job-1628529836



2021-08-09 17:23:56 Starting - Starting the training job
2021-08-09 17:23:58 Starting - Launching requested ML instances.............
2021-08-09 17:25:10 Starting - Preparing the instances for training.................
2021-08-09 17:26:40 Downloading - Downloading input data.......
2021-08-09 17:27:19 Training - Downloading the training image......
2021-08-09 17:27:53 Training - Training image download completed. Training in progress..
2021-08-09 17:28:05 Uploading - Uploading generated training model
2021-08-09 17:28:12 Completed - Training job completed


## Deploy the model

In [80]:
test_data_path = 's3://' + test_data_location + '/test_data.csv'
! aws s3 cp $test_data_path .
clear_output()

In [81]:
test_full = pd.read_csv('test_data.csv', names = [str(x) for x in range(len(data.columns))])
test_full.head()
label = test_full['0'] 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,0,-0.341476,0.201175,20000.0,1.0,1.0,2.0,33.0,1.0,2.0,...,17399.0,19057.0,18453.0,19755.0,19288.0,2260.0,0.0,1600.0,0.0,644.0
1,0,-0.136859,0.199594,20000.0,2.0,2.0,2.0,35.0,0.0,0.0,...,19347.0,18600.0,19000.0,19000.0,20000.0,0.0,1000.0,0.0,1000.0,0.0
2,0,-0.284364,0.185736,230000.0,2.0,1.0,1.0,44.0,1.0,-1.0,...,949.0,2864.0,933.0,0.0,0.0,2873.0,933.0,0.0,0.0,0.0
3,0,-0.040569,0.28936,100000.0,1.0,2.0,1.0,42.0,0.0,0.0,...,99998.0,16138.0,17758.0,18774.0,20272.0,2000.0,2000.0,2000.0,2000.0,2000.0
4,0,0.079132,0.186502,150000.0,1.0,1.0,2.0,29.0,-2.0,-2.0,...,6917.0,831.0,6469.0,5138.0,7810.0,833.0,6488.0,5153.0,7833.0,7130.0


In [83]:
%%time

sm_transformer = xgb.transformer(1, 'ml.m5.xlarge', accept = 'text/csv')

# start a transform job
# ignore the label column
sm_transformer.transform(test_data_path, split_type='Line', input_filter='$[1:]',
                         content_type='text/csv',
                        logs=False)
sm_transformer.wait(logs=False)

INFO:sagemaker:Creating model with name: cc-training-job-1628518380
INFO:sagemaker:Creating transform job with name: sagemaker-xgboost-2021-08-09-14-18-16-129


........................................................!
CPU times: user 293 ms, sys: 0 ns, total: 293 ms
Wall time: 4min 42s


## Evaluate

In [84]:
output = utilities.get_csv_output_from_s3(sm_transformer.output_path, 'test_data.csv.out')
output_df = pd.read_csv(io.StringIO(output), sep=",", header=None)
output_df['Predicted']=np.round(output_df.values)
output_df['Label'] = label

confusion_matrix = pd.crosstab(output_df['Predicted'], output_df['Label'], rownames=['Actual'], colnames=['Predicted'], margins = True)
confusion_matrix

Predicted,0,1,All
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,4460,815,5275
1.0,243,482,725
All,4703,1297,6000


In [85]:
print("Baseline Accuracy = {}".format(1- np.unique(data['Label'], return_counts=True)[1][1]/(len(data['Label']))))
print("Accuracy Score = {}".format(accuracy_score(label, output_df['Predicted'])))

Baseline Accuracy = 0.7787999999999999
Accuracy Score = 0.8236666666666667


## Deploy the model as an end point and set up data capture

In [86]:
sm_client = boto3.client('sagemaker')
latest_training_job = sm_client.list_training_jobs(MaxResults=1,
                                                SortBy='CreationTime',
                                                SortOrder='Descending')
training_job_name=latest_training_job['TrainingJobSummaries'][0]['TrainingJobName']

training_job_description = sm_client.describe_training_job(TrainingJobName=training_job_name)
model_data = training_job_description['ModelArtifacts']['S3ModelArtifacts']
container_uri = training_job_description['AlgorithmSpecification']['TrainingImage']    

try:
    model = modeling.create_model(sm_client, role, training_job_name, container_uri, model_data)
except Exception as e:
    sm_client.delete_model(ModelName=training_job_name)
    model = modeling.create_model(sm_client, role, training_job_name, container_uri, model_data)
        
print('Model created as model["ModelArn"]')

Model created as model["ModelArn"]


In [87]:
s3_capture_upload_path = 's3://{}/{}/monitoring/datacapture'.format(rawbucket, prefix)
data_capture_configuration = {
    "EnableCapture": True,
    "InitialSamplingPercentage": 100,
    "DestinationS3Uri": s3_capture_upload_path,
    "CaptureOptions": [
        { "CaptureMode": "Output" },
        { "CaptureMode": "Input" }
    ],
    "CaptureContentTypeHeader": {
       "CsvContentTypes": ["text/csv"],
       "JsonContentTypes": ["application/json"]}}

In [88]:
endpoints.attempt_create_endpoint_config(sm_client, training_job_name, data_capture_configuration)

Endpoint configuration created as endpoint_config["EndpointConfigArn"]


In [89]:
# Enable data capture, sampling 100% of the data for now. Next we deploy the endpoint in the correct VPC.
endpoint_name = training_job_name

try:
    endpoint = endpoints.create_endpoint(sm_client, endpoint_name, training_job_name)
except Exception as e:
    sm_client.delete_endpoint(EndpointName=endpoint_name)
    endpoint = endpoints.create_endpoint(sm_client, endpoint_name, training_job_name)

print('Endpoint created as endpoint["EndpointArn"]')
time.sleep(600) # Wait for endpoint to be created.  
# 'AWS Settings' will give you the status of the endpoint creation

Endpoint created as endpoint["EndpointArn"]


Look at endpoints in SageMaker resources. 'AWS Settings' will give you the status of the endpoint creation

In [90]:
!head -10 test_data.csv > test_sample.csv

In [91]:
predictor = RealTimePredictor(endpoint=endpoint_name, content_type = 'text/csv')

with open('test_sample.csv', 'r') as f:
    for row in f:
        payload = row.rstrip('\n')
        response = predictor.predict(data=payload[2:])
        sleep(0.5)
sleep(120) # wait so data can load into S3
print('done!')

done!


In [92]:
s3_client = boto3.Session().client('s3')
capture_files = endpoints.extract_captured_files(s3_client, prefix, endpoint_name, rawbucket)

sagemaker-modelmonitor/monitoring/datacapture/cc-training-job-1628518380/AllTraffic
Found 1 Capture Files like:


'sagemaker-modelmonitor/monitoring/datacapture/cc-training-job-1628518380/AllTraffic/2021/08/09/17/02-16-856-914576c8-b84b-43ab-9e49-1a6f2a97cbef.jsonl'

The captured file contains both the inputs and outputs of the model

In [93]:
# View contents of the captured file.
capture_file = utilities.get_obj_body(s3_client, rawbucket, capture_files[0])
print(json.dumps(json.loads(capture_file.split('\n')[5]), indent = 2, sort_keys =True))

{
  "captureData": {
    "endpointInput": {
      "data": "-0.34147611300851444,0.1932005252116958,50000.0,1.0,2.0,2.0,25.0,-1.0,3.0,2.0,0.0,0.0,0.0,10386.0,9993.0,9993.0,15300.0,0.0,0.0,200.0,5307.0,0.0,0.0",
      "encoding": "CSV",
      "mode": "INPUT",
      "observedContentType": "text/csv"
    },
    "endpointOutput": {
      "data": "0.5108723044395447",
      "encoding": "CSV",
      "mode": "OUTPUT",
      "observedContentType": "text/csv; charset=utf-8"
    }
  },
  "eventMetadata": {
    "eventId": "83cee72a-8d4f-494b-a5fe-a1a7af2eb8a5",
    "inferenceTime": "2021-08-09T17:02:19Z"
  },
  "eventVersion": "0"
}


## Monitor the endpoint with SageMaker Model Monitor

#### Check for drift

In [94]:
model_prefix = prefix + "/" + endpoint_name
baseline_prefix = model_prefix + '/baselining'
baseline_data_prefix = baseline_prefix + '/data'
baseline_results_prefix = baseline_prefix + '/results'

baseline_data_uri = 's3://{}/{}'.format(rawbucket,baseline_data_prefix)
baseline_results_uri = 's3://{}/{}'.format(rawbucket, baseline_results_prefix)
train_data_header_location = "s3://" + rawbucket + '/' + prefix + '/train_headers'

#### Model Monitor uses deequ library built on top of Apache Spark to conduct unit tests on data

In [95]:
my_default_monitor = DefaultModelMonitor(
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=3600)

my_default_monitor.suggest_baseline(
    baseline_dataset=os.path.join(train_data_header_location, 'train_data_with_headers.csv'),
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_uri,
    wait=True,
    logs=False
)

clear_output()

In [96]:
result = s3_client.list_objects(Bucket=rawbucket, Prefix=baseline_results_prefix)
report_files = [report_file.get("Key") for report_file in result.get('Contents')]
print("Found Files:")
print("\n ".join(report_files))

baseline_job = my_default_monitor.latest_baselining_job
schema_df = pd.io.json.json_normalize(baseline_job.baseline_statistics().body_dict["features"])
schema_df.head()



Found Files:
sagemaker-modelmonitor/cc-training-job-1628518380/baselining/results/constraints.json
 sagemaker-modelmonitor/cc-training-job-1628518380/baselining/results/statistics.json


  


Unnamed: 0,name,inferred_type,numerical_statistics.common.num_present,numerical_statistics.common.num_missing,numerical_statistics.mean,numerical_statistics.sum,numerical_statistics.std_dev,numerical_statistics.min,numerical_statistics.max,numerical_statistics.distribution.kll.buckets,numerical_statistics.distribution.kll.sketch.parameters.c,numerical_statistics.distribution.kll.sketch.parameters.k,numerical_statistics.distribution.kll.sketch.data,string_statistics.common.num_present,string_statistics.common.num_missing,string_statistics.distinct_count
0,Label,Integral,24000.0,0.0,0.222458,5339.0,0.415897,0.0,1.0,"[{'lower_bound': 0.0, 'upper_bound': 0.1, 'cou...",0.64,2048.0,"[[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",,,
1,PAY_AMT1,String,,,,,,,,,,,,24000.0,0.0,6922.0
2,BILL_AMT1,Fractional,24000.0,0.0,0.237809,5707.419,0.080585,0.0,1.0,"[{'lower_bound': 0.0, 'upper_bound': 0.0999999...",0.64,2048.0,"[[0.19552189076210494, 0.2369590330493186, 0.4...",,,
3,LIMIT_BAL,Fractional,24000.0,0.0,167730.986667,4025544000.0,129479.698677,10000.0,800000.0,"[{'lower_bound': 10000.0, 'upper_bound': 89000...",0.64,2048.0,"[[30000.0, 120000.0, 200000.0, 130000.0, 28000...",,,
4,SEX,Fractional,24000.0,0.0,1.601167,38428.0,0.489658,1.0,2.0,"[{'lower_bound': 1.0, 'upper_bound': 1.1, 'cou...",0.64,2048.0,"[[1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0,...",,,


In [97]:
constraints_df = pd.io.json.json_normalize(baseline_job.suggested_constraints().body_dict["features"])
constraints_df.head()

  """Entry point for launching an IPython kernel.


Unnamed: 0,name,inferred_type,completeness,num_constraints.is_non_negative
0,Label,Integral,1.0,True
1,PAY_AMT1,String,1.0,
2,BILL_AMT1,Fractional,1.0,True
3,LIMIT_BAL,Fractional,1.0,True
4,SEX,Fractional,1.0,True


#### Set up frequency for endpoint monitoring

Can use this approach to trigger alarms to let admins know when data drift has been detected

In [98]:
reports_prefix = '{}/reports'.format(prefix)
s3_report_path = 's3://{}/{}'.format(rawbucket,reports_prefix)

mon_schedule_name = 'Built-train-deploy-model-monitor-schedule-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
# my_default_monitor.delete_monitoring_schedule()
my_default_monitor.create_monitoring_schedule(
    monitor_schedule_name=mon_schedule_name,
    endpoint_input=predictor.endpoint,
    output_s3_uri=s3_report_path,
    statistics=my_default_monitor.baseline_statistics(),
    constraints=my_default_monitor.suggested_constraints(),
    # every minute in the 4:00 hour
#     schedule_cron_expression='cron(0 16 * * * *)',
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    enable_cloudwatch_metrics=True,

)

INFO:sagemaker:Creating monitoring schedule name Built-train-deploy-model-monitor-schedule-2021-08-09-17-13-04.



Creating Monitoring Schedule with name: Built-train-deploy-model-monitor-schedule-2021-08-09-17-13-04


## Evaluate Model Monitor against some sample data to see if Model Monitor can detect the change

In [99]:
COLS = data.columns
test_full = pd.read_csv('test_data.csv', names = ['Label'] +['PAY_AMT1','BILL_AMT1'] + list(COLS[1:])[:11] + list(COLS[1:])[12:17] + list(COLS[1:])[18:]
)
test_full.head()

Unnamed: 0,Label,PAY_AMT1,BILL_AMT1,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,...,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
0,0,-0.341476,0.201175,20000.0,1.0,1.0,2.0,33.0,1.0,2.0,...,17399.0,19057.0,18453.0,19755.0,19288.0,2260.0,0.0,1600.0,0.0,644.0
1,0,-0.136859,0.199594,20000.0,2.0,2.0,2.0,35.0,0.0,0.0,...,19347.0,18600.0,19000.0,19000.0,20000.0,0.0,1000.0,0.0,1000.0,0.0
2,0,-0.284364,0.185736,230000.0,2.0,1.0,1.0,44.0,1.0,-1.0,...,949.0,2864.0,933.0,0.0,0.0,2873.0,933.0,0.0,0.0,0.0
3,0,-0.040569,0.28936,100000.0,1.0,2.0,1.0,42.0,0.0,0.0,...,99998.0,16138.0,17758.0,18774.0,20272.0,2000.0,2000.0,2000.0,2000.0,2000.0
4,0,0.079132,0.186502,150000.0,1.0,1.0,2.0,29.0,-2.0,-2.0,...,6917.0,831.0,6469.0,5138.0,7810.0,833.0,6488.0,5153.0,7833.0,7130.0


In [100]:
faketestdata = test_full
faketestdata['EDUCATION'] = -faketestdata['EDUCATION'].astype(float)
faketestdata['BILL_AMT2']= (faketestdata['BILL_AMT2']//10).astype(float)
faketestdata['AGE']= (faketestdata['AGE']-10).astype(float)

faketestdata.head()
faketestdata.drop(columns=['Label']).to_csv('test-data-input-cols.csv', index = None, header=None)

INFO:numexpr.utils:NumExpr defaulting to 2 threads.


In [51]:
runtime_client = boto3.client('runtime.sagemaker')

# (just repeating code from above for convenience/ able to run this section independently)
def invoke_endpoint(ep_name, file_name, runtime_client):
    with open(file_name, 'r') as f:
        for row in f:
            payload = row.rstrip('\n')
            response = runtime_client.invoke_endpoint(EndpointName=ep_name,
                                          ContentType='text/csv', 
                                          Body=payload)
            time.sleep(1)
            
def invoke_endpoint_forever():
    while True:
        invoke_endpoint(endpoint_name, 'test-data-input-cols.csv', runtime_client)
        
thread = Thread(target = invoke_endpoint_forever)
thread.start()
# Note that you need to stop the kernel to stop the invocations

In [52]:
desc_schedule_result = my_default_monitor.describe_schedule()
print('Schedule status: {}'.format(desc_schedule_result['MonitoringScheduleStatus']))

Schedule status: Scheduled


In [53]:
mon_executions = my_default_monitor.list_executions()
print("We created ahourly schedule above and it will kick off executions ON the hour (plus 0 - 20 min buffer.\nWe will have to wait till we hit the hour...")

while len(mon_executions) == 0:
    print("Waiting for the 1st execution to happen...")
    time.sleep(600)
    mon_executions = my_default_monitor.list_executions()

No executions found for schedule. monitoring_schedule_name: Built-train-deploy-model-monitor-schedule-2021-07-15-00-29-34
We created ahourly schedule above and it will kick off executions ON the hour (plus 0 - 20 min buffer.
We will have to wait till we hit the hour...
Waiting for the 1st execution to happen...
No executions found for schedule. monitoring_schedule_name: Built-train-deploy-model-monitor-schedule-2021-07-15-00-29-34
Waiting for the 1st execution to happen...
No executions found for schedule. monitoring_schedule_name: Built-train-deploy-model-monitor-schedule-2021-07-15-00-29-34
Waiting for the 1st execution to happen...
No executions found for schedule. monitoring_schedule_name: Built-train-deploy-model-monitor-schedule-2021-07-15-00-29-34
Waiting for the 1st execution to happen...
Waiting for the 1st execution to happen...


In [54]:
mon_executions

[<sagemaker.model_monitor.model_monitoring.MonitoringExecution at 0x7f680d33f590>]

#### View the monitoring job in Endpoints --> Monitoring job history

In [55]:
latest_execution = mon_executions[-1] # latest execution's index is -1, second to last is -2 and so on..
time.sleep(60)
latest_execution.wait(logs=False)

print("Latest execution status: {}".format(latest_execution.describe()['ProcessingJobStatus']))
print("Latest execution result: {}".format(latest_execution.describe()['ExitMessage']))

latest_job = latest_execution.describe()
if (latest_job['ProcessingJobStatus'] != 'Completed'):
        print("====STOP==== \n No completed executions to inspect further. Please wait till an execution completes or investigate previously reported failures.")

!Latest execution status: Completed
Latest execution result: CompletedWithViolations: Job completed successfully with 3 violations.


#### Constraint_violations.json is a new file after detecting issues

In [56]:
report_uri=latest_execution.output.destination
from urllib.parse import urlparse
s3uri = urlparse(report_uri)
report_bucket = s3uri.netloc
report_key = s3uri.path.lstrip('/')
print('Report bucket: {}'.format(report_bucket))
print('Report key: {}'.format(report_key))

s3_client = boto3.Session().client('s3')
result = s3_client.list_objects(Bucket=rawbucket, Prefix=report_key)
report_files = [report_file.get("Key") for report_file in result.get('Contents')]
print("Found Report Files:")
print("\n ".join(report_files))

Report Uri: s3://sagemaker-us-east-1-657294665246/sagemaker-modelmonitor/reports/cc-training-job-1626307126/Built-train-deploy-model-monitor-schedule-2021-07-15-00-29-34/2021/07/15/01
Report bucket: sagemaker-us-east-1-657294665246
Report key: sagemaker-modelmonitor/reports/cc-training-job-1626307126/Built-train-deploy-model-monitor-schedule-2021-07-15-00-29-34/2021/07/15/01
Found Report Files:
sagemaker-modelmonitor/reports/cc-training-job-1626307126/Built-train-deploy-model-monitor-schedule-2021-07-15-00-29-34/2021/07/15/01/constraint_violations.json
 sagemaker-modelmonitor/reports/cc-training-job-1626307126/Built-train-deploy-model-monitor-schedule-2021-07-15-00-29-34/2021/07/15/01/constraints.json
 sagemaker-modelmonitor/reports/cc-training-job-1626307126/Built-train-deploy-model-monitor-schedule-2021-07-15-00-29-34/2021/07/15/01/statistics.json


## Delete resources

In [57]:
my_default_monitor.delete_monitoring_schedule()
time.sleep(10) # actually wait for the deletion


Deleting Monitoring Schedule with name: Built-train-deploy-model-monitor-schedule-2021-07-15-00-29-34


In [60]:
print(endpoint_name)
sm.delete_endpoint(EndpointName = endpoint_name)

cc-training-job-1626307126


ClientError: An error occurred (ValidationException) when calling the DeleteEndpoint operation: Could not find endpoint "arn:aws:sagemaker:us-east-1:657294665246:endpoint/cc-training-job-1626307126".

In [73]:
s3_bucket = 's3://sagemaker-us-east-1-' + account_num + '/sagemaker-modelmonitor/data'

In [74]:
%%sh
aws s3 rm --recursive {s3_bucket}


usage: aws s3 rm <S3Uri>
Error: Invalid argument type


CalledProcessError: Command 'b'aws s3 rm --recursive {s3_bucket}\n'' returned non-zero exit status 255.

In [None]:
# TODO: Hyperparameter tuning using: https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning-how-it-works.html