In [9]:
import boto3

from sagemaker import session
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer
from sklearn.datasets import load_iris

import io
import numpy as np
import pandas as pd
import random

import time

# SageMaker session layered on a default boto3 session
sm_session = session.Session(boto_session=boto3.Session())

# Storage settings: URI scheme prefix and the bucket that receives the dataset
protocol = 's3://'
bucket = 'sagemaker-third-party-models'

# Name of the real-time inference endpoint that the cells below invoke
endpoint_name = 'third-party-model-endpoint'

# Upper bound on the number of records sent per traffic-generation cell
batch_size = 10

#### Uploading the Iris dataset to S3

In [10]:
# Low-level S3 client (used by the upload cell)
s3_client = boto3.client('s3')

# Fetch the Iris dataset bundled with scikit-learn
iris_data = load_iris()

# Translate each numeric target into its species name
labels = iris_data.target_names[iris_data['target']]

# Feature frame with explicit column names ...
cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
iris_df = pd.DataFrame(iris_data.data, columns=cols)

# ... with the species label placed in the leading column
iris_df.insert(loc=0, column='class', value=labels)

# Serialise to CSV in an in-memory text buffer (the dataset is small),
# without writing the DataFrame index
data_stream = io.StringIO()
iris_df.to_csv(data_stream, sep=',', encoding='utf-8', index=False)

# Full CSV text, header line included
iris_csv = data_stream.getvalue()

In [None]:
# Object key under which the CSV is stored in the bucket
key = 'iris/train/train.csv'

# Write the in-memory CSV text to S3 as a single text/csv object
s3_client.put_object(
    Bucket=bucket,
    Key=key,
    Body=iris_csv,
    ContentType='text/csv',
)

#### Create a Predictor for the deployed endpoint

In [12]:
# Client-side handle used to invoke the endpoint named in the configuration cell.
# The session created in that cell is passed explicitly (instead of None) so the
# notebook uses one SageMaker session throughout rather than leaving the SDK to
# fall back to a default of its own.
# CSVSerializer turns each request payload into CSV text before it is sent.
predictor = Predictor(endpoint_name=endpoint_name,
                      sagemaker_session=sm_session,
                      serializer=CSVSerializer()) # Modify based on model

#### Generate baseline data to trigger 'No Issues'

In [13]:
# Keep only the feature columns. errors='ignore' makes this a no-op when the
# label column has already been dropped, so the cell is safe to re-run.
iris_df = iris_df.drop(columns=['class'], errors='ignore')

# One list of feature values per row. DataFrame.values contains data rows only
# (the column header is not part of it), so every row is kept: slicing off the
# first element here would silently discard a real sample.
samples = iris_df.values.tolist()

# Shuffle the samples so the batch is not just the first rows of the dataset
random.shuffle(samples)

# Invoke real-time inference endpoint using baseline data
for index, sample in enumerate(samples[:batch_size]):

    # Defensive coding: skip rows with no feature values
    if not sample:
        continue

    # Invoke the model's inference endpoint; the raw response is decoded as UTF-8 text
    response = predictor.predict(sample).decode('utf-8')

    # Display the model's classification prediction
    print('Sample {0} >> Input: {1}: >> Prediction: {2}'.format(index, sample, response))

    # Suspends execution for 500 milliseconds between requests
    time.sleep(0.5)

print('Completed!')

Sample 0 >> Input: [5.0, 2.3, 3.3, 1.0]: >> Prediction: versicolor

Sample 1 >> Input: [5.8, 2.7, 3.9, 1.2]: >> Prediction: versicolor

Sample 2 >> Input: [6.3, 3.4, 5.6, 2.4]: >> Prediction: virginica

Sample 3 >> Input: [6.0, 2.2, 5.0, 1.5]: >> Prediction: virginica

Sample 4 >> Input: [5.2, 3.4, 1.4, 0.2]: >> Prediction: setosa

Sample 5 >> Input: [5.7, 2.8, 4.5, 1.3]: >> Prediction: versicolor

Sample 6 >> Input: [7.7, 2.6, 6.9, 2.3]: >> Prediction: virginica

Sample 7 >> Input: [4.6, 3.4, 1.4, 0.3]: >> Prediction: setosa

Sample 8 >> Input: [5.0, 3.4, 1.6, 0.4]: >> Prediction: setosa

Sample 9 >> Input: [6.3, 2.5, 5.0, 1.9]: >> Prediction: virginica

Completed!


#### Generate data to induce data quality constraint violations

In [14]:
# Random values that fall outside the baseline's data type and distribution:
# uniform draws between -5 and 5, cast to integers
drift_matrix = np.random.uniform(low=-5, high=5, size=(batch_size, 4)).astype('i')

# Turn every row into one comma-separated string
sample_drift_data = [",".join(row) for row in drift_matrix.astype(str)]

# Send the drifted records to the real-time endpoint to simulate data drift
for index, sample in enumerate(sample_drift_data):

    # Defensive coding: nothing to send for an empty payload
    if not sample:
        continue

    # Invoke the model's inference endpoint and decode the response as UTF-8 text
    response = predictor.predict(sample).decode('utf-8')

    # Display the model's classification prediction
    print('Sample {0} >> Input: {1}: >> Prediction: {2}'.format(index, sample, response))

    # Suspends execution for 500 milliseconds between requests
    time.sleep(0.5)

print('Completed!')

Sample 0 >> Input: 2,4,1,-3: >> Prediction: setosa

Sample 1 >> Input: 0,0,-4,3: >> Prediction: virginica

Sample 2 >> Input: -4,2,-4,2: >> Prediction: virginica

Sample 3 >> Input: 4,-4,-3,-3: >> Prediction: setosa

Sample 4 >> Input: 2,-2,2,3: >> Prediction: virginica

Sample 5 >> Input: 1,-4,3,-4: >> Prediction: setosa

Sample 6 >> Input: 2,-1,4,0: >> Prediction: setosa

Sample 7 >> Input: 2,-3,-4,-1: >> Prediction: setosa

Sample 8 >> Input: 3,-3,2,1: >> Prediction: versicolor

Sample 9 >> Input: 1,0,-2,4: >> Prediction: virginica

Completed!


#### Monitoring Schedule management

In [15]:
# !aws sagemaker list-monitoring-schedules
# !aws sagemaker describe-monitoring-schedule --monitoring-schedule-name 'third-party-model-data-quality-schedule'
# !aws sagemaker list-monitoring-executions --monitoring-schedule-name 'third-party-model-data-quality-schedule'


#### Resource Cleanup

In [16]:
# Step 1.
# print('Stopping monitoring schedule...')
# !aws sagemaker stop-monitoring-schedule --monitoring-schedule-name 'third-party-model-data-quality-schedule'
# time.sleep(30) # allow time for processing
# !aws sagemaker list-monitoring-schedules --endpoint-name 'third-party-model-endpoint'

# Step 2.
# print('Deleting monitoring schedule...')
# !aws sagemaker delete-monitoring-schedule --monitoring-schedule-name 'third-party-model-data-quality-schedule'
# time.sleep(30) # allow time for processing
# !aws sagemaker list-monitoring-schedules --endpoint-name 'third-party-model-endpoint'

# Step 3.
# print('Deleting model endpoint...')
# !aws sagemaker delete-endpoint --endpoint-name 'third-party-model-endpoint'
# time.sleep(30) # allow time for processing
# !aws sagemaker list-endpoints --name-contains 'third-party-model-endpoint'

# Step 4.
# print('Deleting model endpoint config...')
# !aws sagemaker delete-endpoint-config --endpoint-config-name 'third-party-model-endpoint-config'
# time.sleep(30) # allow time for processing
# !aws sagemaker list-endpoint-configs --name-contains 'third-party-model-endpoint-config'

# Step 5.
# print('Deleting model...')
# !aws sagemaker delete-model --model-name 'third-party-model'
# time.sleep(30) # allow time for processing
# !aws sagemaker list-models --name-contains 'third-party-model'