In [None]:
import boto3
from sagemaker import session

from sagemaker.predictor import Predictor
from sagemaker.deserializers import CSVDeserializer
from sagemaker.serializers import CSVSerializer

# SageMaker session layered over an explicitly created boto3 session
sm_session = session.Session(boto_session=boto3.Session())

# S3 location settings
protocol = 's3://'
bucket = ''  # placeholder -- presumably set to your own bucket name before the upload cell runs (confirm)

# Name of the real-time inference endpoint the Predictor is pointed at
endpoint_name = 'third-party-model-endpoint'

#### Uploading the Iris dataset to S3

In [None]:
import io
import pandas as pd
from sklearn.datasets import load_iris
import boto3

# S3 client (used by the upload cell that follows)
s3_client = boto3.client('s3')

# Load the iris dataset; the returned object exposes the feature matrix (.data),
# the integer targets and the class names (.target_names)
iris_data = load_iris()

# Build a DataFrame from the four feature columns.
# Fix: this previously read `iris.data`, but no name `iris` exists -- the dataset
# is bound to `iris_data` -- so the cell raised NameError on a fresh kernel.
cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
iris_df = pd.DataFrame(iris_data.data, columns=cols)

# Insert target label as first column: index the class-name array with the
# integer targets to get one class name per row
labels = iris_data.target_names[iris_data['target']]
iris_df.insert(loc=0, column='class', value=labels)

# Serialize to CSV in memory (small dataset, so no temporary file is needed)
data_stream = io.StringIO()
iris_df.to_csv(data_stream, sep=',', encoding='utf-8', index=False)

# Get the full CSV text (header row included) back out of the buffer
iris_csv = data_stream.getvalue()

# Return a list of the lines in the string
samples = iris_csv.splitlines()

In [None]:
# Object key under which the training CSV is stored in the bucket
key = 'third-party-model/train/data/train.csv'

# Send the in-memory CSV text to S3 as a single text/csv object
put_args = {
    'Body': iris_csv,
    'Bucket': bucket,
    'Key': key,
    'ContentType': 'text/csv',
}
s3_client.put_object(**put_args)

#### Create a Predictor for the existing endpoint

In [None]:
# Client-side handle used to invoke the endpoint named by `endpoint_name`.
# Pass the `sm_session` created in the first cell instead of None, so the whole
# notebook shares one SageMaker session rather than letting the SDK build a
# second default session for the predictor.
predictor = Predictor(endpoint_name=endpoint_name,
                      sagemaker_session=sm_session,
                      serializer=CSVSerializer()) # Modify based on model

#### Generate baseline data to trigger 'No Issues'

In [None]:
import time
import random

# Pull the CSV text back out of the in-memory buffer
iris_csv = data_stream.getvalue()

# Break the text into rows of comma-separated fields
rows = [line.split(',') for line in iris_csv.splitlines()]

# Skip the header row, drop the leading class-label field from every remaining
# row, and cast the feature values to float (expected inference datatype)
samples = [[float(value) for value in fields[1:]] for fields in rows[1:]]

# Randomize the order in which the records are sent (shuffles in place)
random.shuffle(samples)

# Send each baseline record to the real-time inference endpoint
report = 'Sample {0} >> Input: {1}: >> Prediction: {2}'
for index, sample in enumerate(samples):

    # Defensive coding: an empty record has nothing to send
    if not sample:
        continue

    # Invoke the endpoint and decode the returned bytes to text
    response = predictor.predict(data=sample).decode('utf-8')

    # Display the model's classification prediction
    print(report.format(index, sample, response))

    # Pause 500 milliseconds between requests
    time.sleep(0.5)

print('Completed!')

#### Generate data to induce data quality constraint violations

In [None]:
import numpy as np

# Draw 200 rows of 4 uniform values in [-5, 5) and cast them to integers, so
# both the datatype and the distribution differ from the float baseline data
drift_values = np.random.uniform(low=-5, high=5, size=(200,4)).astype('i')

# Render every row as one comma-separated string
sample_drift_data = [",".join(fields) for fields in drift_values.astype(str)]

# Send each drifted record to the real-time inference endpoint to simulate data drift
report = 'Sample {0} >> Input: {1}: >> Prediction: {2}'
for index, sample in enumerate(sample_drift_data):

    # Defensive coding: an empty record has nothing to send
    if not sample:
        continue

    # Invoke the endpoint and decode the returned bytes to text
    response = predictor.predict(data=sample).decode('utf-8')

    # Display the model's classification prediction
    print(report.format(index, sample, response))

    # Pause 500 milliseconds between requests
    time.sleep(0.5)

print('Completed!')

#### Monitoring Schedule management

In [None]:
# Optional AWS CLI checks, left commented out; uncomment a line to run it.
# List the monitoring schedules:
# !aws sagemaker list-monitoring-schedules
# Describe the data-quality schedule by name:
# !aws sagemaker describe-monitoring-schedule --monitoring-schedule-name 'third-party-model-data-quality-schedule'
# List the executions of that schedule:
# !aws sagemaker list-monitoring-executions --monitoring-schedule-name 'third-party-model-data-quality-schedule'


#### Resource Cleanup

In [None]:
# Teardown commands, all commented out -- presumably so that running every cell
# does not delete resources by accident (confirm). Uncomment and run one step at
# a time, in order. Each step issues the change, waits 30 seconds, then lists
# the matching resources so the result can be checked.
# The time.sleep calls use the `time` module imported in an earlier cell.

# Step 1. Stop the data-quality monitoring schedule.
# !aws sagemaker stop-monitoring-schedule --monitoring-schedule-name 'third-party-model-data-quality-schedule'
# time.sleep(30) # allow time for processing
# !aws sagemaker list-monitoring-schedules --endpoint-name 'third-party-model-endpoint'

# Step 2. Delete the monitoring schedule.
# !aws sagemaker delete-monitoring-schedule --monitoring-schedule-name 'third-party-model-data-quality-schedule'
# time.sleep(30) # allow time for processing
# !aws sagemaker list-monitoring-schedules --endpoint-name 'third-party-model-endpoint'

# Step 3. Delete the inference endpoint.
# !aws sagemaker delete-endpoint --endpoint-name 'third-party-model-endpoint'
# time.sleep(30) # allow time for processing
# !aws sagemaker list-endpoints --name-contains 'third-party-model-endpoint'

# Step 4. Delete the endpoint configuration.
# !aws sagemaker delete-endpoint-config --endpoint-config-name 'third-party-model-endpoint-config'
# time.sleep(30) # allow time for processing
# !aws sagemaker list-endpoint-configs --name-contains 'third-party-model-endpoint-config'

# Step 5. Delete the model.
# !aws sagemaker delete-model --model-name 'third-party-model'
# time.sleep(30) # allow time for processing
# !aws sagemaker list-models --name-contains 'third-party-model'