# SageMaker BYOD

Bring your own docker models to SageMaker

### Load modules
Load modules. Get IAM role and session

In [47]:
import boto3
import re

import os
import numpy as np
import pandas as pd
from sagemaker import get_execution_role

import sagemaker as sage
from time import gmtime, strftime

from sagemaker import s3


In [48]:
role = get_execution_role()
sess = sage.Session()

### Setup S3 buckets

Setup input and output buckets

In [None]:
output_path=s3://{bucketname}/{prefix}/
input_path=s3://{bucketname}/{prefix}/

### Get raw data
Get raw data and copy locally for preprocessing

In [49]:
raw_data = pd.read_csv('./mall.csv')

In [50]:
train_data = raw_data.iloc[:,[3,4]].values

### Upload to S3
Upload preprocessed data back to S3

In [51]:
# Convert nparry to csv
np.savetxt('./processed_data.csv', train_data, delimiter=',',fmt='%d')

In [52]:
# Upload the processed csv to S3
s3.S3Uploader.upload('processed_data.csv',output_path, kms_key=None)

's3://sampal-pi5/output/processed_data.csv'

### Train

Create a SageMaker estimator with custom hyperparameters and start the training job

In [53]:
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = '{}.dkr.ecr.{}.amazonaws.com/sage-kmeans:latest'.format(account, region)
model = sage.estimator.Estimator(image,
                       role, 
                       train_instance_count=1, 
                       train_instance_type='ml.m5.large',
                       output_path=output_path,
                       hyperparameters={'n_clusters': 5},
                       sagemaker_session=sess)



In [54]:
model.fit(output_path+'processed_data.csv')

2020-04-02 19:13:30 Starting - Starting the training job...
2020-04-02 19:13:33 Starting - Launching requested ML instances......
2020-04-02 19:14:48 Starting - Preparing the instances for training...
2020-04-02 19:15:30 Downloading - Downloading input data...
2020-04-02 19:15:36 Training - Downloading the training image..[34mStarting the training.[0m
[34mHyperparamter n_clusters: 5[0m
[34mTraining complete.[0m

2020-04-02 19:16:12 Uploading - Uploading generated training model
2020-04-02 19:16:12 Completed - Training job completed
Training seconds: 42
Billable seconds: 42


### Deploy

Create a deployment endpoint

In [55]:
from sagemaker.predictor import csv_serializer
predictor = model.deploy(1, 'ml.m4.xlarge', serializer=csv_serializer)

-----------------!

### Inference

Run inference against the endpoint

In [59]:
#predictor.predict(train_data.values).decode('utf-8')
data = pd.read_csv(output_path+'processed_data.csv')
predictor.predict(data.values)

b'0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n3\n0\n2\n0\n3\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n4\n1\n4\n2\n4\n1\n4\n1\n4\n2\n4\n1\n4\n1\n4\n1\n4\n1\n4\n2\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n1\n4\n'

In [41]:
# runtime = boto3.Session().client('sagemaker-runtime')
# endpoint='sage-kmeans-2020-03-28-01-25-29-970'
# import io
# from io import StringIO
# test_file = io.StringIO()
# train_data.to_csv(test_file)
# response = runtime.invoke_endpoint(EndpointName=endpoint, ContentType='text/csv', Body=test_file.getvalue())
# type(response)

In [42]:
# import json
# result = json.loads(response['Body'].read().decode())
# print (result)

In [43]:
# print(response['Body'].read().decode())

In [44]:
# response_payload = json.loads(response['Body'].read().decode("utf-8"))

# print ("response_payload: {}".format(response_payload))

In [45]:
# !curl https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/sage-kmeans-2020-03-28-01-25-29-970/invocations

### Clean Up

Delete endpoint

In [60]:
sess.delete_endpoint(predictor.endpoint)