In [1]:
# Define IAM role
import boto3
import re

import os
import numpy as np
import pandas as pd
from sagemaker import get_execution_role

# IAM execution role for this notebook; passed to the Estimator below as its `role` argument.
role = get_execution_role()

In [2]:
import sagemaker as sage
from time import gmtime, strftime

# SageMaker session: used below to look up the account ID and region,
# handed to the Estimator, and used again to delete the endpoint during clean-up.
sess = sage.Session()

In [3]:
# Load the dataset from the notebook's working directory into a DataFrame.
train_data = pd.read_csv('./mall.csv')

In [4]:
# Take every row of the columns at zero-based positions 3 and 4 as a NumPy array.
# NOTE(review): presumably the two numeric features used for clustering —
# confirm against the header of mall.csv, since selection is positional.
X = train_data.iloc[:,[3,4]].values

In [5]:
# Write the NumPy array to a CSV file
print(X)
# fmt='%d' formats every value as an integer; rows are comma-separated and no header line is written.
np.savetxt('./processed_data.csv', X, delimiter=',',fmt='%d')

[[ 15  39]
 [ 15  81]
 [ 16   6]
 [ 16  77]
 [ 17  40]
 [ 17  76]
 [ 18   6]
 [ 18  94]
 [ 19   3]
 [ 19  72]
 [ 19  14]
 [ 19  99]
 [ 20  15]
 [ 20  77]
 [ 20  13]
 [ 20  79]
 [ 21  35]
 [ 21  66]
 [ 23  29]
 [ 23  98]
 [ 24  35]
 [ 24  73]
 [ 25   5]
 [ 25  73]
 [ 28  14]
 [ 28  82]
 [ 28  32]
 [ 28  61]
 [ 29  31]
 [ 29  87]
 [ 30   4]
 [ 30  73]
 [ 33   4]
 [ 33  92]
 [ 33  14]
 [ 33  81]
 [ 34  17]
 [ 34  73]
 [ 37  26]
 [ 37  75]
 [ 38  35]
 [ 38  92]
 [ 39  36]
 [ 39  61]
 [ 39  28]
 [ 39  65]
 [ 40  55]
 [ 40  47]
 [ 40  42]
 [ 40  42]
 [ 42  52]
 [ 42  60]
 [ 43  54]
 [ 43  60]
 [ 43  45]
 [ 43  41]
 [ 44  50]
 [ 44  46]
 [ 46  51]
 [ 46  46]
 [ 46  56]
 [ 46  55]
 [ 47  52]
 [ 47  59]
 [ 48  51]
 [ 48  59]
 [ 48  50]
 [ 48  48]
 [ 48  59]
 [ 48  47]
 [ 49  55]
 [ 49  42]
 [ 50  49]
 [ 50  56]
 [ 54  47]
 [ 54  54]
 [ 54  53]
 [ 54  48]
 [ 54  52]
 [ 54  42]
 [ 54  51]
 [ 54  55]
 [ 54  41]
 [ 54  44]
 [ 54  57]
 [ 54  46]
 [ 57  58]
 [ 57  55]
 [ 58  60]
 [ 58  46]
 [ 59  55]

In [6]:
# Push the locally written processed_data.csv to the S3 prefix below.
# The call is the cell's last expression, so the returned S3 URI is displayed.
from sagemaker import s3

s3.S3Uploader.upload(
    local_path='processed_data.csv',
    desired_s3_uri='s3://sampal-pi5/output',
    kms_key=None,
)



's3://sampal-pi5/output/processed_data.csv'

### Train

Create a SageMaker estimator with custom hyperparameters and start the training job

In [7]:
# Account ID and region come from the active SageMaker session's boto3 session.
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name

# ECR URI of the custom 'sage-kmeans' training image in this account and region.
image = '{}.dkr.ecr.{}.amazonaws.com/sage-kmeans:latest'.format(account, region)

# Estimator for the custom container: one ml.m5.large training instance,
# artifacts written under output_path, n_clusters passed as a hyperparameter.
model = sage.estimator.Estimator(
    image,
    role,
    train_instance_count=1,
    train_instance_type='ml.m5.large',
    output_path="s3://sampal-pi5/output",
    hyperparameters={'n_clusters': 5},
    sagemaker_session=sess,
)



In [8]:
# Launch the training job using the S3 object below as its input.
# NOTE(review): this trains on input/mall.csv; the processed_data.csv uploaded
# earlier to s3://sampal-pi5/output is not referenced here — confirm which file
# the training container is meant to consume.
model.fit('s3://sampal-pi5/input/mall.csv')

2020-03-30 19:34:15 Starting - Starting the training job...
2020-03-30 19:34:17 Starting - Launching requested ML instances............
2020-03-30 19:36:24 Starting - Preparing the instances for training......
2020-03-30 19:37:41 Downloading - Downloading input data...
2020-03-30 19:38:14 Training - Downloading the training image...
2020-03-30 19:38:44 Uploading - Uploading generated training model.[34mStarting the training.[0m
[34m5[0m
[34mTraining complete.[0m

2020-03-30 19:38:50 Completed - Training job completed
Training seconds: 69
Billable seconds: 69


### Deploy

Create a deployment endpoint

In [9]:
from sagemaker.predictor import csv_serializer

# Host the trained model on a single ml.m4.xlarge endpoint instance;
# csv_serializer is supplied as the serializer for prediction requests.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    serializer=csv_serializer,
)

-------------!

### Inference

Run inference against the endpoint

In [12]:
# Disabled variant that decodes the bytes response into a str:
#predictor.predict(train_data.values).decode('utf-8')

# Send the whole DataFrame (all columns, via train_data.values) to the endpoint.
# The response is displayed as raw bytes containing newline-separated values.
# NOTE(review): the two-column array X built earlier is not what is sent here —
# confirm that the container selects the feature columns itself.
predictor.predict(train_data.values)

b'3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n3\n4\n1\n4\n3\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n2\n0\n2\n1\n2\n0\n2\n0\n2\n1\n2\n0\n2\n0\n2\n0\n2\n0\n2\n1\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n0\n2\n'

In [None]:
# runtime = boto3.Session().client('sagemaker-runtime')
# endpoint='sage-kmeans-2020-03-28-01-25-29-970'
# import io
# from io import StringIO
# test_file = io.StringIO()
# train_data.to_csv(test_file)
# response = runtime.invoke_endpoint(EndpointName=endpoint, ContentType='text/csv', Body=test_file.getvalue())
# type(response)

In [None]:
# import json
# result = json.loads(response['Body'].read().decode())
# print (result)

In [None]:
# print(response['Body'].read().decode())

In [None]:
# response_payload = json.loads(response['Body'].read().decode("utf-8"))

# print ("response_payload: {}".format(response_payload))

In [None]:
# !curl https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/sage-kmeans-2020-03-28-01-25-29-970/invocations

### Clean Up

Delete the endpoint created in the Deploy step.

In [13]:
# Tear down the endpoint that model.deploy() created, identified by predictor.endpoint.
sess.delete_endpoint(predictor.endpoint)