In [5]:
# Reading boston housing dataset
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import pandas as pd

boston = load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df.describe() # describe dataset overview

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97


In [7]:
# save files as csv
import os

WORK_DIRECTORY='data'
os.makedirs('{}'.format(WORK_DIRECTORY), exist_ok=True)
df.to_csv('{}/boston_housing.csv'.format(WORK_DIRECTORY), header=False, index=False)

In [12]:
# S3 prefix
bucket = 'echo-workstream-dev-bucket'
prefix = '/echo-workstream-dev-bucket/sagemaker/sample'

# Import libraries
from sagemaker import get_execution_role
import boto3, sys, os
import sagemaker

sagemaker_session = sagemaker.Session()

# Get a SageMaker-compatible role used by this Notebook Instance.
role = get_execution_role()
my_region = boto3.session.Session().region_name # set the region of the instance
print("Execution role is " + role)
print("Success - the MySageMakerInstance is in the " + my_region + ".")

Execution role is arn:aws:iam::123456789012:role/app-sagemaker-role
Success - the MySageMakerInstance is in the ap-northeast-1.


In [13]:
s3 = boto3.resource('s3')

try:
    if my_region == 'ap-northeast-1':
        s3.create_bucket(Bucket=bucket)
    else:
        s3.create_bucket(Bucket=bucket, CreateBucketConfiguration={'LocationConstraint': my_region})
    print('S3 bucket created successfully')
except Exception as e:
    print('S3 error: ', e)

S3 error:  An error occurred (IllegalLocationConstraintException) when calling the CreateBucket operation: The unspecified location constraint is incompatible for the region specific endpoint this request was sent to.


In [14]:
# send data to S3.SageMaker will take training data from s3
training_path = sagemaker_session.upload_data(path='{}/boston_housing.csv'.format(WORK_DIRECTORY), bucket=bucket, key_prefix=prefix)
s3_train_data = 's3://{}/{}/{}'.format(bucket, prefix, WORK_DIRECTORY)
print('Uploaded training data location: {}'.format(s3_train_data))

output_location = 's3://{}/{}/output'.format(bucket, prefix)
print('Training artifacts will be uploaded to: {}'.format(output_location))

Uploaded training data location: s3://echo-workstream-dev-bucket/sagemaker/sample/data
Training artifacts will be uploaded to: s3://echo-workstream-dev-bucket/sagemaker/sample/output


In [15]:
# We use the Estimator from the SageMaker Python SDK
from sagemaker.sklearn.estimator import SKLearn

script_path = 'scikit_learn_script.py'

# Initialise SDK
sklearn_estimator = SKLearn(
        entry_point=script_path,
        role = role,
        train_instance_type="ml.c4.xlarge",
        sagemaker_session=sagemaker_session,
        output_path=output_location
)

print("Estimator object: {}".format(sklearn_estimator))

Estimator object: <sagemaker.sklearn.estimator.SKLearn object at 0x7fc7954f9160>


In [16]:
# Run model training job
sklearn_estimator.fit({'train': training_path})

2020-01-13 06:19:35 Starting - Starting the training job...
2020-01-13 06:19:37 Starting - Launching requested ML instances......
2020-01-13 06:20:40 Starting - Preparing the instances for training...
2020-01-13 06:21:18 Downloading - Downloading input data...
2020-01-13 06:22:03 Training - Training image download completed. Training in progress.
2020-01-13 06:22:03 Uploading - Uploading generated training model[34m2020-01-13 06:21:58,438 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-01-13 06:21:58,440 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-01-13 06:21:58,450 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-01-13 06:21:58,782 sagemaker-containers INFO     Module scikit_learn_script does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-01-13 06:21:58,782 sagemaker-containers INFO     Generating setup.cfg[0m
[34m2020-01-

In [17]:
# Deploy an estimator and endpoint
from sagemaker.predictor import csv_serializer, json_deserializer
predictor = sklearn_estimator.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge", endpoint_name="sagemaker-terraform-test")

# Specify input and output formats.
predictor.content_type = 'text/csv'
predictor.serializer = csv_serializer
predictor.deserializer = json_deserializer

---------------------------------------------------------------------------!

In [18]:
# predictor.delete_endpoint()