In [1]:
###
###   SAGEMAKER LINEAR LEARNER - TRAINER PY CODE
###
#
#   Prior to running trainer - take steps below
#
#   If running from an AWS Notebook, the steps below are needed.  If running the notebook from another environment, you may also need to make sure AWS access is configured.  I have not yet attempted to run it in a separate notebook
#   
#   Steps:
#   1. Need to upload iris.csv to the same Notebook folder as this code.   This will be changed to be a parameter passed to this program
#   2. Need to add your AWS Keys below
#   3. Need to make sure your AmazonSageMaker-ExecutionRole policy has S3 full access added to ExecutionRole policy
#   4. Need to run sm-dev-env-template code to produce S3 buckets for "train" & "model data".   The names of the created S3 buckets need to be placed in the S3 args parms in the code below


### import packages for "Training" linear regression
import io
import os
import gzip
import pickle
import numpy as np
import pandas as pd
import urllib.request

import matplotlib.pyplot as plt
%matplotlib inline


In [2]:
### import Amazon sagemaker packages

import boto3 
import sagemaker
import sagemaker.amazon.common as smac
from sagemaker import get_execution_role
from sagemaker.predictor import csv_serializer, json_deserializer

In [3]:
#
#   Upload training file to S3 from Notebook github repository
#

#   Add AWS keys


import boto3
from botocore.exceptions import NoCredentialsError

# Credentials are read from the environment so that no secret is stored in the notebook.
# When a variable is unset the value is None, and boto3 then falls back to its default
# credential chain instead of using an explicit key pair.
# NOTE: the key pair that used to be hard-coded here was stored in plain text and should be
# treated as compromised - deactivate/rotate it in IAM.
ACCESS_KEY = os.environ.get('AWS_ACCESS_KEY_ID')  #arg
SECRET_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')  #arg

def upload_to_aws(local_file, bucket, s3_file):
    """Upload a local file to an S3 bucket and report the outcome.

    The S3 client is created with the module-level ACCESS_KEY / SECRET_KEY values.

    Args:
        local_file: Path of the file to upload.
        bucket: Name of the destination S3 bucket.
        s3_file: Object key to store the file under.

    Returns:
        True when the upload call completes; False when the local file is
        missing or no credentials are available (a message is printed in
        every case). Any other exception propagates to the caller.
    """
    client = boto3.client(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )

    try:
        client.upload_file(local_file, bucket, s3_file)
    except FileNotFoundError:
        print("The file was not found")
        return False
    except NoCredentialsError:
        print("Credentials not available")
        return False

    print("Upload Successful")
    return True

# Push the local iris.csv into the training bucket under the same object name.
uploaded = upload_to_aws(
    local_file='iris.csv',
    bucket='iris-trainb3119a0e-42d2-4ac4-a9e8-f3458356f096',
    s3_file='iris.csv',
)  ##args



Upload Successful


In [4]:
#
#   Read in training file
#


import boto3
# import numpy and pandas libraries for working with data
import numpy as np
import pandas as pd

# Bucket and object key of the training CSV
bucket = "iris-trainb3119a0e-42d2-4ac4-a9e8-f3458356f096"  ##args
file_name = "iris.csv"  ##args

# S3 client built with the ACCESS_KEY / SECRET_KEY values defined earlier in the notebook
s3 = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)

# Fetch the object (file_name is the key inside the bucket) ...
obj = s3.get_object(Bucket=bucket, Key=file_name)

# ... and parse its body as comma-separated text into a DataFrame
train_set = pd.read_csv(obj['Body'], sep=',', encoding='latin1')

# Cell output: the type of the parsed result
type(train_set)



pandas.core.frame.DataFrame

In [11]:
#   <<Need to add code to split file train and test>>

#type(train_set)
# Columns used by the model, named once so the feature/target split below does not
# depend on positional indices.
FEATURE_COLUMNS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
TARGET_COLUMN = 'species'

# Keep only the feature columns followed by the target column
data = train_set.loc[:, FEATURE_COLUMNS + [TARGET_COLUMN]]

# Feature matrix (one row per sample) as float32
modelData = np.asarray(data[FEATURE_COLUMNS], dtype='float32')
# Training target: the species column; the float32 cast requires it to be numeric
target = np.asarray(data[TARGET_COLUMN], dtype='float32')

# Verify that the conversion worked
print(modelData[0])




[5.1 3.5 1.4 0.2]


In [15]:

# Destination for the training file and the model artifacts.
# NOTE: this rebinds `bucket`, which earlier in the notebook named the bucket holding iris.csv.
bucket = "iris-model-artifactsd506bcfd-74e6-437c-aefb-5c74b6dacd24"  ##args
prefix = "iris/test"  ##args

# New SageMaker session, handed to the Estimator later on
sess = sagemaker.Session()

# The dataset is small, so the SageMaker-formatted training data is serialized
# into an in-memory buffer rather than a file on disk.
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, modelData, target)

# Rewind so the buffer can be read from the beginning when it is uploaded
buf.seek(0)



0

In [16]:
#
#   Location of training data
#


key = 'linearlearner'

# S3 object keys always use '/' as separator, so build the key explicitly instead of with
# os.path.join (which follows the local OS convention), and derive the URI from the same
# string so the uploaded object and the advertised location cannot diverge.
train_object_key = '{}/train/{}'.format(prefix, key)

# Rewind first: upload_fileobj reads from the current position, so re-running this cell
# without rewinding would upload an empty payload.
buf.seek(0)
boto3.resource('s3').Bucket(bucket).Object(train_object_key).upload_fileobj(buf)

s3_train_data = 's3://{}/{}'.format(bucket, train_object_key)
print('uploaded training data location: {}'.format(s3_train_data))


uploaded training data location: s3://iris-model-artifactsd506bcfd-74e6-437c-aefb-5c74b6dacd24/iris/test/train/linearlearner


In [17]:
#
#   Location of model output
#


# Model artifacts go under the same bucket/prefix as the training data, in an 'output' folder
output_location = 's3://{bucket}/{prefix}/output'.format(bucket=bucket, prefix=prefix)
print('training artifacts will be uploaded to: {location}'.format(location=output_location))

training artifacts will be uploaded to: s3://iris-model-artifactsd506bcfd-74e6-437c-aefb-5c74b6dacd24/iris/test/output


In [18]:
#
#   Use the built-in algorithm image for the region in which this code is executing
#

# Region configured for the default boto3 session; used to pick the algorithm image
boto_session = boto3.Session()
region = boto_session.region_name
region

'us-east-1'

In [19]:
# Registry path of the linear-learner training image, keyed by AWS region.
containers = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/linear-learner:latest',
              'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:latest',
              'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/linear-learner:latest',
              'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/linear-learner:latest'}

# Fail with an explanatory message (still a KeyError, as before) when the session's region
# is missing from the table - this includes region being None when no region is configured.
if region not in containers:
    raise KeyError(
        'No linear-learner image is listed for region {!r}; listed regions: {}'.format(
            region, ', '.join(sorted(containers))))
container = containers[region]

In [20]:
#sess = sagemaker.Session()


In [21]:
from sagemaker import get_execution_role
# Fetch the execution role; it is passed to the Estimator as its `role` argument.
# (The recorded output shows an IAM role ARN.)
role = get_execution_role()
# Echo the value as the cell output
role

'arn:aws:iam::697525789718:role/service-role/AmazonSageMaker-ExecutionRole-20191019T150576'

In [22]:
# Training-job settings: instance fleet, artifact destination and the session to run under
estimator_options = {
    'train_instance_count': 1,  #arg
    'train_instance_type': 'ml.c4.xlarge',  #arg
    'output_path': output_location,
    'sagemaker_session': sess,
}

# Generic Estimator wrapping the linear-learner image chosen for this region
linear = sagemaker.estimator.Estimator(container, role, **estimator_options)

In [23]:
# feature_dim is derived from the feature matrix instead of being hard-coded, so it stays
# correct if the set of feature columns changes.
# NOTE(review): the target is the species column but predictor_type is 'regressor' -
# confirm that regression (rather than classification) is what is intended.
linear.set_hyperparameters(feature_dim=modelData.shape[1],
                           predictor_type='regressor',
                           mini_batch_size=30,
                           normalize_data=False)



In [24]:
# Launch the training job, feeding the uploaded file through the 'train' channel
linear.fit(inputs={'train': s3_train_data})

2019-11-09 18:12:24 Starting - Starting the training job...
2019-11-09 18:12:25 Starting - Launching requested ML instances......
2019-11-09 18:13:30 Starting - Preparing the instances for training......
2019-11-09 18:14:46 Downloading - Downloading input data...
2019-11-09 18:15:24 Training - Training image download completed. Training in progress..[31mDocker entrypoint called with argument(s): train[0m
[31m[11/09/2019 18:15:26 INFO 140010908514112] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'feature_dim': u'auto', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', u'bias_lr_mult': u'auto', u'lr_scheduler_step': u'auto', u'init_method': u'uniform', u'init_sigma':


2019-11-09 18:15:37 Uploading - Uploading generated training model
2019-11-09 18:15:37 Completed - Training job completed
Training seconds: 51
Billable seconds: 51


In [25]:
#
#   Create Model Endpoint
#

# Hosting settings for the endpoint that serves the trained model
endpoint_config = {
    'initial_instance_count': 1,
    'instance_type': 'ml.m4.xlarge',
}
# NOTE(review): no cell in view deletes this endpoint - confirm it is cleaned up after use.
linear_predictor = linear.deploy(**endpoint_config)

---------------------------------------------------------------------------------------------------!

In [26]:
#  Validate the model and return JSON
from sagemaker.predictor import csv_serializer, json_deserializer

# Requests are serialized as CSV ...
linear_predictor.serializer = csv_serializer
linear_predictor.content_type = 'text/csv'
# ... and responses are decoded from JSON
linear_predictor.deserializer = json_deserializer

In [27]:
# Smoke test: send only the first feature row to the endpoint and print the decoded response
# (the recorded output is a dict with a 'predictions' list holding one 'score' entry).
result = linear_predictor.predict(modelData[0])
print(result)

{'predictions': [{'score': 0.9953638315200806}]}


In [28]:
# Score every row with its own endpoint request and collect the 'score' values in row order
predictions = []
for array in modelData:
    result = linear_predictor.predict(array)
    predictions.extend(r['score'] for r in result['predictions'])

# Convert the collected scores to a numpy array
predictions = np.array(predictions)
# TODO: push the scores into the pandas dataframe,
#       e.g. data['Predicted'] = predictions.astype(int)

In [29]:
# Print the full array of scores collected from the endpoint, one per row of modelData
print(predictions)

[0.99536383 1.1009649  0.95558572 1.0167582  0.93670905 1.10697508
 0.9303304  1.02846694 0.99875677 1.0600152  1.0401758  1.00291538
 1.03697968 0.81871951 0.96767688 0.97204578 0.99469793 1.03253853
 1.18207598 0.96507454 1.19184756 1.03409362 0.71719003 1.25478458
 1.08712316 1.18391395 1.1308856  1.05024362 1.05401862 1.03979349
 1.09844863 1.21005833 0.82200253 0.87969506 1.0600152  1.00794804
 1.07453656 1.0600152  0.93884313 1.05527735 0.97765887 1.22573924
 0.87515438 1.17339087 1.11452639 1.11132908 0.95596915 0.95684457
 1.01336539 1.03224218 2.97267866 2.79285192 3.07102585 2.62346125
 2.9751091  2.65820646 2.82751036 2.12274504 2.89572573 2.42475748
 2.33307171 2.63828039 2.67783403 2.82691717 2.34692812 2.8398838
 2.64205647 2.49306059 3.05767488 2.48416424 2.85453129 2.62510204
 3.10122919 2.78441238 2.7578969  2.84491777 3.07450438 3.15166759
 2.78114319 2.32967854 2.46112871 2.39588499 2.51127148 3.05042219
 2.58843565 2.65909576 2.96126604 2.95022249 2.45543003 2.55977