In [1]:
###
###   SAGEMAKER LINEAR LEARNER - TRAINER PY CODE
###
#
#   Prior to running trainer - take steps below
#
#   If running from an AWS Notebook, the steps below are needed.  If running the Notebook from another source, you may first need to make sure AWS access is established.  Running from a separate Notebook has not been attempted yet.
#   
#   Steps:
#   1. Upload iris.csv to the same Notebook folder as this code.  This will be changed to be a parameter passed to this program.
#   2. Add your AWS keys below.
#   3. Make sure S3 full access has been added to your AmazonSageMaker-ExecutionRole policy.
#   4. Run the sm-dev-env-template code to create the S3 buckets for "train" & "model data".  The names of the created S3 buckets need to be placed in the S3 args parameters in the code below.


### import packages for "Training" linear regression
import io
import os
import gzip
import pickle
import numpy as np
import pandas as pd
import urllib.request

import matplotlib.pyplot as plt
%matplotlib inline


In [29]:
### import Amazon sagemaker packages

import boto3 
import sagemaker
import sagemaker.amazon.common as smac
from sagemaker import get_execution_role
from sagemaker.predictor import csv_serializer, json_deserializer

In [17]:
#
#   Upload training file to S3 from Notebook github repository
#

#   Add AWS keys


import boto3
from botocore.exceptions import NoCredentialsError

# Key pair used for the explicit S3 clients in this notebook.
# Read from the standard AWS environment variables so real secrets never have to be
# typed into (and saved with) the notebook; the original placeholders remain as the
# fallback when the variables are not set.
ACCESS_KEY = os.environ.get('AWS_ACCESS_KEY_ID', 'YOUR ACCESS KEY') #arg
SECRET_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY', 'YOUR SECRET KEY') #arg

def upload_to_aws(local_file, bucket, s3_file):
    """Upload a local file to an S3 bucket.

    Parameters:
        local_file: path of the file on the local filesystem.
        bucket: name of the destination S3 bucket.
        s3_file: object key to store the file under.

    Returns:
        True when the upload succeeds, False when it fails for a handled reason
        (missing local file, missing credentials, or the transfer being rejected);
        a short message is printed in every case.
    """
    # Imported here so this cell does not depend on an extra top-of-notebook import.
    from boto3.exceptions import S3UploadFailedError

    # The client authenticates with the module-level ACCESS_KEY / SECRET_KEY pair.
    s3 = boto3.client('s3', aws_access_key_id=ACCESS_KEY,
                      aws_secret_access_key=SECRET_KEY)

    try:
        s3.upload_file(local_file, bucket, s3_file)
    except FileNotFoundError:
        print("The file was not found")
        return False
    except NoCredentialsError:
        print("Credentials not available")
        return False
    except S3UploadFailedError as err:
        # upload_file wraps service-side client errors (invalid keys, missing bucket,
        # access denied) in this exception; report it like the other failures
        # instead of letting the cell crash.
        print("Upload failed: {}".format(err))
        return False

    print("Upload Successful")
    return True

# Push the local iris.csv to the training bucket under the same object name; the
# True/False result of the upload is kept in `uploaded`.
uploaded = upload_to_aws('iris.csv', 'iris-trainb3119a0e-42d2-4ac4-a9e8-f3458356f096', 'iris.csv')  ##args


Upload Successful


In [64]:
#
#   Read in training file
#


import boto3
# import numpy and pandas libraries for working with data
import numpy as np
import pandas as pd

# Location of the raw training CSV in S3: bucket name and object key
bucket = "iris-trainb3119a0e-42d2-4ac4-a9e8-f3458356f096"  ##args
file_name = "iris.csv"  ##args

# S3 client for the 's3' service, authenticated with the explicit key pair defined earlier
s3 = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)

# Fetch the object (key) from the bucket; the response's 'Body' entry is handed to pandas below
obj = s3.get_object(Bucket=bucket, Key=file_name)

# Parse the comma-separated payload into a DataFrame and show its type as the cell result
train_set = pd.read_csv(obj['Body'], sep=',', encoding='latin1')
type(train_set)



pandas.core.frame.DataFrame

In [79]:


# Keep the label column first, followed by the four measurement columns
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
data = train_set.loc[:, ['species'] + feature_columns]

# Feature matrix: sepal_length, sepal_width, petal_length, petal_width
# (selected by name so the result does not depend on column positions)
modelData = np.array(data[feature_columns]).astype('float32')
# Target vector: the species column
# NOTE(review): the float32 cast assumes species is already numerically encoded in this CSV - confirm
target = np.array(data['species']).astype('float32')
# Verify that the conversion worked
print(modelData[0])



[5.1 3.5 1.4 0.2]


In [80]:

# Create a new SageMaker session (passed to the estimator further down)
# Create buf to hold the training data, written by the SageMaker helper below

sess = sagemaker.Session()
# S3 bucket and key prefix that the training data and model output are written under.
# Note: this rebinds `bucket`, which earlier held the name of the raw training-file bucket.
bucket = "iris-model-artifactsd506bcfd-74e6-437c-aefb-5c74b6dacd24"  ##args
prefix = "iris/test"  ##args
# Use an in-memory IO buffer as the dataset is small
buf = io.BytesIO()
# Serialize the feature matrix together with its target vector into the buffer
smac.write_numpy_to_dense_tensor(buf, modelData, target)
# Rewind to the start so the next reader sees the whole payload (the returned 0 is the cell output)
buf.seek(0)



0

In [81]:
#
#   Location of training data
#


key = 'linearlearner'
# Build the object key once with '/' separators (S3 keys are not OS paths) and reuse it
# for both the upload and the URI, so the two can never disagree.
train_key = '{}/train/{}'.format(prefix, key)
# Rewind first: a previous run of this cell leaves the buffer positioned at its end,
# and uploading from there would store an empty object.
buf.seek(0)
boto3.resource('s3').Bucket(bucket).Object(train_key).upload_fileobj(buf)
s3_train_data = 's3://{}/{}'.format(bucket, train_key)
print('uploaded training data location: {}'.format(s3_train_data))


uploaded training data location: s3://iris-model-artifactsd506bcfd-74e6-437c-aefb-5c74b6dacd24/iris/test/train/linearlearner


In [82]:
#
#   Location of model output
#


# S3 URI under the current bucket/prefix, in an "output" folder; it is passed to the
# estimator as its output_path.
output_location = 's3://{}/{}/output'.format(bucket, prefix)
print('training artifacts will be uploaded to: {}'.format(output_location))

training artifacts will be uploaded to: s3://iris-model-artifactsd506bcfd-74e6-437c-aefb-5c74b6dacd24/iris/test/output


In [83]:
#
#   Use the built-in ML algorithm for the region in which this code is executing
#

# Region name of the default boto3 session; it is used to pick the matching
# linear-learner container image.
# NOTE(review): region_name is presumably None when no region is configured - confirm.
region = boto3.Session().region_name
region

'us-east-1'

In [84]:
# Registry paths of the linear-learner image, keyed by AWS region name.
containers = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/linear-learner:latest',
              'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:latest',
              'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/linear-learner:latest',
              'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/linear-learner:latest'}
# Fail with an actionable message (still a KeyError) when the session's region is
# unset (None) or is not one of the regions listed above.
if region not in containers:
    raise KeyError('no linear-learner container configured for region {!r}; '
                   'supported regions: {}'.format(region, ', '.join(sorted(containers))))
container = containers[region]

In [45]:
#sess = sagemaker.Session()


In [85]:
from sagemaker import get_execution_role
# Execution role that is handed to the estimator as its `role` argument; shown as the cell result.
# NOTE(review): presumably this only resolves when the notebook runs inside SageMaker - confirm.
role = get_execution_role()
role

'arn:aws:iam::697525789718:role/service-role/AmazonSageMaker-ExecutionRole-20191019T150576'

In [89]:
# Training-job settings: a single ml.c4.xlarge instance, artifacts written to
# output_location, all API calls made through the session created earlier.
estimator_settings = dict(
    train_instance_count=1,  #arg
    train_instance_type='ml.c4.xlarge',  #arg
    output_path=output_location,
    sagemaker_session=sess,
)
# Generic estimator wrapping the region-specific linear-learner container image
linear = sagemaker.estimator.Estimator(container, role, **estimator_settings)

In [90]:
# Hyperparameters for the linear learner.
# feature_dim is 4 to match the four measurement columns of the training matrix.
# NOTE(review): the target is the species column yet predictor_type is 'regressor' -
# confirm that regression (rather than classification) is intended.
hyperparameters = {
    'feature_dim': 4,
    'predictor_type': 'regressor',
    'mini_batch_size': 50,
    'normalize_data': False,
}
linear.set_hyperparameters(**hyperparameters)

In [91]:
# Start the training job, supplying the uploaded training data as the 'train' input channel.
linear.fit({'train': s3_train_data})

2019-11-09 02:00:39 Starting - Starting the training job...
2019-11-09 02:00:40 Starting - Launching requested ML instances......
2019-11-09 02:01:47 Starting - Preparing the instances for training......
2019-11-09 02:03:03 Downloading - Downloading input data...
2019-11-09 02:03:41 Training - Training image download completed. Training in progress..[31mDocker entrypoint called with argument(s): train[0m
[31m[11/09/2019 02:03:46 INFO 139936976090944] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'feature_dim': u'auto', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', u'bias_lr_mult': u'auto', u'lr_scheduler_step': u'auto', u'init_method': u'uniform', u'init_sigma':


2019-11-09 02:03:58 Uploading - Uploading generated training model
2019-11-09 02:03:58 Completed - Training job completed
Training seconds: 55
Billable seconds: 55
