# Build, Train, Deploy RoboticCM with AWS

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
sns.set(style="white")

In [4]:
import boto3
from sagemaker import get_execution_role

# Execution role handed to the SageMaker estimator further down, and the
# region the current boto3 session is configured for.
role = get_execution_role()
region = boto3.Session().region_name

# S3 layout for this project.
bucket = 'sagemaker-robotic-cm-bot'
# Key prefix under which the train/validation CSVs and model output are stored.
# NOTE(review): the prefix says "linear-svc" although the model trained below
# is XGBoost -- confirm whether the name is intentional.
prefix = 'linear-svc'
# HTTPS URL form of the bucket location.
bucket_path = f'https://s3-{region}.amazonaws.com/{bucket}'

# The raw training file sits at the bucket root, outside `prefix`.
raw_pipe_line_data = f's3://{bucket}/training-data.csv'

print(raw_pipe_line_data)

s3://sagemaker-robotic-cm-bot/training-data.csv


## Prepare Data

In [5]:
# Load the raw training CSV from its S3 URI into a DataFrame.
roboticcm = pd.read_csv(filepath_or_buffer=raw_pipe_line_data)

In [10]:
# 'Sussessful' is the name of the result (target) column.
# NOTE(review): the spelling presumably mirrors the header in
# training-data.csv -- confirm before "correcting" it here.
# `columns=` is used instead of a positional axis argument, which newer
# pandas releases deprecate and then reject.
features = roboticcm.drop(columns='Sussessful')
labels = roboticcm['Sussessful']

# Hold out 33% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
train, test, train_labels, test_labels = train_test_split(
    features,
    labels,
    test_size=0.33,
    random_state=42,
)

In [11]:
from io import StringIO

# Serialise both splits to in-memory CSV with the label as the FIRST column.
# The built-in SageMaker XGBoost algorithm documents its CSV input as
# label-first with no header record, so the header row is omitted; otherwise
# the column names would be fed to the trainer as if they were a data row.
test_csv_buffer = StringIO()
train_csv_buffer = StringIO()
pd.concat([test_labels, test], axis=1).to_csv(test_csv_buffer, header=False, index=False)
pd.concat([train_labels, train], axis=1).to_csv(train_csv_buffer, header=False, index=False)

# Upload both files under `prefix` in the project bucket. The held-out split
# is stored as validation.csv and is used as the validation channel later.
s3_resource = boto3.resource('s3')
s3_resource.Object(bucket, prefix + '/train.csv').put(Body=train_csv_buffer.getvalue())
s3_resource.Object(bucket, prefix + '/validation.csv').put(Body=test_csv_buffer.getvalue())

{'ResponseMetadata': {'RequestId': 'F453412BCFB44309',
  'HostId': 'aF42vDHkYEhxNb9kLWSqV9na6gH6NZ3J3PjfHY1mpRGrNCJ4IlYehlglPwkhQfvSwyD94Md1SQA=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'aF42vDHkYEhxNb9kLWSqV9na6gH6NZ3J3PjfHY1mpRGrNCJ4IlYehlglPwkhQfvSwyD94Md1SQA=',
   'x-amz-request-id': 'F453412BCFB44309',
   'date': 'Tue, 09 Jun 2020 15:13:08 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"358ba71f4f7d47d9334fa083f1349227"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"358ba71f4f7d47d9334fa083f1349227"',
 'ServerSideEncryption': 'AES256'}

## Train Model

In [12]:
# S3 URIs of the two uploaded datasets and of the folder that will receive
# the trained model artifacts; all live under `prefix` in the project bucket.
train_data = f's3://{bucket}/{prefix}/train.csv'
validation_data = f's3://{bucket}/{prefix}/validation.csv'
s3_output_location = f's3://{bucket}/{prefix}/xgboost_model_sdk'

In [13]:
import sagemaker

from sagemaker.amazon.amazon_estimator import get_image_uri

# Look up the XGBoost (version 0.90-1) training image for the current region.
container = get_image_uri(
    region_name=boto3.Session().region_name,
    repo_name='xgboost',
    repo_version='0.90-1',
)

In [14]:
# Estimator describing the training job: which image to run, under which
# role, on what hardware, and where to write the resulting model artifacts.
xgb_model = sagemaker.estimator.Estimator(
    container,
    role,
    sagemaker_session=sagemaker.Session(),
    output_path=s3_output_location,
    train_instance_type='ml.m4.xlarge',
    train_instance_count=1,
    train_volume_size=5,
)

In [15]:
# Hyperparameters forwarded to the XGBoost training container.
# The two-class problem is expressed as a multi-class softmax with
# num_class=2, trained for 10 boosting rounds.
xgb_model.set_hyperparameters(
    objective='multi:softmax',
    num_class=2,
    num_round=10,
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    silent=0,
)

In [16]:
# Wrap each S3 CSV location as a training-job input, tagged as text/csv.
train_channel = sagemaker.session.s3_input(s3_data=train_data, content_type='text/csv')
valid_channel = sagemaker.session.s3_input(s3_data=validation_data, content_type='text/csv')

# Channel name -> input mapping handed to the estimator's fit() call.
data_channels = dict(train=train_channel, validation=valid_channel)

In [17]:
# Launch the training job on the train/validation channels and stream its
# logs into the notebook output.
xgb_model.fit(data_channels, logs=True)

2020-06-09 15:13:21 Starting - Starting the training job...
2020-06-09 15:13:23 Starting - Launching requested ML instances......
2020-06-09 15:14:42 Starting - Preparing the instances for training......
2020-06-09 15:15:43 Downloading - Downloading input data...
2020-06-09 15:16:12 Training - Downloading the training image..[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34m[15:16:34] 1136x12 matrix with 13632 entries loaded from /opt/ml/input/data/tra

## Deploy Endpoint

With the model trained and its artifacts stored in S3, we can now deploy it. The SageMaker SDK makes this straightforward: SageMaker creates the model, the endpoint configuration, and the endpoint itself, all of which are hosted within SageMaker.

In [None]:
# Deploy the trained model behind a single-instance endpoint.
# Change `endpoint_name` to the name you want for your own endpoint.
xgb_predictor = xgb_model.deploy(
    endpoint_name='change-success-predictor',
    instance_type='ml.t2.medium',
    initial_instance_count=1,
)

--------------