## SageMaker model deployment in 3 simple steps
Using low-level Python (boto3) calls exposes the behind-the-scenes actions and allows more precise control of model deployment.

In [1]:
import boto3
from sagemaker import get_execution_role
from time import gmtime, strftime

In [2]:
# Execution role that is later passed to create_model as ExecutionRoleArn.
role = get_execution_role()
# Low-level SageMaker client used for every API call in this notebook.
smclient = boto3.client('sagemaker')

# Region -> XGBoost container image used to serve the model.
containers = {'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',
              'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',
              'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',
              'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest'}

# Fail with an actionable message instead of a bare KeyError when the session
# region is unset (None) or is not one of the regions listed above.
region = boto3.Session().region_name
if region not in containers:
    raise ValueError(
        "No XGBoost container image is listed for region {!r}; "
        "supported regions: {}".format(region, ', '.join(sorted(containers))))
container = containers[region]

## 1. Create target model from saved model artifacts
Each completed training job saves its model artifacts in S3, identified by the training job name (replace the name below with your own training job).

In [3]:
# Training job whose saved artifacts will be deployed (replace with your own).
training_job_name = 'xgb-hpo-mar01-2139-a4e92-66ad85cb-34cf-4cb5-8a5d-7c9a71'

# Describe the job and pull the S3 location of its model artifacts.
saved_model = smclient.describe_training_job(TrainingJobName=training_job_name)
model_artifacts = saved_model['ModelArtifacts']
model_url = model_artifacts['S3ModelArtifacts']
print(model_url)

s3://dslab-west/bosch/sagemaker_hpo_output/xgb-hpo-mar01-2139-a4e92-66ad85cb-34cf-4cb5-8a5d-7c9a71/output/model.tar.gz


In [4]:
# A UTC timestamp suffix distinguishes the model name from earlier runs.
model_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
deploy_model_name = 'xgboost-hpo-deploymodel-' + model_timestamp

# The container image plus the trained artifacts define the deployable model.
primary_container = dict(Image=container, ModelDataUrl=model_url)

create_model_response = smclient.create_model(
    ModelName=deploy_model_name,
    ExecutionRoleArn=role,
    PrimaryContainer=primary_container,
)

print(create_model_response['ModelArn'])

arn:aws:sagemaker:us-west-2:448855094770:model/xgboost-hpo-deploymodel-2018-03-18-16-15-36


## 2. Configure deployment instances (service endpoints)

In [5]:
# Create the endpoint configuration under a timestamped name.
config_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_config_name = 'xgboost-hpo-endpoint-config-' + config_timestamp
print(endpoint_config_name)

# A single variant that serves the model created above on one instance.
production_variant = {
    'VariantName': 'AllTraffic',
    'ModelName': deploy_model_name,
    'InstanceType': 'ml.t2.medium',
    'InitialInstanceCount': 1,
}

create_endpoint_config_response = smclient.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)

endpoint_config_arn = create_endpoint_config_response['EndpointConfigArn']
print("Endpoint Config Arn: " + endpoint_config_arn)

xgboost-hpo-endpoint-config-2018-03-18-16-15-37
Endpoint Config Arn: arn:aws:sagemaker:us-west-2:448855094770:endpoint-config/xgboost-hpo-endpoint-config-2018-03-18-16-15-37


## 3. Deploy model to HTTP service endpoints

In [6]:
# Create the endpoint from the endpoint configuration, then wait for it to come up.
endpoint_name = 'xgboost-hpo-endpoint-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(endpoint_name)
create_endpoint_response = smclient.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name)
print(create_endpoint_response['EndpointArn'])

# Report the status immediately after the create call.
resp = smclient.describe_endpoint(EndpointName=endpoint_name)
status = resp['EndpointStatus']
print("Status: " + status)
try:
    # Blocks until the 'endpoint_in_service' waiter completes or raises.
    smclient.get_waiter('endpoint_in_service').wait(EndpointName=endpoint_name)
finally:
    # Runs whether or not the waiter raised, so the final state is always reported.
    resp = smclient.describe_endpoint(EndpointName=endpoint_name)
    status = resp['EndpointStatus']
    print("Arn: " + resp['EndpointArn'])
    print("Create endpoint ended with status: " + status)

    if status != 'InService':
        # Reuse the response fetched above instead of describing the endpoint again.
        # FailureReason may be absent (e.g. the waiter gave up while the endpoint
        # was still being created), so avoid a KeyError that would hide the real problem.
        message = resp.get('FailureReason', 'no failure reason reported')
        print('Endpoint creation failed with the following error: {}'.format(message))
        raise Exception('Endpoint creation did not succeed')

xgboost-hpo-endpoint-2018-03-18-16-15-37
arn:aws:sagemaker:us-west-2:448855094770:endpoint/xgboost-hpo-endpoint-2018-03-18-16-15-37
Status: Creating
Arn: arn:aws:sagemaker:us-west-2:448855094770:endpoint/xgboost-hpo-endpoint-2018-03-18-16-15-37
Create endpoint ended with status: InService
