In [None]:
# Log Docker in to ECR registry 727897471807 in cn-north-1: the output of
# `aws ecr get-login` is executed by the surrounding $(...) substitution.
# NOTE(review): `get-login` is only available in AWS CLI v1; with CLI v2 the
# equivalent is `aws ecr get-login-password | docker login ...` — confirm which
# CLI version this notebook environment ships.
!$(aws ecr get-login --registry-ids 727897471807 --region cn-north-1 --no-include-email)
# Pull the mxnet-inference:1.6.0-cpu-py3 image from that registry.
!docker pull 727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn/mxnet-inference:1.6.0-cpu-py3

In [None]:
# Start the pulled inference image with a pseudo-TTY (-t) as a quick smoke test.
# NOTE(review): there is no -d, --rm or port mapping here, so this presumably
# runs in the foreground until interrupted and leaves a stopped container
# behind — confirm that is intended.
!docker run -t 727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn/mxnet-inference:1.6.0-cpu-py3

## Deploy on SageMaker

1. Model parameters
2. Code with data pre-processing and model inference
3. A docker container with dependencies installed
4. Launch a serving endpoint with the SageMaker SDK

In [None]:
import sagemaker
from sagemaker import get_execution_role, local, Model, utils, fw_utils, s3

# Account / S3 setup: every name below is derived from the notebook's own
# SageMaker session so that later cells can interpolate it into shell commands.
session = sagemaker.Session()
local_session = local.LocalSession()
bucket = session.default_bucket()
prefix = 'sagemaker/mxnet-inference'
region = session.boto_region_name
role = get_execution_role()

# Look up the caller's AWS account id through the regional STS endpoint.
client = session.boto_session.client(
    "sts", region_name=region, endpoint_url=utils.sts_regional_endpoint(region)
)
account = client.get_caller_identity()['Account']

# ECR host prefix of this account in the session region; `fullname` (the push
# target) is built from it below.
ecr_uri_prefix = utils.get_ecr_image_uri_prefix(account, region)

# Registry id for the MXNet 1.6.0 / py3 framework image in `region`.
# NOTE(review): `_registry_id` is a private SDK helper — presumably it returns
# the AWS-owned framework-image account; verify against the installed SDK.
registry_id = fw_utils._registry_id(region, 'mxnet', 'py3', account, '1.6.0')
# Build the registry URI for the same region the id was resolved for. This was
# hard-coded to "cn-north-1", which mismatches `registry_id` whenever the
# session runs in any other region.
registry_uri = utils.get_ecr_image_uri_prefix(registry_id, region)

# Local image name and the fully qualified ECR name it is pushed under.
inference_algorithm_name = 'mxnet-inference'
algorithm_name = inference_algorithm_name
fullname = f"{ecr_uri_prefix}/{algorithm_name}:latest"
image = inference_algorithm_name

In [None]:
# Echo the resolved settings, one per line, so they can be checked before any
# Docker/ECR command interpolates them.
for setting in (region, account, registry_id, fullname, image, registry_uri):
    print(setting)

In [None]:
# Log Docker in to this account's ECR registry in the session region; the
# output of `aws ecr get-login` is executed by the surrounding $(...).
# NOTE(review): `get-login` is AWS CLI v1 only — confirm the CLI version.
!$(aws ecr get-login --registry-ids {account} --region {region} --no-include-email)

In [None]:
# If the repository doesn't exist in ECR, create it.
# `describe-repositories` is tried first; `create-repository` only runs when
# that first command exits non-zero (shell `||`).
!aws ecr describe-repositories --repository-names "{image}" --region {region} || aws ecr create-repository --repository-name "{image}" --region {region}

In [None]:
!docker pull {account}.dkr.ecr.cn-north-1.amazonaws.com.cn/htsc-model-b-base:latest

In [None]:
!$(aws ecr get-login --registry-ids {account} --region cn-north-1 --no-include-email)
!docker build -t {image} -f Dockerfile . --build-arg REGISTRY_URI={registry_uri}

In [None]:
# Run the freshly built image locally: publish container port 8080 on host
# port 8080 and remove the container when it exits (--rm).
# NOTE(review): without -d this presumably occupies the cell until the
# container is stopped — interrupt the kernel to continue.
!docker run -t -p 8080:8080 --rm {image}

In [None]:
# Tag the local image with its fully qualified ECR name, then push that tag.
!docker tag {image} {fullname}
!docker push {fullname}

### Use SageMaker SDK to Deploy the Model

We create an MXNet model that can be deployed later by specifying the Docker image and the entry point for the inference code. If `serve.py` does not work, use `dummy_hosting_module.py` for debugging purposes.

In [None]:
import sagemaker
from sagemaker.mxnet.model import MXNetModel
import logging

# S3 URI of the packaged model artifact inside the session's default bucket.
model_data = f"s3://{session.default_bucket()}/model/model.tar.gz"
print(model_data)

In [None]:
# Describe the deployable model: the S3 artifact, the custom serving image and
# the inference entry-point script, bound to this notebook's role and session.
sagemaker_model = MXNetModel(
    model_data=model_data,
    role=role,
    entry_point='serve.py',
    image=fullname,              # docker image
    framework_version='1.6.0',
    py_version='py3',            # python version
    sagemaker_session=session,
)

In [None]:
# Endpoint name that the boto3 `invoke_endpoint` call later in this notebook
# targets.
# NOTE(review): the deployed endpoint has to exist under exactly this name for
# that call to resolve — verify the deploy cell uses it.
endpoint_name="mxnet-inference"

In [None]:
%%time

import logging
from sagemaker.predictor import RealTimePredictor
logging.getLogger().setLevel(logging.WARNING)

predictor = sagemaker_model.deploy(initial_instance_count=1, 
                                   instance_type='ml.m5.xlarge')

logging.getLogger().setLevel(logging.INFO)

### Inference with boto3

In [None]:
import boto3
import time
# Low-level SageMaker runtime client used to invoke the endpoint directly.
# NOTE(review): no region is passed, so boto3's default region configuration
# applies — confirm it matches the region the endpoint was deployed in.
runtime = boto3.client('sagemaker-runtime')

In [None]:
# Load the sample CSV payload from disk and send it to the endpoint as text/csv.
with open("sample_2.csv") as sample_file:
    csv_str = sample_file.read()

response = runtime.invoke_endpoint(
    ContentType='text/csv',
    EndpointName=endpoint_name,  # alternatively: predictor.endpoint
    Body=csv_str,                # could also be sent as csv_str.encode('utf-8')
)

In [None]:
# Read the response body, decode it as UTF-8 and print the result.
# NOTE(review): the body is presumably a one-shot stream, so re-running this
# cell without re-invoking the endpoint would print nothing — confirm.
print(response['Body'].read().decode("utf-8"))

### Clean Up

Remove the endpoint after we are done.

In [None]:
# Delete the endpoint behind `predictor` (the object returned by `deploy`).
predictor.delete_endpoint()