In [2]:
%%capture
# Run the project's make targets for the training and inference CPU images.
# Presumably these build the Docker images and push them to the ECR repos
# configured further down ("autogluon-cpu" / "autogluon-inference-cpu") --
# confirm against the Makefile.
# NOTE(review): %%capture hides all output of this cell, including any
# build/push errors; remove it when debugging.
!make build_train_cpu
!make push_train_cpu
!make build_inference_cpu
!make push_inference_cpu 

In [94]:
import boto3
import time
import sagemaker
import pandas as pd
import numpy as np
from sagemaker.estimator import Framework
from sagemaker.mxnet import MXNetModel

In [39]:
# --- Notebook-wide configuration -------------------------------------------
# ECR repository names for the serving and training images.
ecr_serving_repo = "autogluon-inference-cpu"
ecr_training_repo = "autogluon-cpu"

# Execution role and session used by every SageMaker call below.
role = sagemaker.get_execution_role()
sess = sagemaker.session.Session()

# Default SageMaker bucket of the session, plus a key prefix for this example.
s3_bucket = sess.default_bucket()
s3_key = "autogluon-example"

# Use the public property instead of the private ``_region_name`` attribute.
region = sess.boto_region_name

# Index with ["Account"] so a malformed STS response fails here instead of
# silently producing "None.dkr.ecr..." image URIs.
account = boto3.client("sts").get_caller_identity()["Account"]

# Fully qualified ECR image URIs (":latest" tag) for training and serving.
ecr_registry = f"{account}.dkr.ecr.{region}.amazonaws.com"
training_image_uri = f"{ecr_registry}/{ecr_training_repo}:latest"
serving_image_uri = f"{ecr_registry}/{ecr_serving_repo}:latest"

In [8]:
class AutoGluon(Framework):
    """Thin ``Framework`` estimator for running a training script in a custom image.

    Only ``entry_point``, ``source_dir``, ``hyperparameters``, ``image_uri`` and
    any extra keyword arguments are forwarded to ``Framework``. The remaining
    constructor parameters are accepted but not used by this class.
    """

    def __init__(
        self,
        entry_point,
        source_dir=None,
        hyperparameters=None,
        py_version="py3",
        framework_version=None,
        image_uri=None,
        distributions=None,
        **kwargs
    ):
        """Forward the training configuration to ``Framework.__init__``.

        ``py_version``, ``framework_version`` and ``distributions`` are part of
        the signature but are ignored here.
        """
        super().__init__(
            entry_point=entry_point,
            source_dir=source_dir,
            hyperparameters=hyperparameters,
            image_uri=image_uri,
            **kwargs
        )

    def _configure_distribution(self, distributions):
        """No-op: ``distributions`` is ignored and ``None`` is returned."""
        return None

    def create_model(
        self,
        model_server_workers=None,
        role=None,
        vpc_config_override=None,
        entry_point=None,
        source_dir=None,
        dependencies=None,
        image_name=None,
        **kwargs
    ):
        """Stub that always returns ``None``; no model object is built here.

        In this notebook the serving model is constructed separately from the
        estimator's ``model_data``.
        """
        return None

In [12]:
# Training job settings: one ml.m5.2xlarge instance running the tabular
# training script inside the custom training image.
estimator_settings = {
    "image_uri": training_image_uri,
    "role": role,
    "entry_point": "training_scripts/tabular.py",
    "instance_count": 1,
    "instance_type": "ml.m5.2xlarge",
    "base_job_name": "tabular-train",
}
tabular_estimator = AutoGluon(**estimator_settings)

In [13]:
# Start the SageMaker training job; fit() is called without any input data channels.
tabular_estimator.fit()

2021-07-28 17:45:17 Starting - Starting the training job...
2021-07-28 17:45:22 Starting - Launching requested ML instancesProfilerReport-1627494317: InProgress
......
2021-07-28 17:46:47 Starting - Preparing the instances for training.........
2021-07-28 17:48:07 Downloading - Downloading input data
2021-07-28 17:48:07 Training - Downloading the training image...............
2021-07-28 17:50:48 Training - Training image download completed. Training in progress.[34m2021-07-28 17:50:35,916 sagemaker-training-toolkit INFO     Imported framework sagemaker_mxnet_container.training[0m
[34m2021-07-28 17:50:35,918 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-07-28 17:50:35,928 sagemaker_mxnet_container.training INFO     MXNet training environment: {'SM_HOSTS': '["algo-1"]', 'SM_NETWORK_INTERFACE_NAME': 'eth0', 'SM_HPS': '{}', 'SM_USER_ENTRY_POINT': 'tabular.py', 'SM_FRAMEWORK_PARAMS': '{}', 'SM_RESOURCE_CONFIG': '{"current_host":"algo-1",

In [15]:
# Location of the trained model artifact; reused below by both the SDK and the boto3 deployment.
s3_model_artifact = tabular_estimator.model_data

In [17]:
# Wrap the trained artifact in an MXNetModel served from the custom inference
# image, using the handler script in inference_scripts/.
model = MXNetModel(
    image_uri=serving_image_uri,
    role=role,
    model_data=s3_model_artifact,
    entry_point="tabular_serve.py",
    source_dir="inference_scripts",
)

# Set after construction rather than passed to the constructor.
# NOTE(review): presumably the SDK inspects framework_version at deploy time
# even when image_uri is given -- confirm.
model.framework_version = "1.8.0"

In [18]:
# Deploy the model to an endpoint backed by a single ml.m4.xlarge instance.
predictor = model.deploy(
    instance_type="ml.m4.xlarge",
    initial_instance_count=1,
)

-----------------!

In [22]:
from sagemaker.serializers import CSVSerializer
# Use the SDK's CSV serializer for request payloads sent through this predictor.
predictor.serializer = CSVSerializer()

In [20]:
# Sample dataset hosted in the public AutoGluon bucket; used below to build test requests.
train_csv_url = "https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv"
df = pd.read_csv(train_csv_url)

In [24]:
# test inference with SDK: first 10 rows, every column except the last one
sdk_sample = df.iloc[:10, :-1].values
predictor.predict(sdk_sample)

[[0.9721353054046631, 0.027864700183272362],
 [0.9936944842338562, 0.006305492017418146],
 [0.9840673804283142, 0.015932617709040642],
 [0.8471196293830872, 0.15288037061691284],
 [0.9168673157691956, 0.08313269913196564],
 [0.2918594479560852, 0.7081405520439148],
 [0.7997081279754639, 0.20029188692569733],
 [0.8797551393508911, 0.12024485319852829],
 [0.9925820827484131, 0.007417936343699694],
 [0.646324872970581, 0.35367509722709656]]

## Deploy with boto3

In [37]:
# Low-level clients, created in order: SageMaker control plane, SageMaker runtime, S3.
sm_client, smrt_client, s3_client = (
    boto3.client(service_name)
    for service_name in ("sagemaker", "sagemaker-runtime", "s3")
)

In [40]:
# !aws sagemaker describe-endpoint --endpoint-name {predictor.endpoint_name}

In [79]:
import tarfile
from io import BytesIO
import os
# s3_model_artifact is an "s3://<bucket>/<key...>" URI, so splitting on "/"
# yields ["s3:", "", "<bucket>", <key parts...>].
_, _, model_bucket, *model_key_parts = s3_model_artifact.split("/")
model_key = "/".join(model_key_parts)

s3_client.download_file(model_bucket, model_key, "model.tar.gz")

# Repack the training artifact: copy every regular file from the original
# archive, then add the local inference scripts under "code/".
with tarfile.open("model.tar.gz", "r:gz") as model_tar, \
        tarfile.open("model_code.tar.gz", "w:gz") as artifact_tar:
    for member in model_tar:
        if not member.isfile():
            continue
        buf = model_tar.extractfile(member).read()
        # A fresh TarInfo keeps only the name and size of the original entry.
        tar_info = tarfile.TarInfo(member.name)
        tar_info.size = len(buf)
        artifact_tar.addfile(tar_info, BytesIO(buf))
    artifact_tar.add("inference_scripts/", arcname="code")

s3_code_artifact_key = "autogluon/tabular-test/model_code.tar.gz"
# Upload to the session's default bucket. The original referenced an undefined
# name `bucket`, which raises NameError on a fresh kernel.
s3_client.upload_file("model_code.tar.gz", s3_bucket, s3_code_artifact_key)

# Remove the local temporary archives.
os.remove("model.tar.gz")
os.remove("model_code.tar.gz")

In [80]:
# A Unix-timestamp suffix keeps the resource names unique across runs.
now = int(time.time())
model_name, endpoint_config_name, endpoint_name = (
    f"autogluon-example-{resource_kind}-{now}"
    for resource_kind in ("mode", "endpoint-config", "endpoint")
)

In [82]:
# S3 URI of the repacked artifact (model files + "code/" directory) uploaded
# above. The original used an undefined name `bucket`; the session bucket is
# `s3_bucket`.
model_data_url = f"s3://{s3_bucket}/{s3_code_artifact_key}"

# Register the model with SageMaker. The environment variables tell the
# serving container which script to run and where the code lives.
create_model_response = sm_client.create_model(
    ModelName=model_name,
    PrimaryContainer={
        "Image": serving_image_uri,
        "Mode": "SingleModel",
        "ModelDataUrl": model_data_url,
        "Environment": {
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",  # 20 == logging.INFO
            "SAGEMAKER_PROGRAM": "tabular_serve.py",
            # Use the session's region instead of a hard-coded "us-east-1".
            "SAGEMAKER_REGION": region,
            # Point at the artifact built by this notebook rather than a
            # hard-coded archive from another account/run.
            # NOTE(review): assumes the container accepts the repacked
            # model archive as the submit directory -- confirm.
            "SAGEMAKER_SUBMIT_DIRECTORY": model_data_url,
        },
    },
    ExecutionRoleArn=role,
)

In [83]:
# Single production variant: one ml.m4.xlarge instance serving the model
# registered above, with variant weight 1.
production_variant = {
    "VariantName": "AllTraffic",
    "ModelName": model_name,
    "InitialInstanceCount": 1,
    "InstanceType": "ml.m4.xlarge",
    "InitialVariantWeight": 1,
}
create_endpoint_conf_resp = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)

In [85]:
# Request creation of the endpoint from the configuration defined above.
endpoint_request = {
    "EndpointName": endpoint_name,
    "EndpointConfigName": endpoint_config_name,
}
create_endpoint_resp = sm_client.create_endpoint(**endpoint_request)

In [88]:
import time
# Poll every 30 seconds until the endpoint leaves the "Creating" state.
while True:
    endpoint_description = sm_client.describe_endpoint(EndpointName=endpoint_name)
    endpoint_status = endpoint_description["EndpointStatus"]
    if endpoint_status != "Creating":
        break
    time.sleep(30)

# Leaving "Creating" does not imply success: stop here with a clear error
# instead of letting the invoke_endpoint call below fail on a broken endpoint.
if endpoint_status != "InService":
    raise RuntimeError(
        f"Endpoint {endpoint_name} ended in status {endpoint_status}: "
        f"{endpoint_description.get('FailureReason', 'no failure reason reported')}"
    )

In [89]:
# Build a CSV payload by hand: first 10 rows, every column except the last,
# comma-separated values and newline-separated rows (no header).
feature_rows = df.iloc[:10, :-1].values.tolist()
csv_lines = [",".join(str(value) for value in row) for row in feature_rows]
sample_records = "\n".join(csv_lines)

In [92]:
# Call the endpoint through the runtime client and read the raw response body.
invoke_response = smrt_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="text/csv",
    Body=sample_records.encode("utf8"),
)
predictions = invoke_response["Body"].read()

In [100]:
import json
# Decode the JSON response; the bare last expression displays it as the cell output.
parsed_predictions = json.loads(predictions)
parsed_predictions

[[0.9721353054046631, 0.027864700183272362],
 [0.9936944842338562, 0.006305492017418146],
 [0.9840673804283142, 0.015932617709040642],
 [0.8471196293830872, 0.15288037061691284],
 [0.9168673157691956, 0.08313269913196564],
 [0.2918594479560852, 0.7081405520439148],
 [0.7997081279754639, 0.20029188692569733],
 [0.8797551393508911, 0.12024485319852829],
 [0.9925820827484131, 0.007417936343699694],
 [0.646324872970581, 0.35367509722709656]]