From 9528a11375fcfdb7b6601d83b29127ec0434aa17 Mon Sep 17 00:00:00 2001
From: Benjamin <ben@neuralmagic.com>
Date: Mon, 4 Apr 2022 22:27:54 -0400
Subject: [PATCH 1/4] AWS Sagemaker example integration

---
 examples/aws-sagemaker/Dockerfile | 24 ++++++++++++++++++++++++
 src/deepsparse/server/config.py   | 19 ++++++++++++++++++-
 src/deepsparse/server/main.py     | 30 ++++++++++++++++++++++++++----
 3 files changed, 68 insertions(+), 5 deletions(-)
 create mode 100644 examples/aws-sagemaker/Dockerfile

diff --git a/examples/aws-sagemaker/Dockerfile b/examples/aws-sagemaker/Dockerfile
new file mode 100644
index 0000000000..9d0d1e0a9e
--- /dev/null
+++ b/examples/aws-sagemaker/Dockerfile
@@ -0,0 +1,24 @@
+FROM python:3.8-bullseye
+
+ARG config_path=./config.yaml
+
+USER root
+
+RUN apt-get -qq -y update && \
+    apt-get -qq -y upgrade && \
+    apt-get -y autoclean && \
+    apt-get -y autoremove && \
+    rm -rf /var/lib/apt/lists/*
+
+
+COPY ${config_path} /root/server-config.yaml
+
+ENV VIRTUAL_ENV=/venv
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+
+RUN python3 -m venv $VIRTUAL_ENV && \
+    pip3 install --no-cache-dir --upgrade pip && \
+    pip3 install --no-cache-dir "deepsparse[server]"
+
+ENTRYPOINT ["deepsparse.server", "--port", "8080", "--config_file", "/root/server-config.yaml"]
\ No newline at end of file
diff --git a/src/deepsparse/server/config.py b/src/deepsparse/server/config.py
index d74ed10231..41bbc8247e 100644
--- a/src/deepsparse/server/config.py
+++ b/src/deepsparse/server/config.py
@@ -117,6 +117,14 @@ class ServerConfig(BaseModel):
             "Defaults to the number of physical cores on the device."
         ),
     )
+    integration: str = Field(
+        default=None,
+        description=(
+            "Name of deployment integration that this server will be deployed to "
+            "Currently supported options are None for default inference and "
+            "'sagemaker' for inference deployment with AWS Sagemaker"
+        ),
+    )
 
 
 @lru_cache()
@@ -170,6 +178,7 @@ def server_config_to_env(
     task: str,
     model_path: str,
     batch_size: int,
+    integration: str,
     env_key: str = ENV_DEEPSPARSE_SERVER_CONFIG,
 ):
     """
@@ -186,6 +195,9 @@ def server_config_to_env(
         If config_file is supplied, this is ignored.
     :param batch_size: the batch size to serve the model from model_path with.
         If config_file is supplied, this is ignored.
+    :param integration: name of deployment integration that this server will be
+        deployed to. Supported options include None for default inference and
+        sagemaker for inference deployment on AWS Sagemaker
     :param env_key: the environment variable to set the configuration in.
         Defaults to ENV_DEEPSPARSE_SERVER_CONFIG
     """
@@ -199,7 +211,12 @@ def server_config_to_env(
             )
 
         single_str = json.dumps(
-            {"task": task, "model_path": model_path, "batch_size": batch_size}
+            {
+                "task": task,
+                "model_path": model_path,
+                "batch_size": batch_size,
+                "integration": integration,
+            }
         )
         config = f"{ENV_SINGLE_PREFIX}{single_str}"
 
diff --git a/src/deepsparse/server/main.py b/src/deepsparse/server/main.py
index 564bc5e42b..fa8896948a 100644
--- a/src/deepsparse/server/main.py
+++ b/src/deepsparse/server/main.py
@@ -123,10 +123,20 @@ def _home():
     _LOGGER.info("created general routes, visit `/docs` to view available")
 
 
-def _add_pipeline_route(app, pipeline_def, num_models: int, defined_tasks: set):
+def _add_pipeline_route(
+    app, pipeline_def, num_models: int, defined_tasks: set, integration: str
+):
     path = "/predict"
 
-    if pipeline_def.config.alias:
+    if integration.lower() == "sagemaker":
+        if num_models > 1:
+            raise ValueError(
+                "Sagemaker inference with deepsparse.server currently supports "
+                f"serving one model, received config for {num_models} models"
+            )
+        # required path name for Sagemaker
+        path = "/invocations"
+    elif pipeline_def.config.alias:
         path = f"/predict/{pipeline_def.config.alias}"
     elif num_models > 1:
         if pipeline_def.config.task in defined_tasks:
@@ -171,8 +181,11 @@ def server_app_factory():
     _LOGGER.debug("loaded pipeline definitions from config %s", pipeline_defs)
     num_tasks = len(config.models)
     defined_tasks = set()
+
     for pipeline_def in pipeline_defs:
-        _add_pipeline_route(app, pipeline_def, num_tasks, defined_tasks)
+        _add_pipeline_route(
+            app, pipeline_def, num_tasks, defined_tasks, config.integration
+        )
 
     return app
 
@@ -235,6 +248,14 @@ def server_app_factory():
     help="The batch size to serve the model from model_path with. "
     "Ignored if config_file is supplied.",
 )
+@click.option(
+    "--integration",
+    type=str,
+    default=None,
+    help="Name of deployment integration that this server will be deployed to "
+    "Currently supported options are None for default inference and 'sagemaker' for "
+    "inference deployment with AWS Sagemaker",
+)
 def start_server(
     host: str,
     port: int,
@@ -244,6 +265,7 @@ def start_server(
     task: str,
     model_path: str,
     batch_size: int,
+    integration: str,
 ):
     """
     Start a DeepSparse inference server for serving the models and pipelines given
@@ -263,7 +285,7 @@ def start_server(
           alias: question_answering/sparse_quantized
     """
     set_logging_level(getattr(logging, log_level.upper()))
-    server_config_to_env(config_file, task, model_path, batch_size)
+    server_config_to_env(config_file, task, model_path, batch_size, integration)
     filename = Path(__file__).stem
     package = "deepsparse.server"
     app_name = f"{package}.{filename}:server_app_factory"

From 390b8029357a2076eb9f697d024caa6c209c5494 Mon Sep 17 00:00:00 2001
From: Benjamin <ben@neuralmagic.com>
Date: Fri, 22 Apr 2022 17:37:12 -0400
Subject: [PATCH 2/4] documentation, sample config, dockerfile fixes

---
 examples/aws-sagemaker/Dockerfile  |  15 +-
 examples/aws-sagemaker/README.md   | 265 +++++++++++++++++++++++++++++
 examples/aws-sagemaker/config.yaml |   5 +
 3 files changed, 282 insertions(+), 3 deletions(-)
 create mode 100644 examples/aws-sagemaker/README.md
 create mode 100644 examples/aws-sagemaker/config.yaml

diff --git a/examples/aws-sagemaker/Dockerfile b/examples/aws-sagemaker/Dockerfile
index 9d0d1e0a9e..6d678aeac1 100644
--- a/examples/aws-sagemaker/Dockerfile
+++ b/examples/aws-sagemaker/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.8-bullseye
+FROM python:3.8
 
 ARG config_path=./config.yaml
 
@@ -19,6 +19,15 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
 RUN python3 -m venv $VIRTUAL_ENV && \
     pip3 install --no-cache-dir --upgrade pip && \
-    pip3 install --no-cache-dir "deepsparse[server]"
+    pip3 install --no-cache-dir "deepsparse-nightly[server]"  # TODO: switch to deepsparse[server] >= 0.12
 
-ENTRYPOINT ["deepsparse.server", "--port", "8080", "--config_file", "/root/server-config.yaml"]
\ No newline at end of file
+# create 'serve' command for sagemaker entrypoint
+RUN mkdir /opt/server/
+RUN echo "#! /bin/bash" > /opt/server/serve
+RUN echo "deepsparse.server --port 8080 --config_file /root/server-config.yaml" >> /opt/server/serve
+RUN chmod 777 /opt/server/serve
+
+ENV PATH="/opt/server:${PATH}"
+WORKDIR /opt/server
+
+ENTRYPOINT ["bash", "/opt/server/serve"]
diff --git a/examples/aws-sagemaker/README.md b/examples/aws-sagemaker/README.md
new file mode 100644
index 0000000000..572e1e58b3
--- /dev/null
+++ b/examples/aws-sagemaker/README.md
@@ -0,0 +1,265 @@
+<!--
+Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# Deploy DeepSparse with Amazon SageMaker
+
+[Amazon SageMaker](https://docs.aws.amazon.com/sagemaker/index.html)
+offers easy to use infrastructure for deploying deep learning models at scale.
+This directory provides a guided example for deploying a 
+[DeepSparse](https://github.com/neuralmagic/deepsparse) inference server on SageMaker.
+Using both of these tools, deployments benefit from sparse-CPU acceleration from
+DeepSparse and automatic scaling from SageMaker.
+
+
+## Contents
+In addition to the step-by-step instructions in this guide, this directory contains
+additional files to aid in the deployment.
+
+### Dockerfile
+The included `Dockerfile` builds an image on top of the standard `python:3.8` image
+with `deepsparse` installed and creates an executable command `serve` that runs
+`deepsparse.server` on port 8080.  SageMaker will execute this image by running
+`docker run <image> serve` and expects the image to serve inference requests at the
+`/invocations` endpoint.
+
+For general customization of the server, changes should not need to be made
+to the dockerfile, but to the `config.yaml` file that the dockerfile reads from
+instead.
+
+### config.yaml
+`config.yaml` is used to configure the DeepSparse server running in the Dockerfile.
+It is important that the config contains the line `integration: sagemaker` so
+endpoints may be provisioned correctly to match SageMaker specifications.
+
+Notice that the `model_path` and `task` are set to run a sparse-quantized
+question-answering model from [SparseZoo](https://sparsezoo.neuralmagic.com/).
+To use a model directory stored in `s3`, set `model_path` to `/opt/ml/model` in
+the config and add `ModelDataUrl=<MODEL-S3-PATH>` to the `CreateModel` arguments.
+SageMaker will automatically copy the files from the s3 path into `/opt/ml/model`
+which the server can then read from.
+
+More information on the DeepSparse server and its configuration can be found
+[here](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/server#readme).
+
+
+## Deploying to SageMaker
+The following steps are required to provision and deploy DeepSparse to SageMaker
+for inference:
+* Build the DeepSparse-SageMaker `Dockerfile` into a local docker image
+* Create an [Amazon ECR](https://aws.amazon.com/ecr/) repository to host the image
+* Push the image to the ECR repository
+* Create a SageMaker `Model` that reads from the hosted ECR image
+* Build a SageMaker `EndpointConfig` that defines how to provision the model deployment
+* Launch the SageMaker `Endpoint` defined by the `Model` and `EndpointConfig`
+
+### Requirements
+The listed steps can be easily completed using `python` and `bash`. The following
+credentials, tools, and libraries are also required:
+* The [`aws` cli](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) that is [configured](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html)
+* The [ARN of an AWS role](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) your user has access to that has full SageMaker and ECR permissions. In the following steps, we will refer to this as `ROLE_ARN`. It should take the form `"arn:aws:iam::XXX:role/service-role/XXX"`
+* [Docker and the `docker` cli](https://docs.docker.com/get-docker/)
+* The `boto3` python AWS sdk (`pip install boto3`)
+
+### Build the DeepSparse-SageMaker image locally
+The `Dockerfile` can be built from this directory from a bash shell using the following command.
+The image will be tagged locally as `deepsparse-sagemaker-example`.
+
+```bash
+docker build -t deepsparse-sagemaker-example .
+```
+
+### Create an ECR Repository
+The following code snippet can be used in python to create an ECR repository.
+The `region_name` can be swapped to a preferred region. The repository will be named
+`deepsparse-sagemaker`.  If the repository is already created, this step may be skipped.
+
+```python
+import boto3
+
+ecr = boto3.client("ecr", region_name='us-east-1')
+cr_res = ecr.create_repository(repositoryName="sagemaker-1")
+```
+
+### Push local image to ECR Repository
+Once the image is built and the ECR repository is created, the image can be pushed using the following
+bash commands.
+
+```bash
+account=$(aws sts get-caller-identity --query Account | sed -e 's/^"//' -e 's/"$//')
+region=$(aws configure get region)
+ecr_account=${account}.dkr.ecr.${region}.amazonaws.com
+
+aws ecr get-login-password --region $region | docker login --username AWS --password-stdin $ecr_account
+fullname=$ecr_account/deepsparse-example:latest
+
+docker tag deepsparse-sagemaker-example:latest $fullname
+docker push $fullname
+```
+
+An abbreviated successful output will look like:
+```
+Login Succeeded
+The push refers to repository [XXX.dkr.ecr.us-east-1.amazonaws.com/deepsparse-example]
+3c2284f66840: Preparing
+08fa02ce37eb: Preparing
+a037458de4e0: Preparing
+bafdbe68e4ae: Preparing
+a13c519c6361: Preparing
+6817758dd480: Waiting
+6d95196cbe50: Waiting
+e9872b0f234f: Waiting
+c18b71656bcf: Waiting
+2174eedecc00: Waiting
+03ea99cd5cd8: Pushed
+585a375d16ff: Pushed
+5bdcc8e2060c: Pushed
+latest: digest: sha256:XXX size: 3884
+```
+
+### Create SageMaker Model
+A SageMaker `Model` can now be created referencing the pushed image.
+The example model will be named `question-answering-example`.
+As mentioned in the requirements, `ROLE_ARN` should be a string arn of an AWS
+role with full access to SageMaker.
+
+```python
+sm_boto3 = boto3.client("sagemaker", region_name="us-east-1")
+
+region = boto3.Session().region_name
+account_id = boto3.client("sts").get_caller_identity()["Account"]
+
+image_uri = "{}.dkr.ecr.{}.amazonaws.com/deepsparse-example:latest".format(account_id, region)
+
+cm_res = sm_boto3.create_model(
+    ModelName="question-answering-example",
+    Containers=[
+        {
+            "Image": image_uri,
+        },
+    ],
+    ExecutionRoleArn=ROLE_ARN,
+    EnableNetworkIsolation=False,
+)
+```
+
+More information about options for configuring SageMaker `Model` instances can
+be found [here](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateModel.html).
+
+
+### Build SageMaker EndpointConfig
+The `EndpointConfig` is used to set the instance type to provision, how many, scaling
+rules, and other deployment settings.  The following code snippet defines an endpoint
+with a single machine using an `ml.c5.large` CPU.
+
+* [Full list of available instances](https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-available-instance-types.html) (See Compute optimized (no GPUs) section)
+* [EndpointConfig documentation and options](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateEndpointConfig.html)
+
+```python
+model_name = "question-answering-example"  # model defined above
+initial_instance_count = 1
+instance_type = "ml.c5.large"
+
+variant_name = "QuestionAnsweringDeepSparseDemo"  # ^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}
+
+production_variants = [
+    {
+        "VariantName": variant_name,
+        "ModelName": model_name,
+        "InitialInstanceCount": initial_instance_count,
+        "InstanceType": instance_type,
+    }
+]
+
+endpoint_config_name = "QuestionAnsweringExampleConfig"  # ^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}
+
+endpoint_config = {
+    "EndpointConfigName": endpoint_config_name,
+    "ProductionVariants": production_variants,
+}
+
+endpoint_config_res = sm_boto3.create_endpoint_config(**endpoint_config)
+```
+
+### Launch SageMaker Endpoint
+Once the `EndpointConfig` is defined, the endpoint can be easily launched using
+the `create_endpoint` command:
+
+```python
+endpoint_name = "question-answering-example-endpoint"
+endpoint_res = sm_boto3.create_endpoint(
+    EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name
+)
+```
+
+After creating the endpoint, its status can be checked by running the following.
+Initially, the `EndpointStatus` will be `Creating`. Checking after the image is
+successfully launched, it will be `InService`. If there are any errors, it will 
+become `Failed`.
+
+```python
+print(sm_boto3.describe_endpoint(EndpointName=endpoint_name))
+```
+
+
+## Making a request to the Endpoint
+After the endpoint is in service, requests can be made to it through the
+`invoke_endpoint` api. Inputs will be passed as a json payload.
+
+```python
+import json
+
+sm_runtime = boto3.client("sagemaker-runtime", region_name="us-east-1")
+
+body = json.dumps(
+    dict(
+        question="Where do I live?",
+        context="I am a student and I live in Cambridge",
+    )
+)
+
+content_type = "application/json"
+accept = "text/plain"
+
+res = sm_runtime.invoke_endpoint(
+    EndpointName=endpoint_name,
+    Body=body,
+    ContentType=content_type,
+    Accept=accept,
+)
+
+print(res["body"].readlines())
+```
+
+
+### Cleanup
+The model and endpoint can be deleted with the following commands:
+```python
+sm_boto3.delete_endpoint(EndpointName=endpoint_name)
+sm_boto3.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
+sm_boto3.delete_model(ModelName=model_name)
+```
+
+## Next Steps
+These steps create an invokable SageMaker inference endpoint powered with the DeepSparse
+engine.  The `EndpointConfig` settings may be adjusted to set instance scaling rules based
+on deployment needs.
+
+More information on deploying custom models with SageMaker can be found
+[here](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html).
+
+Open an [issue](https://github.com/neuralmagic/deepsparse/issues)
+or reach out to the [DeepSparse community](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-q1a1cnvo-YBoICSIw3L1dmQpjBeDurQ)
+with any issues, questions, or ideas.
diff --git a/examples/aws-sagemaker/config.yaml b/examples/aws-sagemaker/config.yaml
new file mode 100644
index 0000000000..cfca35a27c
--- /dev/null
+++ b/examples/aws-sagemaker/config.yaml
@@ -0,0 +1,5 @@
+models:
+    - task: question_answering
+      model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant-moderate
+      batch_size: 1
+integration: sagemaker

From ead0d4db01210a3f5f8a206f191b111fcead5609 Mon Sep 17 00:00:00 2001
From: Benjamin <ben@neuralmagic.com>
Date: Fri, 22 Apr 2022 17:45:47 -0400
Subject: [PATCH 3/4] fix ecr repo name

---
 examples/aws-sagemaker/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/aws-sagemaker/README.md b/examples/aws-sagemaker/README.md
index 572e1e58b3..de784d95b0 100644
--- a/examples/aws-sagemaker/README.md
+++ b/examples/aws-sagemaker/README.md
@@ -90,7 +90,7 @@ The `region_name` can be swapped to a preferred region. The repository will be n
 import boto3
 
 ecr = boto3.client("ecr", region_name='us-east-1')
-cr_res = ecr.create_repository(repositoryName="sagemaker-1")
+cr_res = ecr.create_repository(repositoryName="deepsparse-sagemaker")
 ```
 
 ### Push local image to ECR Repository

From 2c3c7e9534b20937cf3fca098dbcb8d9ab9c4825 Mon Sep 17 00:00:00 2001
From: Benjamin <ben@neuralmagic.com>
Date: Fri, 22 Apr 2022 18:05:58 -0400
Subject: [PATCH 4/4] readme code changes from testing

---
 examples/aws-sagemaker/README.md | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/examples/aws-sagemaker/README.md b/examples/aws-sagemaker/README.md
index de784d95b0..315c480391 100644
--- a/examples/aws-sagemaker/README.md
+++ b/examples/aws-sagemaker/README.md
@@ -90,7 +90,7 @@ The `region_name` can be swapped to a preferred region. The repository will be n
 import boto3
 
 ecr = boto3.client("ecr", region_name='us-east-1')
-cr_res = ecr.create_repository(repositoryName="deepsparse-sagemaker")
+create_repository_res = ecr.create_repository(repositoryName="deepsparse-sagemaker")
 ```
 
 ### Push local image to ECR Repository
@@ -103,7 +103,7 @@ region=$(aws configure get region)
 ecr_account=${account}.dkr.ecr.${region}.amazonaws.com
 
 aws ecr get-login-password --region $region | docker login --username AWS --password-stdin $ecr_account
-fullname=$ecr_account/deepsparse-example:latest
+fullname=$ecr_account/deepsparse-sagemaker:latest
 
 docker tag deepsparse-sagemaker-example:latest $fullname
 docker push $fullname
@@ -141,9 +141,9 @@ sm_boto3 = boto3.client("sagemaker", region_name="us-east-1")
 region = boto3.Session().region_name
 account_id = boto3.client("sts").get_caller_identity()["Account"]
 
-image_uri = "{}.dkr.ecr.{}.amazonaws.com/deepsparse-example:latest".format(account_id, region)
+image_uri = "{}.dkr.ecr.{}.amazonaws.com/deepsparse-sagemaker:latest".format(account_id, region)
 
-cm_res = sm_boto3.create_model(
+create_model_res = sm_boto3.create_model(
     ModelName="question-answering-example",
     Containers=[
         {
@@ -210,7 +210,8 @@ successfully launched, it will be `InService`. If there are any errors, it will
 become `Failed`.
 
 ```python
-print(sm_boto3.describe_endpoint(EndpointName=endpoint_name))
+from pprint import pprint
+pprint(sm_boto3.describe_endpoint(EndpointName=endpoint_name))
 ```
 
 
@@ -240,7 +241,7 @@ res = sm_runtime.invoke_endpoint(
     Accept=accept,
 )
 
-print(res["body"].readlines())
+print(res["Body"].readlines())
 ```