From 9528a11375fcfdb7b6601d83b29127ec0434aa17 Mon Sep 17 00:00:00 2001 From: Benjamin Date: Mon, 4 Apr 2022 22:27:54 -0400 Subject: [PATCH 1/4] AWS Sagemaker example integration --- examples/aws-sagemaker/Dockerfile | 24 ++++++++++++++++++++++++ src/deepsparse/server/config.py | 19 ++++++++++++++++++- src/deepsparse/server/main.py | 30 ++++++++++++++++++++++++++---- 3 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 examples/aws-sagemaker/Dockerfile diff --git a/examples/aws-sagemaker/Dockerfile b/examples/aws-sagemaker/Dockerfile new file mode 100644 index 0000000000..9d0d1e0a9e --- /dev/null +++ b/examples/aws-sagemaker/Dockerfile @@ -0,0 +1,24 @@ +FROM python:3.8-bullseye + +ARG config_path=./config.yaml + +USER root + +RUN apt-get -qq -y update && \ + apt-get -qq -y upgrade && \ + apt-get -y autoclean && \ + apt-get -y autoremove && \ + rm -rf /var/lib/apt/lists/* + + +COPY ${config_path} /root/server-config.yaml + +ENV VIRTUAL_ENV=/venv +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + + +RUN python3 -m venv $VIRTUAL_ENV && \ + pip3 install --no-cache-dir --upgrade pip && \ + pip3 install --no-cache-dir "deepsparse[server]" + +ENTRYPOINT ["deepsparse.server", "--port", "8080", "--config_file", "/root/server-config.yaml"] \ No newline at end of file diff --git a/src/deepsparse/server/config.py b/src/deepsparse/server/config.py index d74ed10231..41bbc8247e 100644 --- a/src/deepsparse/server/config.py +++ b/src/deepsparse/server/config.py @@ -117,6 +117,14 @@ class ServerConfig(BaseModel): "Defaults to the number of physical cores on the device." 
), ) + integration: str = Field( + default=None, + description=( + "Name of deployment integration that this server will be deployed to " + "Currently supported options are None for default inference and " + "'sagemaker' for inference deployment with AWS Sagemaker" + ), + ) @lru_cache() @@ -170,6 +178,7 @@ def server_config_to_env( task: str, model_path: str, batch_size: int, + integration: str, env_key: str = ENV_DEEPSPARSE_SERVER_CONFIG, ): """ @@ -186,6 +195,9 @@ def server_config_to_env( If config_file is supplied, this is ignored. :param batch_size: the batch size to serve the model from model_path with. If config_file is supplied, this is ignored. + :param integration: name of deployment integration that this server will be + deployed to. Supported options include None for default inference and + sagemaker for inference deployment on AWS Sagemaker :param env_key: the environment variable to set the configuration in. Defaults to ENV_DEEPSPARSE_SERVER_CONFIG """ @@ -199,7 +211,12 @@ def server_config_to_env( ) single_str = json.dumps( - {"task": task, "model_path": model_path, "batch_size": batch_size} + { + "task": task, + "model_path": model_path, + "batch_size": batch_size, + "integration": integration, + } ) config = f"{ENV_SINGLE_PREFIX}{single_str}" diff --git a/src/deepsparse/server/main.py b/src/deepsparse/server/main.py index 564bc5e42b..fa8896948a 100644 --- a/src/deepsparse/server/main.py +++ b/src/deepsparse/server/main.py @@ -123,10 +123,20 @@ def _home(): _LOGGER.info("created general routes, visit `/docs` to view available") -def _add_pipeline_route(app, pipeline_def, num_models: int, defined_tasks: set): +def _add_pipeline_route( + app, pipeline_def, num_models: int, defined_tasks: set, integration: str +): path = "/predict" - if pipeline_def.config.alias: + if integration.lower() == "sagemaker": + if num_models > 1: + raise ValueError( + "Sagemaker inference with deepsparse.server currently supports " + f"serving one model, received config for 
{num_models} models" + ) + # required path name for Sagemaker + path = "/invocations" + elif pipeline_def.config.alias: path = f"/predict/{pipeline_def.config.alias}" elif num_models > 1: if pipeline_def.config.task in defined_tasks: @@ -171,8 +181,11 @@ def server_app_factory(): _LOGGER.debug("loaded pipeline definitions from config %s", pipeline_defs) num_tasks = len(config.models) defined_tasks = set() + for pipeline_def in pipeline_defs: - _add_pipeline_route(app, pipeline_def, num_tasks, defined_tasks) + _add_pipeline_route( + app, pipeline_def, num_tasks, defined_tasks, config.integration + ) return app @@ -235,6 +248,14 @@ def server_app_factory(): help="The batch size to serve the model from model_path with. " "Ignored if config_file is supplied.", ) +@click.option( + "--integration", + type=str, + default=None, + help="Name of deployment integration that this server will be deployed to " + "Currently supported options are None for default inference and 'sagemaker' for " + "inference deployment with AWS Sagemaker", +) def start_server( host: str, port: int, @@ -244,6 +265,7 @@ def start_server( task: str, model_path: str, batch_size: int, + integration: str, ): """ Start a DeepSparse inference server for serving the models and pipelines given @@ -263,7 +285,7 @@ def start_server( alias: question_answering/sparse_quantized """ set_logging_level(getattr(logging, log_level.upper())) - server_config_to_env(config_file, task, model_path, batch_size) + server_config_to_env(config_file, task, model_path, batch_size, integration) filename = Path(__file__).stem package = "deepsparse.server" app_name = f"{package}.{filename}:server_app_factory" From 390b8029357a2076eb9f697d024caa6c209c5494 Mon Sep 17 00:00:00 2001 From: Benjamin Date: Fri, 22 Apr 2022 17:37:12 -0400 Subject: [PATCH 2/4] documentation, sample config, dockerfile fixes --- examples/aws-sagemaker/Dockerfile | 15 +- examples/aws-sagemaker/README.md | 265 +++++++++++++++++++++++++++++ 
examples/aws-sagemaker/config.yaml | 5 + 3 files changed, 282 insertions(+), 3 deletions(-) create mode 100644 examples/aws-sagemaker/README.md create mode 100644 examples/aws-sagemaker/config.yaml diff --git a/examples/aws-sagemaker/Dockerfile b/examples/aws-sagemaker/Dockerfile index 9d0d1e0a9e..6d678aeac1 100644 --- a/examples/aws-sagemaker/Dockerfile +++ b/examples/aws-sagemaker/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-bullseye +FROM python:3.8 ARG config_path=./config.yaml @@ -19,6 +19,15 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH" RUN python3 -m venv $VIRTUAL_ENV && \ pip3 install --no-cache-dir --upgrade pip && \ - pip3 install --no-cache-dir "deepsparse[server]" + pip3 install --no-cache-dir "deepsparse-nightly[server]" # TODO: switch to deepsparse[server] >= 0.12 -ENTRYPOINT ["deepsparse.server", "--port", "8080", "--config_file", "/root/server-config.yaml"] \ No newline at end of file +# create 'serve' command for sagemaker entrypoint +RUN mkdir /opt/server/ +RUN echo "#! /bin/bash" > /opt/server/serve +RUN echo "deepsparse.server --port 8080 --config_file /root/server-config.yaml" >> /opt/server/serve +RUN chmod 777 /opt/server/serve + +ENV PATH="/opt/server:${PATH}" +WORKDIR /opt/server + +ENTRYPOINT ["bash", "/opt/server/serve"] diff --git a/examples/aws-sagemaker/README.md b/examples/aws-sagemaker/README.md new file mode 100644 index 0000000000..572e1e58b3 --- /dev/null +++ b/examples/aws-sagemaker/README.md @@ -0,0 +1,265 @@ + + +# Deploy DeepSparse with Amazon SageMaker + +[Amazon SageMaker](https://docs.aws.amazon.com/sagemaker/index.html) +offers easy to use infrastructure for deploying deep learning models at scale. +This directory provides a guided example for deploying a +[DeepSparse](https://github.com/neuralmagic/deepsparse) inference server on SageMaker. +Using both of these tools, deployments benefit from sparse-CPU acceleration from +DeepSparse and automatic scaling from SageMaker. 
+ + +## Contents +In addition to the step-by-step instructions in this guide, this directory contains +additional files to aid in the deployment. + +### Dockerfile +The included `Dockerfile` builds an image on top of the standard `python:3.8` image +with `deepsparse` installed and creates an executable command `serve` that runs +`deepsparse.server` on port 8080. SageMaker will execute this image by running +`docker run serve` and expects the image to serve inference requests at the +`/invocations` endpoint. + +For general customization of the server, changes should not need to be made +to the dockerfile, but to the `config.yaml` file that the dockerfile reads from +instead. + +### config.yaml +`config.yaml` is used to configure the DeepSparse server running in the Dockerfile. +It is important that the config contains the line `integration: sagemaker` so +endpoints may be provisioned correctly to match SageMaker specifications. + +Notice that the `model_path` and `task` are set to run a sparse-quantized +question-answering model from [SparseZoo](https://sparsezoo.neuralmagic.com/). +To use a model directory stored in `s3`, set `model_path` to `/opt/ml/model` in +the config and add `ModelDataUrl=` to the `CreateModel` arguments. +SageMaker will automatically copy the files from the s3 path into `/opt/ml/model` +which the server can then read from. + +More information on the DeepSparse server and its configuration can be found +[here](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/server#readme). 
+ + +## Deploying to SageMaker +The following steps are required to provision and deploy DeepSparse to SageMaker +for inference: +* Build the DeepSparse-SageMaker `Dockerfile` into a local docker image +* Create an [Amazon ECR](https://aws.amazon.com/ecr/) repository to host the image +* Push the image to the ECR repository +* Create a SageMaker `Model` that reads from the hosted ECR image +* Build a SageMaker `EndpointConfig` that defines how to provision the model deployment +* Launch the SageMaker `Endpoint` defined by the `Model` and `EndpointConfig` + +### Requirements +The listed steps can be easily completed using `python` and `bash`. The following +credentials, tools, and libraries are also required: +* The [`aws` cli](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) that is [configured](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html) +* The [ARN of an AWS role](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) your user has access to that has full SageMaker and ECR permissions. In the following steps, we will refer to this as `ROLE_ARN`. It should take the form `"arn:aws:iam::XXX:role/service-role/XXX"` +* [Docker and the `docker` cli](https://docs.docker.com/get-docker/) +* The `boto3` python AWS sdk (`pip install boto3`) + +### Build the DeepSparse-SageMaker image locally +The `Dockerfile` can be built from this directory from a bash shell using the following command. +The image will be tagged locally as `deepsparse-sagemaker-example`. + +```bash +docker build -t deepsparse-sagemaker-example . +``` + +### Create an ECR Repository +The following code snippet can be used in python to create an ECR repository. +The `region_name` can be swapped to a preferred region. The repository will be named +`deepsparse-sagemaker`. If the repository is already created, this step may be skipped. 
+ +```python +import boto3 + +ecr = boto3.client("ecr", region_name='us-east-1') +cr_res = ecr.create_repository(repositoryName="sagemaker-1") +``` + +### Push local image to ECR Repository +Once the image is built and the ECR repository is created, the image can be pushed using the following +bash commands. + +```bash +account=$(aws sts get-caller-identity --query Account | sed -e 's/^"//' -e 's/"$//') +region=$(aws configure get region) +ecr_account=${account}.dkr.ecr.${region}.amazonaws.com + +aws ecr get-login-password --region $region | docker login --username AWS --password-stdin $ecr_account +fullname=$ecr_account/deepsparse-example:latest + +docker tag deepsparse-sagemaker-example:latest $fullname +docker push $fullname +``` + +An abbreviated successful output will look like: +``` +Login Succeeded +The push refers to repository [XXX.dkr.ecr.us-east-1.amazonaws.com/deepsparse-example] +3c2284f66840: Preparing +08fa02ce37eb: Preparing +a037458de4e0: Preparing +bafdbe68e4ae: Preparing +a13c519c6361: Preparing +6817758dd480: Waiting +6d95196cbe50: Waiting +e9872b0f234f: Waiting +c18b71656bcf: Waiting +2174eedecc00: Waiting +03ea99cd5cd8: Pushed +585a375d16ff: Pushed +5bdcc8e2060c: Pushed +latest: digest: sha256:XXX size: 3884 +``` + +### Create SageMaker Model +A SageMaker `Model` can now be created referencing the pushed image. +The example model will be named `question-answering-example`. +As mentioned in the requirements, `ROLE_ARN` should be a string arn of an AWS +role with full access to SageMaker. 
+ +```python +sm_boto3 = boto3.client("sagemaker", region_name="us-east-1") + +region = boto3.Session().region_name +account_id = boto3.client("sts").get_caller_identity()["Account"] + +image_uri = "{}.dkr.ecr.{}.amazonaws.com/deepsparse-example:latest".format(account_id, region) + +cm_res = sm_boto3.create_model( + ModelName="question-answering-example", + Containers=[ + { + "Image": image_uri, + }, + ], + ExecutionRoleArn=ROLE_ARN, + EnableNetworkIsolation=False, +) +``` + +More information about options for configuring SageMaker `Model` instances can +be found [here](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateModel.html). + + +### Build SageMaker EndpointConfig +The `EndpointConfig` is used to set the instance type to provision, how many, scaling +rules, and other deployment settings. The following code snippet defines an endpoint +with a single machine using an `ml.c5.large` CPU. + +* [Full list of available instances](https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-available-instance-types.html) (See Compute optimized (no GPUs) section) +* [EndpointConfig documentation and options](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateEndpointConfig.html) + +```python +model_name = "question-answering-example" # model defined above +initial_instance_count = 1 +instance_type = "ml.c5.large" + +variant_name = "QuestionAnsweringDeepSparseDemo" # ^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62} + +production_variants = [ + { + "VariantName": variant_name, + "ModelName": model_name, + "InitialInstanceCount": initial_instance_count, + "InstanceType": instance_type, + } +] + +endpoint_config_name = "QuestionAnsweringExampleConfig" # ^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62} + +endpoint_config = { + "EndpointConfigName": endpoint_config_name, + "ProductionVariants": production_variants, +} + +endpoint_config_res = sm_boto3.create_endpoint_config(**endpoint_config) +``` + +### Launch SageMaker Endpoint +Once the `EndpointConfig` is defined, 
the endpoint can be easily launched using +the `create_endpoint` command: + +```python +endpoint_name = "question-answering-example-endpoint" +endpoint_res = sm_boto3.create_endpoint( + EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name +) +``` + +After creating the endpoint, its status can be checked by running the following. +Initially, the `EndpointStatus` will be `Creating`. Checking after the image is +successfully launched, it will be `InService`. If there are any errors, it will +become `Failed`. + +```python +print(sm_boto3.describe_endpoint(EndpointName=endpoint_name)) +``` + + +## Making a request to the Endpoint +After the endpoint is in service, requests can be made to it through the +`invoke_endpoint` api. Inputs will be passed as a json payload. + +```python +import json + +sm_runtime = boto3.client("sagemaker-runtime", region_name="us-east-1") + +body = json.dumps( + dict( + question="Where do I live?", + context="I am a student and I live in Cambridge", + ) +) + +content_type = "application/json" +accept = "text/plain" + +res = sm_runtime.invoke_endpoint( + EndpointName=endpoint_name, + Body=body, + ContentType=content_type, + Accept=accept, +) + +print(res["body"].readlines()) +``` + + +### Cleanup +The model and endpoint can be deleted with the following commands: +```python +sm_boto3.delete_endpoint(EndpointName=endpoint_name) +sm_boto3.delete_endpoint_config(EndpointConfigName=endpoint_config_name) +sm_boto3.delete_model(ModelName=model_name) +``` + +## Next Steps +These steps create an invokable SageMaker inference endpoint powered with the DeepSparse +engine. The `EndpointConfig` settings may be adjusted to set instance scaling rules based +on deployment needs. + +More information on deploying custom models with SageMaker can be found +[here](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html). 
+ +Open an [issue](https://github.com/neuralmagic/deepsparse/issues) +or reach out to the [DeepSparse community](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-q1a1cnvo-YBoICSIw3L1dmQpjBeDurQ) +with any issues, questions, or ideas. diff --git a/examples/aws-sagemaker/config.yaml b/examples/aws-sagemaker/config.yaml new file mode 100644 index 0000000000..cfca35a27c --- /dev/null +++ b/examples/aws-sagemaker/config.yaml @@ -0,0 +1,5 @@ +models: + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant-moderate + batch_size: 1 +integration: sagemaker From ead0d4db01210a3f5f8a206f191b111fcead5609 Mon Sep 17 00:00:00 2001 From: Benjamin Date: Fri, 22 Apr 2022 17:45:47 -0400 Subject: [PATCH 3/4] fix ecr repo name --- examples/aws-sagemaker/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/aws-sagemaker/README.md b/examples/aws-sagemaker/README.md index 572e1e58b3..de784d95b0 100644 --- a/examples/aws-sagemaker/README.md +++ b/examples/aws-sagemaker/README.md @@ -90,7 +90,7 @@ The `region_name` can be swapped to a preferred region. The repository will be n import boto3 ecr = boto3.client("ecr", region_name='us-east-1') -cr_res = ecr.create_repository(repositoryName="sagemaker-1") +cr_res = ecr.create_repository(repositoryName="deepsparse-sagemaker") ``` ### Push local image to ECR Repository From 2c3c7e9534b20937cf3fca098dbcb8d9ab9c4825 Mon Sep 17 00:00:00 2001 From: Benjamin Date: Fri, 22 Apr 2022 18:05:58 -0400 Subject: [PATCH 4/4] readme code changes from testing --- examples/aws-sagemaker/README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/aws-sagemaker/README.md b/examples/aws-sagemaker/README.md index de784d95b0..315c480391 100644 --- a/examples/aws-sagemaker/README.md +++ b/examples/aws-sagemaker/README.md @@ -90,7 +90,7 @@ The `region_name` can be swapped to a preferred region. 
The repository will be n import boto3 ecr = boto3.client("ecr", region_name='us-east-1') -cr_res = ecr.create_repository(repositoryName="deepsparse-sagemaker") +create_repository_res = ecr.create_repository(repositoryName="deepsparse-sagemaker") ``` ### Push local image to ECR Repository @@ -103,7 +103,7 @@ region=$(aws configure get region) ecr_account=${account}.dkr.ecr.${region}.amazonaws.com aws ecr get-login-password --region $region | docker login --username AWS --password-stdin $ecr_account -fullname=$ecr_account/deepsparse-example:latest +fullname=$ecr_account/deepsparse-sagemaker:latest docker tag deepsparse-sagemaker-example:latest $fullname docker push $fullname @@ -141,9 +141,9 @@ sm_boto3 = boto3.client("sagemaker", region_name="us-east-1") region = boto3.Session().region_name account_id = boto3.client("sts").get_caller_identity()["Account"] -image_uri = "{}.dkr.ecr.{}.amazonaws.com/deepsparse-example:latest".format(account_id, region) +image_uri = "{}.dkr.ecr.{}.amazonaws.com/deepsparse-sagemaker:latest".format(account_id, region) -cm_res = sm_boto3.create_model( +create_model_res = sm_boto3.create_model( ModelName="question-answering-example", Containers=[ { @@ -210,7 +210,8 @@ successfully launched, it will be `InService`. If there are any errors, it will become `Failed`. ```python -print(sm_boto3.describe_endpoint(EndpointName=endpoint_name)) +from pprint import pprint +pprint(sm_boto3.describe_endpoint(EndpointName=endpoint_name)) ``` @@ -240,7 +241,7 @@ res = sm_runtime.invoke_endpoint( Accept=accept, ) -print(res["body"].readlines()) +print(res["Body"].readlines()) ```