In [12]:
# Enable autoreload (mode 2) so that edits to imported modules are picked up live
%load_ext autoreload
%autoreload 2

# Deployment notebook

This notebook is used to deploy the endpoint using the Sagemaker SDK, both locally and 
online. This is not meant to be the main source of endpoint provision, which should be
done with terraform through the CD pipeline, but rather this is a way to test that
everything works before provisioning it.

It also registers the model in the model registry for CD provisioning later.


--- 

**Note**: this notebook must be run outside of the `dev environment` container, because 
the SageMaker local development container can't spin up from inside it.

The development workflow is as follows: 
- All the development happens inside the dev container
- Only when the notebook actually needs to be run, run it from a separate VS Code 
window that is connected over SSH only
- The `inference.py` script should first be tested through its individual functions, e.g. as shown
in the `aws/endpoint/src/tests/` folder. Only once these work as expected should you
execute the notebook. This is a huge time-saver, because the notebook can be
very slow to run.

---

Before running the cells, make sure you login to AWS using either:

- `aws configure sso` → for first time login
- `aws sso login` → for all subsequent logins

In [1]:
# Settings common to the local and the online deployment flows

# name under which the endpoint is created
endpoint_name = "endpoint-musicgen-0001-dev"

# model identity, packaged artifact and inference script (relative paths)
model_name = "musicgen"
model_data = "../model/model.tar.gz"
model_entry_point = "../src/code/inference.py"

In [2]:
# Keep SageMaker's local scratch files inside the repository so that /tmp does not fill up
import os
from pathlib import Path

# the repository root is taken to be three levels above the current working directory
working_dir = Path(os.getcwd())
repo_root_dir = working_dir.parents[2].resolve()
local_temp_folder_path = str(repo_root_dir.joinpath(".temp", "sagemaker_local"))

## Local

In [None]:
!pip install sagemaker[local]

In [5]:
import sagemaker
from sagemaker.local import LocalSession
from sagemaker.pytorch import PyTorchModel

# Session used for the local deployment: both the container root and the download
# directory point at the repo-local temp folder.
session = LocalSession()

local_mode_config = {
    "local_code": True,
    "container_root": local_temp_folder_path,
}
session.config = {"local": local_mode_config}

local_session_settings = sagemaker.session_settings.SessionSettings(
    local_download_dir=local_temp_folder_path,
)
session.settings = local_session_settings

# role resolved by the SDK from the current AWS credentials
role = sagemaker.get_execution_role()

print("Role:", role)
print("Local temp folder path:", local_temp_folder_path)

Role: arn:aws:iam::138140302683:role/aws-reserved/sso.amazonaws.com/AWSReservedSSO_AdministratorAccess_6f1d7369dc867f6b
Local temp folder path: /home/ubuntu/musicgen-endpoint-ableton/.temp/sagemaker_local


In [18]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer

# model_image_uri = sagemaker.image_uris.retrieve(
#     framework="pytorch",
#     region="us-east-1",
#     version="2.0",
#     py_version="py310",
#     image_scope="inference",
#     instance_type="ml.g4dn.xlarge",
# )

# container image the endpoint is served from
model_image_uri = "public.ecr.aws/s0f8z6e9/musicgen-pytorch:1.0"

# model definition, bound to the local session
model_settings = dict(
    name=model_name,
    role=role,
    entry_point=model_entry_point,
    model_data=model_data,
    image_uri=model_image_uri,
    sagemaker_session=session,
)
model = PyTorchModel(**model_settings)

# single local GPU instance; requests and responses are exchanged as JSON
deploy_settings = dict(
    initial_instance_count=1,
    instance_type="local_gpu",
    endpoint_name=endpoint_name,
    sagemaker_session=session,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)
predictor = model.deploy(**deploy_settings)

Using the short-lived AWS credentials found in session. They might expire while running.


Attaching to 7wble4m30k-algo-1-oo19j
7wble4m30k-algo-1-oo19j  | [0m['torchserve', '--start', '--model-store', '/.sagemaker/ts/models', '--ts-config', '/etc/sagemaker-ts.properties', '--log-config', '/opt/conda/lib/python3.10/site-packages/sagemaker_pytorch_serving_container/etc/log4j2.xml', '--models', 'model=/opt/ml/model']
7wble4m30k-algo-1-oo19j  | 2023-06-14T17:56:24,833 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Initializing plugins manager...
7wble4m30k-algo-1-oo19j  | 2023-06-14T17:56:24,905 [INFO ] main org.pytorch.serve.metrics.configuration.MetricConfiguration - Successfully loaded metrics configuration from /opt/conda/lib/python3.10/site-packages/ts/configs/metrics.yaml
7wble4m30k-algo-1-oo19j  | 2023-06-14T17:56:25,048 [INFO ] main org.pytorch.serve.ModelServer - 
7wble4m30k-algo-1-oo19j  | Torchserve version: 0.8.0
7wble4m30k-algo-1-oo19j  | TS Home: /opt/conda/lib/python3.10/site-packages
7wble4m30k-algo-1-oo19j  | Current directory: /
7wble4m30k-alg

## Online

In [1]:
import boto3
import sagemaker
from sagemaker.huggingface.model import HuggingFaceModel

# AWS handles for the online deployment
boto_session = boto3.Session()
client = boto3.client("sagemaker")

# SageMaker session whose downloads go to the repo-local temp folder
sagemaker_session = sagemaker.Session()
online_session_settings = sagemaker.session_settings.SessionSettings(
    local_download_dir=local_temp_folder_path,
)
sagemaker_session.settings = online_session_settings

# fixed execution role for the online flow (rebinds `role` if the Local section was run)
role = "arn:aws:iam::138140302683:role/service-role/AmazonSageMaker-ExecutionRole-20230522T162566"

In [13]:
# step 1: create the model
# NOTE(review): `model_image_uri` is not assigned anywhere in the Online section; in this
# notebook it is only set by the Local deployment cell, which must have been run first.

online_model_settings = dict(
    name=model_name,
    role=role,
    entry_point=model_entry_point,
    model_data=model_data,
    image_uri=model_image_uri,
    sagemaker_session=sagemaker_session,
)
model = HuggingFaceModel(**online_model_settings)

In [14]:
# step 2: register the model (JSON in / JSON out) in the model package group

json_mime_types = ["application/json"]
registration_settings = dict(
    model_package_group_name="ai-module-group-name",
    content_types=json_mime_types,
    response_types=list(json_mime_types),
    inference_instances=["ml.g4dn.xlarge"],
    approval_status="Approved",
)

# last expression of the cell, so the returned object is displayed
model.register(**registration_settings)

<sagemaker.model.ModelPackage at 0x7f6bf15d3af0>

In [5]:
# step 3: create endpoint (one ml.g4dn.xlarge instance, named after the shared setting)

online_deploy_settings = dict(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
    endpoint_name=endpoint_name,
)
predictor = model.deploy(**online_deploy_settings)

-------------!

# Predictor tests

In [22]:
import base64

def base64_to_audio_file(base64_string: str, audio_file_path: str) -> None:
    """Decode a base64-encoded string and write the resulting bytes to a file.

    Args:
        base64_string: base64-encoded content of the audio file.
        audio_file_path: destination path; the file is created or overwritten.

    Raises:
        binascii.Error: if ``base64_string`` is not valid base64. In that case the
            destination file is left untouched.
    """

    # Decode before opening the destination, so that an invalid payload does not leave
    # an empty file behind (or truncate an existing one).
    audio_bytes = base64.b64decode(base64_string)

    with open(audio_file_path, "wb") as audio_file:
        audio_file.write(audio_bytes)

# request payload used by the predictor tests
input_data = dict(
    prompt="berghain acid techno",
    duration=15,
    temperature=1.0,
    top_p=0.0,
    top_k=250,
    cfg_coefficient=3.0,
)

In [23]:
# send the test payload through the SDK predictor
response = predictor.predict(data=input_data)
print("Response:", response)

# the generated audio comes back base64-encoded under result -> prediction
prediction_result = response["result"]
base64_audio = prediction_result["prediction"]
base64_to_audio_file(
    base64_string=base64_audio,
    audio_file_path="predictor_response.mp3",
)

7wble4m30k-algo-1-oo19j  | 2023-06-14T17:58:18,390 [INFO ] epollEventLoopGroup-3-2 TS_METRICS - ts_inference_requests_total.Count:1.0|#model_name:model,model_version:default|#hostname:8654bb86a19c,timestamp:1686765498
7wble4m30k-algo-1-oo19j  | 2023-06-14T17:58:18,390 [INFO ] W-9000-model_1.0 org.pytorch.serve.wlm.WorkerThread - Flushing req.cmd PREDICT to backend at: 1686765498390
7wble4m30k-algo-1-oo19j  | 2023-06-14T17:58:18,391 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Backend received inference at: 1686765498
7wble4m30k-algo-1-oo19j  | 2023-06-14T17:58:18,392 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Received request with data: prompt='berghain acid techno' duration=15.0 temperature=1.0 top_p=0.0 top_k=250 cfg_coefficient=3.0
7wble4m30k-algo-1-oo19j  | 2023-06-14T17:58:18,409 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - New pattern, time steps: 750, sequence steps: 754
7wble4m30k-algo-1-oo19j  | 2023-06-14T17:59:18,198 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Output function

In [None]:
import boto3
import json

# Invoke the endpoint through the low-level runtime API instead of the SDK predictor.
# The target is the shared `endpoint_name` from the general settings. It is deliberately
# not re-assigned here: the previous hard-coded "endpoint-image-generation-0001-dev" is
# not an endpoint this notebook deploys, and rebinding the shared name would also affect
# any deploy cell that is re-run afterwards.
runtime_client = boto3.client("sagemaker-runtime")

response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Accept="application/json",
    Body=json.dumps(input_data),
)

# the body is a stream: read it once and parse the JSON document it contains
response_body = json.loads(response["Body"].read().decode())

In [12]:
# Tear down what the predictor created: first the model, then the endpoint.
# NOTE: this deletes neither the model in the S3 bucket nor the model in the
# model registry

for teardown_step in (predictor.delete_model, predictor.delete_endpoint):
    teardown_step()