# SageMaker BYO Inference container (nginx, gunicorn, Flask)

In [None]:
!pip install -U awscli boto3 sagemaker rich rich-cli watermark --quiet

In [None]:
# Load the IPython extensions shipped with the watermark and rich packages
%load_ext watermark
%load_ext rich

# Report the versions of the listed packages for reproducibility
%watermark -p awscli,boto3,sagemaker,xgboost,sklearn,rich

### Build and test custom inference image locally

In [None]:
# Print the Dockerfile in the current directory
!cat Dockerfile

In [None]:
# Build a local image tagged `nginx`, using the current directory as the build context
!docker build -t nginx .

### Launch Inference container locally

- Mount the [models](./models) directory at `/opt/ml/model` inside the container
- Map container port 8080 to host port 8080
- `docker run --rm -v $(pwd)/models:/opt/ml/model -p 8080:8080 nginx`


In [None]:
# Open a terminal and run this command to launch the container locally
# docker run --rm -v $(pwd)/models:/opt/ml/model -p 8080:8080 nginx

In [None]:
# Ping the local inference endpoint on host port 8080
# (requires the container from the previous step to be running)
!curl http://localhost:8080/ping

#### Test records for inference can be found in [test.csv](./test.csv)

In [None]:
# Send one CSV test record to /invocations on the local endpoint;
# the Content-Type header declares the payload as text/csv and -v prints verbose request/response details
!curl --data-raw '-1.3317586042173168,-1.1425409076053987,-1.0579488602777858,-1.177706547272754,-1.130662184748842,-1.1493955859050584,-1.139968767909096,0.0,1.0,0.0' -H 'Content-Type: text/csv' \
-v http://localhost:8080/invocations


### Tag and push the local image to private ECR

Now tag the local `nginx` image in the ECR format `{account_id}.dkr.ecr.{region}.amazonaws.com/{imagename}:{tag}`

Run [./build_n_push.sh](./build_n_push.sh) shell script with image name `nginx` as parameter

In [None]:
# Make the helper script executable, then run it with the local image name `nginx` as its argument
# (per the section text above, the script tags the image and pushes it to ECR — script body not shown here)
!chmod +x ./build_n_push.sh
!./build_n_push.sh nginx

## Deploy your model to SageMaker Endpoint using custom inference image

- Step 1: Initialize the SageMaker session, compress your model to `model.tar.gz` format and upload it to S3
- Step 2: Create a Model object with your custom inference image
- Step 3: Deploy the model to an endpoint

#### Step 1: Initialize Session and upload model artifacts to S3

In [None]:
import boto3
import sagemaker
import os
import tarfile
from rich import print
from sagemaker import get_execution_role, session
from sagemaker.s3 import S3Downloader, S3Uploader, s3_path_join

# Session-wide handles reused by the remaining cells of the notebook.
sm_session = session.Session()
# Read the region through the public property instead of the private
# `_region_name` attribute, which is an implementation detail of Session.
region = sm_session.boto_region_name
role = get_execution_role()
bucket = sm_session.default_bucket()
sm_client = boto3.client("sagemaker")

prefix = "sagemaker/abalone"
account_id = boto3.client("sts").get_caller_identity().get("Account")
# S3 location under which the model archive for this container is stored.
model_s3uri = s3_path_join(f"s3://{bucket}/{prefix}", "models/byoc/nginx")

print(f"Role: {role}")
print(f"Bucket: {bucket}")
print(f"Model base: {model_s3uri}")

# NOTE(review): the listing result is discarded because this is not the last
# expression of the cell; kept as-is to preserve the original call.
S3Downloader.list(model_s3uri)

model_path = os.path.join("./models", "xgboost-model")
model_output_path = os.path.join("./models", "model.tar.gz")

if not os.path.exists(model_output_path):
    print(f"Compressing model to {model_output_path}")
    try:
        # The context manager closes the archive even when adding the model fails.
        with tarfile.open(model_output_path, "w:gz") as tar:
            tar.add(model_path, arcname="xgboost-model")
    except Exception:
        # Remove the partial archive: otherwise the exists() check above would
        # accept it on the next run and an incomplete model would be uploaded.
        if os.path.exists(model_output_path):
            os.remove(model_output_path)
        raise
else:
    print(f"Model file exists: {model_output_path}")

# Upload the archive; the cell output is the S3 URI returned by the uploader.
S3Uploader.upload(
    local_path=model_output_path,
    desired_s3_uri=model_s3uri,
    sagemaker_session=sm_session,
)

#### Step 2: Create model object with custom inference image

In [None]:
from datetime import datetime
from uuid import uuid4
from sagemaker.model import Model

# Image reference in private ECR: <account>.dkr.ecr.<region>.amazonaws.com/<name>:<tag>
image_name = "nginx"
ecr_registry = f"{account_id}.dkr.ecr.{region}.amazonaws.com"
ecr_image = f"{ecr_registry}/{image_name}:latest"

# A short random token plus today's date is appended to resource names
# so that repeated runs do not collide.
unique_token = str(uuid4())[:5]
date_stamp = datetime.now().strftime("%d%b%Y")
suffix = f"{unique_token}-{date_stamp}"

model_name = f"AbaloneXGB-predictor-{suffix}"
model_data_url = s3_path_join(model_s3uri, "model.tar.gz")

print(f"model_image_uri: {ecr_image}")
print(f"Creating model : {model_name} with {model_data_url}")

# Model definition pairing the custom inference image with the uploaded artifact.
predictor_model = Model(
    name=model_name,
    image_uri=ecr_image,
    model_data=model_data_url,
    role=role,
    sagemaker_session=sm_session,
)

#### Step 3: Deploy model to endpoint

In [None]:
# The endpoint shares the unique suffix generated for the model name.
endpoint_name = f"Abalone-nginx-ep-{suffix}"

print(f"Deploying model: {model_name}")
predictor = predictor_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    endpoint_name=endpoint_name,
    # Do not block here; the next cell waits for the endpoint explicitly.
    wait=False,
)

### Wait for endpoint to be InService

In [None]:
from rich import print

def _report_endpoint_status(endpoint_name):
    """Describe the endpoint, print its status, and return (response, status)."""
    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
    status = resp["EndpointStatus"]
    print(f"Endpoint [b]{endpoint_name}[/b] Status: [i]{status}[/i]")
    return resp, status


# Status before waiting
resp, status = _report_endpoint_status(endpoint_name)

# Block until the waiter reports the endpoint as in service
sm_client.get_waiter("endpoint_in_service").wait(EndpointName=endpoint_name)

# Status after the waiter returns
resp, status = _report_endpoint_status(endpoint_name)

### Test real-time endpoint on SageMaker with inference records
[test.csv](./test.csv)

In [None]:
import json
import os
from time import sleep, time

import numpy as np
import pandas as pd
from sagemaker.serializers import CSVSerializer

runtime_sm_client = boto3.client("sagemaker-runtime")

LOCALDIR = "."

local_test_dataset = f"{LOCALDIR}/test.csv"

# Maximum number of records sent to the endpoint. Failed invocations count too,
# so a broken endpoint is not called once for every row in the file.
limit = 10

with open(local_test_dataset, "r") as _f:
    # The first line of the file is the header row: show it, never send it.
    header = next(_f, None)
    if header is not None:
        print("Headers")
        print(header)
        print("---" * 20)

    attempts = 0
    for row in _f:
        # Stop as soon as the quota is used up instead of scanning the rest of the file.
        if attempts >= limit:
            break

        row = row.rstrip("\n")
        if not row.strip():
            # Blank lines carry no record; do not send an empty payload.
            continue

        # The first column is the target; everything after the first comma is the payload.
        label, _, input_cols = row.partition(",")
        attempts += 1
        try:
            print("Invoking EP with record")
            prediction = runtime_sm_client.invoke_endpoint(
                EndpointName=endpoint_name,
                ContentType="text/csv",
                Body=input_cols,
            )
            response = prediction["Body"].read().decode("utf-8")
            print(f"True: {label} | Predicted: {response}")
            # Small pause between successful calls, as in the original loop.
            sleep(0.5)
        except Exception as e:
            # Best effort: report the failure and move on to the next record.
            print(f"Prediction error: {e}")

### Verify Logs emitted by the endpoint in CloudWatch

In [None]:
from datetime import datetime, timedelta, timezone

logs_client = boto3.client("logs")

# Use a timezone-aware UTC timestamp. Calling .timestamp() on the naive value
# returned by datetime.utcnow() interprets it as local time, which shifts the
# query window by the UTC offset on any machine that is not running in UTC.
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(minutes=15)

log_group_name = f"/aws/sagemaker/Endpoints/{endpoint_name}"

# Ask explicitly for the most recently active stream; the default ordering is
# by stream name, so index 0 is not guaranteed to be the latest one.
log_streams = logs_client.describe_log_streams(
    logGroupName=log_group_name,
    orderBy="LastEventTime",
    descending=True,
    limit=1,
)

if not log_streams["logStreams"]:
    # The group can exist before the container has written anything.
    print(f"No log streams found in {log_group_name}")
else:
    log_stream_name = log_streams["logStreams"][0]["logStreamName"]

    # Retrieve the logs for the last 15 minutes (bounds are epoch milliseconds)
    logs = logs_client.get_log_events(
        logGroupName=log_group_name,
        logStreamName=log_stream_name,
        startTime=int(start_time.timestamp() * 1000),
        endTime=int(end_time.timestamp() * 1000),
    )

    # Print the logs; event timestamps are rendered in the local timezone
    for event in logs["events"]:
        print(f"{datetime.fromtimestamp(event['timestamp'] // 1000)}: {event['message']}")

### Cleanup

In [None]:
# Delete endpoint, endpoint_configuration and model
print(f"EP: {endpoint_name}")

# Resolve the endpoint configuration and the model(s) behind the endpoint
# before deleting it; afterwards they can no longer be discovered through it.
endpoint_config_name = None
model_names = []
try:
    endpoint_config_name = sm_client.describe_endpoint(EndpointName=endpoint_name)[
        "EndpointConfigName"
    ]
    variants = sm_client.describe_endpoint_config(
        EndpointConfigName=endpoint_config_name
    )["ProductionVariants"]
    model_names = [v["ModelName"] for v in variants if v.get("ModelName")]
except Exception as e:
    print(f"Error looking up resources for EP: {endpoint_name}\n{e}")

# Each deletion is best effort so that one failure does not block the others.
try:
    print(f"Deleting endpoint: {endpoint_name}")
    sm_client.delete_endpoint(EndpointName=endpoint_name)
except Exception as e:
    print(f"Error deleting EP: {endpoint_name}\n{e}")

if endpoint_config_name:
    try:
        print(f"Deleting endpoint config: {endpoint_config_name}")
        sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
    except Exception as e:
        print(f"Error deleting endpoint config: {endpoint_config_name}\n{e}")

for deployed_model_name in model_names:
    try:
        print(f"Deleting model: {deployed_model_name}")
        sm_client.delete_model(ModelName=deployed_model_name)
    except Exception as e:
        print(f"Error deleting model: {deployed_model_name}\n{e}")