### Build and push image

In [2]:
from sagemaker import image_uris
# Look up the SageMaker PyTorch 1.9.0 inference image for this region; it is
# the base image that gets substituted into the Dockerfile.
region = "us-east-1"
image_uri = image_uris.retrieve(
    framework="pytorch",
    region=region,
    version="1.9.0",
    image_scope="inference",
    instance_type="ml.c5.4xlarge",
)
# Everything before the first "." of the image URI is the id of the account
# that owns the registry.
account_id = image_uri.partition(".")[0]

In [3]:
!$(aws ecr get-login --region $region --no-include-email --registry-ids $account_id)

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded


In [4]:
# Load the Dockerfile template from disk.
with open('container/Dockerfile', 'r') as dockerfile:
    template = dockerfile.read()

# Substitute the base image URI for the '{image_uri}' placeholder.
filedata = template.replace('{image_uri}', image_uri)

# Save the rendered Dockerfile over the template.
with open('container/Dockerfile', 'w') as dockerfile:
    dockerfile.write(filedata)

In [5]:
%%sh

# Build the container image in ./container and push it to this account's ECR.

# Stop at the first failing command so that a failed build is never followed
# by tagging and pushing a stale local image.
set -e

# The name of our algorithm (used as the local image name and the ECR repository name)
algorithm_name=sagemaker-pt-profiler

cd container

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-east-1 if none defined).
# `aws configure get` exits non-zero when the value is unset, hence the `|| true`.
region=$(aws configure get region || true)
region=${region:-us-east-1}

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# Account that hosts the base image named on the Dockerfile's FROM line.
# NOTE: the login below uses ${region}; the base image URI written into the
# Dockerfile must live in that same region for the pull to be authorised.
base_image_account=763104351884
base_registry="${base_image_account}.dkr.ecr.${region}.amazonaws.com"

# If the repository doesn't exist in ECR, create it.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to both registries: the base-image registry (to pull the FROM
# image) and our own registry (to push the result). `get-login-password`
# replaces the deprecated `get-login` subcommand and keeps the token off the
# command line.
for host in "${base_registry}" "${registry}"
do
    aws ecr get-login-password --region "${region}" \
        | docker login --username AWS --password-stdin "${host}"
done

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

Login Succeeded
Sending build context to Docker daemon  10.75kB
Step 1/3 : FROM 763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-inference:1.9.0-cpu-py38
 ---> edad66663723
Step 2/3 : RUN pip3 install codeguru_profiler_agent
 ---> Using cache
 ---> 3a1d4521631b
Step 3/3 : COPY Files/handler_service.py  /opt/conda/lib/python3.8/site-packages/sagemaker_pytorch_serving_container/handler_service.py
 ---> Using cache
 ---> 82d07a30e9f2
Successfully built 82d07a30e9f2
Successfully tagged sagemaker-pt-profiler:latest
The push refers to repository [171503325295.dkr.ecr.us-east-1.amazonaws.com/sagemaker-pt-profiler]
fbef3b18c6d8: Preparing
a27ad3a8a916: Preparing
77ed370a3fd1: Preparing
96e26b2e8a77: Preparing
a307804fdf8b: Preparing
5cc0f5722a41: Preparing
a6db19c017f8: Preparing
b51aacff698c: Preparing
0b754a16db36: Preparing
5b1afb4a605a: Preparing
a98f4a7568e6: Preparing
e6edda8500e4: Preparing
c2d6116b18c6: Preparing
b960da073da9: Preparing
58d122106d65: Preparing
b0a5b545ad92: Prepari

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



In [None]:
import boto3

# Build the URI of the custom image in this account's ECR repository.
account_id = boto3.client("sts").get_caller_identity()["Account"]
# region_name is None when no default region is configured. Fall back to
# us-east-1, the same default the build-and-push shell cell applies, so the
# URI names the repository the image was pushed to.
region = boto3.Session().region_name or "us-east-1"
image_uri = "{}.dkr.ecr.{}.amazonaws.com/sagemaker-pt-profiler:latest".format(account_id, region)

### Deploy our Model to an Endpoint
Our container has been pushed to ECR and our model is in S3, so we now have everything we need to deploy to a SageMaker endpoint.

In [None]:
import sagemaker
from sagemaker import get_execution_role, Session

# SageMaker session, plus the execution role that is later handed to the model.
sess = Session()
role = get_execution_role()

# S3 location of the model archive (model.tar.gz) to deploy.
bucket = "sagemaker-sample-files"
key = "datasets/image/MNIST/model/pytorch-training-2020-11-21-22-02-56-203/model.tar.gz"
pt_model = f"s3://{bucket}/{key}"

In [None]:
%%writefile inference.py

import json
import logging
import sys
import os

import torch
import torch.nn as nn
import torch.nn.functional as F

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))

# Based on https://github.com/pytorch/examples/blob/master/mnist/main.py
class Net(nn.Module):
    """Convolutional classifier: two conv layers followed by two linear layers.

    ``forward`` flattens the conv features to 320 values per sample and
    returns log-probabilities over 10 classes (``log_softmax`` along dim 1).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # First stage: conv -> 2x2 max-pool -> ReLU.
        stage1 = F.relu(F.max_pool2d(self.conv1(x), 2))

        # Second stage: conv -> channel dropout -> 2x2 max-pool -> ReLU.
        conv2_out = self.conv2_drop(self.conv2(stage1))
        stage2 = F.relu(F.max_pool2d(conv2_out, 2))

        # Flatten to (N, 320) for the fully connected head.
        flat = stage2.view(-1, 320)
        hidden = F.relu(self.fc1(flat))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, training=self.training)
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)


# Use the GPU when PyTorch can see one, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# defining model and loading weights to it.
def model_fn(model_dir):
    """Build a ``Net`` and load its weights from ``model.pth`` in ``model_dir``.

    Args:
        model_dir: Directory that contains the ``model.pth`` state dict.

    Returns:
        The model, moved to ``device`` and switched to evaluation mode.
    """
    model = Net()
    with open(os.path.join(model_dir, "model.pth"), "rb") as f:
        # map_location remaps tensors saved from another device (e.g. a
        # checkpoint written on a GPU) onto this host's device. Without it
        # torch.load fails on a CPU-only host for CUDA-saved checkpoints.
        state_dict = torch.load(f, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device).eval()
    return model


# data preprocessing
def input_fn(request_body, request_content_type):
    """Decode a JSON request body into a float32 tensor on ``device``.

    The body must be a JSON object whose ``"inputs"`` entry holds the numbers
    to feed to the model. Only ``application/json`` is accepted; any other
    content type fails the assertion.
    """
    assert request_content_type == "application/json"
    payload = json.loads(request_body)
    return torch.tensor(payload["inputs"], dtype=torch.float32, device=device)


# inference
def predict_fn(input_object, model):
    """Call ``model`` on ``input_object`` with gradient tracking disabled and return the result."""
    with torch.no_grad():
        return model(input_object)


# postprocess
def output_fn(predictions, content_type):
    """Serialize the prediction tensor to a JSON string of nested lists.

    Only ``application/json`` is accepted; any other content type fails the
    assertion.
    """
    assert content_type == "application/json"
    # Move to host memory first so the tensor can be converted via NumPy.
    as_lists = predictions.cpu().numpy().tolist()
    return json.dumps(as_lists)

In [None]:
# Create the CodeGuru Profiler profiling group whose name is later passed to
# the model container through the PROFILING_GROUP_NAME environment variable.
client = boto3.client('codeguruprofiler')
pytorch_profiling_group_name = "SageMaker-PyTorch"

try:
    retrieve_latest_features_boto3_client_create_profiling_group = client.create_profiling_group(
        profilingGroupName=pytorch_profiling_group_name,
    )
except client.exceptions.ConflictException:
    # The group already exists (for example from an earlier run of this
    # notebook). Fetch its description instead of failing so the cell can be
    # re-run safely.
    retrieve_latest_features_boto3_client_create_profiling_group = client.describe_profiling_group(
        profilingGroupName=pytorch_profiling_group_name,
    )


In [None]:
from sagemaker.pytorch import PyTorchModel
# Describe the SageMaker model: the model archive in S3, the custom image in
# ECR, and the inference script that supplies the handler functions.
model = PyTorchModel(
    model_data=pt_model,
    image_uri=image_uri,
    entry_point="inference.py",
    framework_version="1.9.0",
    role=role,
    # Environment variables set inside the serving container.
    env={"PROFILING_GROUP_NAME": pytorch_profiling_group_name},
)

In [None]:
%%time
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Deploy the model to a one-instance endpoint; requests and responses are
# both JSON.
instance_type = "ml.c4.xlarge"

predictor = model.deploy(
    instance_type=instance_type,
    initial_instance_count=1,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

In [None]:
%%time
import random
import time  # was missing: the loop below raised NameError on `time`

import numpy as np

# A random batch of 16 single-channel 28x28 inputs under the "inputs" key,
# which is the key inference.py's input_fn reads from the request.
dummy_data = {"inputs": np.random.rand(16, 1, 28, 28).tolist()}

timeout = 300 #5min/300s

timeout_start = time.time()

# Send requests back-to-back until `timeout` seconds have elapsed
# (presumably to give the profiler endpoint activity to sample).
# The previous `test` counter was reset to 0 on every iteration, so its
# `test == 5` early exit could never fire; it has been removed.
while time.time() < timeout_start + timeout:
    prediction = predictor.predict(dummy_data)
