### Build and push image

In [None]:
%%sh

# Build the Docker image found in ./container and push it to an ECR repository
# in the caller's account, creating the repository first if it is missing.

# Stop at the first failing command so a failed build or login is never
# followed by a tag/push of a stale image.
set -e

# The name of our algorithm (used as the local image name and the ECR repository name)
algorithm_name=sagemaker-tf-profiler

cd container

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-east-1 if none defined).
# `aws configure get` exits non-zero when the key is unset, so shield it from `set -e`.
region=$(aws configure get region) || true
region=${region:-us-east-1}

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it. The region is passed
# explicitly so the us-east-1 fallback above also applies to these calls.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Authenticate Docker against the ECR registry. `aws ecr get-login` no longer
# exists in AWS CLI v2; `get-login-password` is the supported replacement and
# keeps the token off the command line by piping it through stdin.
aws ecr get-login-password --region "${region}" \
    | docker login --username AWS --password-stdin "${registry}"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

In [None]:
import boto3

# Rebuild the URI of the image pushed by the build-and-push cell above.
account_id = boto3.client("sts").get_caller_identity()["Account"]
# Fall back to us-east-1 when no region is configured, mirroring the shell cell;
# without the fallback the URI would contain the literal text "None".
region = boto3.Session().region_name or "us-east-1"
image_uri = "{}.dkr.ecr.{}.amazonaws.com/sagemaker-tf-profiler:latest".format(
    account_id, region
)

### Deploy our Model to an Endpoint
Our container has been pushed to ECR and our model is in S3, so we now have everything we need to deploy to a SageMaker endpoint.

In [None]:
%%time
# S3 location of the model archive; the bucket name is suffixed with the region.
saved_model = f"s3://sagemaker-sample-data-{region}/tensorflow/model/resnet/resnet_50_v2_fp32_NCHW.tar.gz"

In [None]:
import sagemaker

# Role looked up through the SageMaker SDK; it is handed to the Model
# constructor further down as `role=role`.
role = sagemaker.get_execution_role()

In [None]:
%%writefile inference.py

import json
import time

def input_handler(data, context):
    """Turn an incoming request into the body sent to the TensorFlow Serving REST API.

    Args:
        data (obj): the request payload; consumed with ``data.read()`` and decoded as UTF-8
        context (Context): request details; only ``request_content_type`` is used here

    Returns:
        (str): for ``application/json`` the decoded body unchanged; for ``text/csv``
            a JSON document holding the parsed floats under ``instances``

    Raises:
        ValueError: if the content type is neither ``application/json`` nor ``text/csv``
    """
    # Fixed two-second pause before any processing
    # (presumably artificial latency for the profiling demo -- confirm).
    time.sleep(2)
    content_type = context.request_content_type

    if content_type == 'application/json':
        # JSON is forwarded as-is; nothing validates it at this point.
        return data.read().decode('utf-8')

    if content_type == 'text/csv':
        # Minimal CSV support: a single flat, comma-separated list of numbers.
        fields = data.read().decode('utf-8').split(',')
        instances = list(map(float, fields))
        return json.dumps({'instances': instances})

    raise ValueError('{{"error": "unsupported content type {}"}}'.format(
        content_type or "unknown"))


def output_handler(data, context):
    """Convert the TensorFlow Serving response into what is sent back to the client.

    Args:
        data (obj): the TensorFlow Serving response; ``status_code`` and ``content`` are read
        context (Context): request details; only ``accept_header`` is used here

    Returns:
        (bytes, string): the untouched response content and the content type
            taken from the request's accept header

    Raises:
        ValueError: if the response status is not 200; the message is the decoded response body
    """
    if data.status_code == 200:
        # Fixed two-second pause on the success path only
        # (presumably artificial latency for the profiling demo -- confirm).
        time.sleep(2)
        return data.content, context.accept_header

    raise ValueError(data.content.decode('utf-8'))

In [None]:
from sagemaker.tensorflow.serving import Model
from sagemaker.local import LocalSession

# Bundle the S3 model archive, the custom serving image and the inference.py
# handlers into a single Model object that can be deployed.
tensorflow_model = Model(
    model_data=saved_model,
    role=role,
    entry_point="inference.py",
    image_uri=image_uri,
)

In [None]:
%%time
# Deploy the model on one ml.m5.xlarge instance; the returned predictor is
# what the next cell uses to send requests.
predictor = tensorflow_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
)

In [None]:
%%time
import numpy as np

# Dummy request: a random array of shape (1, 1, 3, 3) sent under the "inputs" key.
random_input = np.random.rand(1, 1, 3, 3)
payload = {"inputs": random_input.tolist()}

# Exactly one request is sent; widen the range to issue more.
for _ in range(1):
    prediction = predictor.predict(payload)