# Configuring GEC Inference Endpoint


## Prerequisites

The pre-trained NER model must be pickled and uploaded to the correct S3 bucket as a tarball.

## Imports and Setup

In [1]:
import os
import boto3
import sagemaker
import pprint

# --- Static configuration -------------------------------------------------
# S3 location of the pickled, pre-trained model tarball.
BUCKET = "project-langbot-models"
KEY = "gec_cows_l2h_small.gz"
PRETRAINED_MODEL_DATA = f"s3://{BUCKET}/{KEY}"

# Name of the inference container image (used as the ECR repository name below).
INFERENCE_ALGORITHM_NAME = "sm-gec-aws"
# Name given to the deployed SageMaker endpoint.
ENDPOINT_NAME = "sm-gec-aws"

# --- Helpers and AWS handles ----------------------------------------------
pp = pprint.PrettyPrinter(indent=1)

role = sagemaker.get_execution_role()
sess = sagemaker.Session()

sm_boto3 = boto3.client("sagemaker")
region = boto3.Session().region_name
account_id = boto3.client("sts").get_caller_identity()["Account"]

# Fully qualified ECR URI of the inference image in this account and region.
ecr_registry = f"{account_id}.dkr.ecr.{region}.amazonaws.com"
IMAGE_URI_INFERENCE = f"{ecr_registry}/{INFERENCE_ALGORITHM_NAME}:latest"

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


## Check Saved Model

In [2]:
# Confirm that the model artifact really exists before deploying.
# Building an s3.Object is lazy and performs no request, so on its own it
# cannot detect a missing or misnamed key. load() issues a HEAD request and
# raises botocore.exceptions.ClientError when the object is absent or
# inaccessible, so this cell fails fast instead of the deployment failing later.
print(PRETRAINED_MODEL_DATA)
model_object = boto3.Session().resource("s3").Bucket(BUCKET).Object(KEY)
model_object.load()
model_object

s3://project-langbot-models/gec_cows_l2h_small.gz


s3.Object(bucket_name='project-langbot-models', key='gec_cows_l2h_small.gz')

## Create Docker Container For Inference

### Dockerfile

In [3]:
# Print the Dockerfile that the inference image is built from.
!cat container/Dockerfile-inference

# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

# For more information on creating a Dockerfile
# https://docs.docker.com/compose/gettingstarted/#step-2-create-a-dockerfile
# https://github.com/awslabs/amazon-sagemaker-examples/master/advanced_functionality/pytorch_extending_our_containers/pytorch_extending_our_containers.ipynb

ARG REGION=us-west-2

# SageMaker PyTorch image for INFERENCE
FROM 763104351884.dkr.ecr.$REGION.amazonaws.com/pytorch-inference:1.12.1-gpu-py38-cu

### Inference script - gec-inference.py

In [4]:
# Show the inference entry-point script with syntax highlighting
# (needs the `pygmentize` command-line tool from Pygments on the PATH).
!pygmentize container/gec/gec-inference.py

[34mimport[39;49;00m [04m[36mlogging[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mtorch[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mjson[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mpickle[39;49;00m[37m[39;49;00m
[37m[39;49;00m
JSON_CONTENT_TYPE = [33m'[39;49;00m[33mapplication/json[39;49;00m[33m'[39;49;00m[37m[39;49;00m
[37m[39;49;00m
logger = logging.getLogger([31m__name__[39;49;00m)[37m[39;49;00m
logger.setLevel(logging.DEBUG)[37m[39;49;00m
[37m[39;49;00m
[37m[39;49;00m
[34mdef[39;49;00m [32mmodel_fn[39;49;00m(model_dir):[37m[39;49;00m
    logger.info([33mf[39;49;00m[33m"[39;49;00m[33minside model_fn, model_dir= [39;49;00m[33m{[39;49;00mmodel_dir[33m}[39;49;00m[33m"[39;49;00m)[37m[39;49;00m
    device = [33m"[39;49;00m[33mcuda[39;49;00m[33m"[39;49;00m [34mif[39;49;00m torch.cuda.is_available() [34melse[39;49;00m [33m

### Build and Push Container

In [5]:
# From the container/ directory, run build_and_push.sh with two arguments:
# the image name (INFERENCE_ALGORITHM_NAME) and the Dockerfile to build from.
# NOTE(review): per its logged output the script logs in to ECR and runs a
# docker build; the final push to ECR is presumed from the script name -- confirm.
! cd container && sh build_and_push.sh {INFERENCE_ALGORITHM_NAME} Dockerfile-inference

ECR image fullname: 571667364805.dkr.ecr.us-west-2.amazonaws.com/sm-gec-aws:latest
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  17.92kB
Step 1/8 : ARG REGION=us-west-2
Step 2/8 : FROM 763104351884.dkr.ecr.$REGION.amazonaws.com/pytorch-inference:1.12.1-gpu-py38-cu113-ubuntu20.04-sagemaker
 ---> cc486ae090f7
Step 3/8 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> ed6c9f68b5de
Step 4/8 : COPY /gec /opt/ml/code
 ---> Using cache
 ---> 923b5fcc86b5
Step 5/8 : ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code
 ---> Using cache
 ---> fd25ac165e77
Step 6/8 : ENV SAGEMAKER_PROGRAM gec-inference.py
 ---> Using cache
 ---> 58bc40662a62
Step 7/8 : RUN pip install --no-cache-dir --upgrade pip &&     pip install --no-cache-dir numba==0.53.1 protobuf==3.20.* && pip install --no-cache-dir simpletransformers==0.64.3

## Create Inference Endpoint

In [6]:
from sagemaker import Model

import sagemaker as sage

# Session used for the deployment (a regular session, not a local one).
sess = sage.Session()

# Instance type that hosts the endpoint. Other candidates:
#   "ml.m5.xlarge"   -- no GPU, will trigger an error
#   "ml.g4dn.xlarge"
instance_type = "ml.p2.xlarge"

# Describe the model to host: the custom inference image, the pre-trained
# artifact in S3, and the inference script shipped from container/gec.
# All other optional Model arguments (predictor_cls, env, name, vpc_config,
# ...) are left at their defaults.
estimator = Model(
    role=role,
    image_uri=IMAGE_URI_INFERENCE,
    model_data=PRETRAINED_MODEL_DATA,
    entry_point="gec-inference.py",
    source_dir="container/gec",
    sagemaker_session=sess,
)

# Deploy the model behind a single-instance endpoint named ENDPOINT_NAME.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    endpoint_name=ENDPOINT_NAME,
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
-------!

## Test Endpoint

In [7]:
import json

# JSON request body carrying one sentence under the "line" key.
payload = json.dumps({"line":"Hola Como estas?"})

# Invoke the deployed endpoint through the session's SageMaker runtime client.
sm_client = sess.sagemaker_runtime_client
response = sm_client.invoke_endpoint(
    EndpointName=ENDPOINT_NAME,
    ContentType="application/json",
    Body=payload,
)

# The response body is a stream: read it once and decode the bytes to text.
r = response["Body"]
result_text = r.read().decode()
print("RESULT r.read().decode():", result_text)

RESULT r.read().decode(): {
  "output": [
    [
      {
        "Hola": "O"
      },
      {
        "Como": "O"
      },
      {
        "estas?": "O"
      }
    ]
  ]
}


## Optional cleanup of the created endpoint
The created endpoint can be deleted with the code below.

This section marks the end of the notebook.

In [8]:
import boto3

client = boto3.client("sagemaker")

# Look up the endpoint configuration (it shares the endpoint's name) and note
# which model its first production variant points at.
response = client.describe_endpoint_config(EndpointConfigName=ENDPOINT_NAME)
first_variant = response["ProductionVariants"][0]
model_name = first_variant["ModelName"]

# Only the endpoint itself is deleted here. To also remove the endpoint
# configuration and the model, uncomment the two calls below and run them
# after this cell:
# client.delete_endpoint_config(EndpointConfigName=ENDPOINT_NAME)
# client.delete_model(ModelName=model_name)
client.delete_endpoint(EndpointName=ENDPOINT_NAME)

{'ResponseMetadata': {'RequestId': '3c23ebd8-0982-43c3-a797-b41f04869fa1',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '3c23ebd8-0982-43c3-a797-b41f04869fa1',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Tue, 14 Nov 2023 07:54:17 GMT'},
  'RetryAttempts': 0}}