# GEC Model Deployment

## Setup and Imports

In [None]:
# %%capture insall_out --no-stderr

!pip install huggingface_hub transformers boto3 sagemaker wandb


In [18]:
!pip install -U sagemaker

Collecting sagemaker
  Downloading sagemaker-2.199.0.tar.gz (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting boto3<2.0,>=1.33.3 (from sagemaker)
  Downloading boto3-1.33.6-py3-none-any.whl.metadata (6.7 kB)
Collecting uvicorn==0.22.0 (from sagemaker)
  Downloading uvicorn-0.22.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi==0.95.2 (from sagemaker)
  Downloading fastapi-0.95.2-py3-none-any.whl.metadata (24 kB)
Collecting pydantic!=1.7,!=1.7.1,!=1.7.2,!=1.7.3,!=1.8,!=1.8.1,<2.0.0,>=1.6.2 (from fastapi==0.95.2->sagemaker)
  Downloading pydantic-1.10.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (149 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.6/149.6 kB[0m 

In [2]:
# !pip install transformers==4.28.1

In [22]:
%%capture insall_out --no-stderr

# !pip install --upgrade sagemaker wandb

In [3]:
import transformers
from transformers import AutoTokenizer #, AutoModelForTokenClassification
from transformers.models.auto.modeling_auto import AutoModelForTokenClassification
from transformers import TokenClassificationPipeline
import boto3 
from huggingface_hub import notebook_login
from transformers import pipeline
import sagemaker
import os
import wandb

from sagemaker.huggingface import HuggingFaceModel

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [4]:
# notebook_login()

In [5]:
transformers.__version__

'4.35.2'

In [19]:
sagemaker.__version__

'2.197.0'

In [7]:
# --- Static names and model identifiers --------------------------------------
# The name of our algorithm -- i.e. the name of the inference container
INFERENCE_ALGORITHM_NAME = "sm-gec-aws"
ENDPOINT_NAME = "sm-gec-aws"
HF_MODEL_ID = 'ramsenth/langbot-gec'
HF_TASK = 'token-classification'
instance_type = "ml.m5.large"  # alternative: "ml.p2.xlarge"

# --- Values resolved from the AWS environment this notebook runs in ----------
role = sagemaker.get_execution_role()
region = boto3.Session().region_name
account_id = boto3.client("sts").get_caller_identity()["Account"]

# --- Image URIs derived from the values above --------------------------------
# Custom inference image in this account's ECR registry.
IMAGE_URI_INFERENCE = (
    f"{account_id}.dkr.ecr.{region}.amazonaws.com/{INFERENCE_ALGORITHM_NAME}:latest"
)

# AWS deep learning container (DLC) image; the list of available images is at
# https://github.com/aws/deep-learning-containers/blob/master/available_images.md
MODEL_IMAGE_URL=f'763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'
# model_image_url=f'763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.25.0-deepspeed0.11.0-cu118'

print(account_id, region, role)

# --- Shared boto3 session and SageMaker control-plane client -----------------
# session = boto3.Session(profile_name=profile_name)
session = boto3.Session()
sm_client = session.client('sagemaker', region_name=region)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
571667364805 us-west-2 arn:aws:iam::571667364805:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole


## Try HuggingFace Inference

In [27]:
# Build a pipeline for the HF_TASK task from the model id in HF_MODEL_ID, as a
# local sanity check before anything is deployed.
pipe = pipeline(HF_TASK, model=HF_MODEL_ID)

In [28]:
pipe('Sí, tengo algo de tiempos hoy.')

[{'entity': 'B-na',
  'score': 0.62121904,
  'index': 6,
  'word': 'tiempos',
  'start': 18,
  'end': 25}]

In [29]:
# Same check as the pipeline() cell, but loading the tokenizer and model
# explicitly. This mirrors how the custom inference script's model_fn builds
# its pipeline.
tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(HF_MODEL_ID)

# Rebinds `pipe`, replacing the pipeline created by the previous cells.
pipe = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
pipe("Sí, tengo algo de tiempos hoy.")

[{'entity': 'B-na',
  'score': 0.62121904,
  'index': 6,
  'word': 'tiempos',
  'start': 18,
  'end': 25}]

In [5]:
# session = boto3.Session(profile_name=profile_name)
# Re-create the boto3 session and SageMaker client (the same two calls the
# setup cell makes), so this cell can run without the setup cell's clients.
# It still needs `region` to be defined.
session = boto3.Session()
sm_client = session.client('sagemaker', region_name=region)
# List the account's endpoints; the whole API response, including
# ResponseMetadata, is printed.
response = sm_client.list_endpoints()
print(response)

{'Endpoints': [], 'ResponseMetadata': {'RequestId': '9d05280b-b21c-4892-9ceb-2baec58f32d5', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '9d05280b-b21c-4892-9ceb-2baec58f32d5', 'content-type': 'application/x-amz-json-1.1', 'content-length': '16', 'date': 'Wed, 29 Nov 2023 23:49:39 GMT'}, 'RetryAttempts': 0}}


## Prepare Model Tar File For Deployment
Before we can deploy, we need to prepare the model tar file. Steps:
* Download the model artifacts from Weights & Biases (WandB)
* Add the custom inference script in a `code/` folder
* Create the tar file
* Upload the tar file to the S3 bucket

In [8]:
# WandB artifact id
WANDB_ARTIFACT='langbot/langbot_gec_plain_top_performers/model-beto_pytorch_final:latest'
# S3 destination of the tarball that this notebook builds and uploads.
S3_ARTIFACT = 's3://project-langbot-models/gec-simple-model-hf-pytorch-custom-infer.tar.gz'
# Tarball that the deploy cell passes to HuggingFaceModel as model_data.
# NOTE(review): no cell in this notebook creates or uploads this object —
# confirm how it is produced and whether it matches S3_ARTIFACT.
S3_WORKING_ARTIFACT = 's3://project-langbot-models/gec-simple-model-hf-pytorch-custom-infer-working.tar.gz'

def download_model(artifact_id):
    """Download a W&B model artifact and return the local directory it was saved to.

    Args:
        artifact_id: Artifact reference passed to ``run.use_artifact``
            (for example the value of ``WANDB_ARTIFACT``).

    Returns:
        The directory path returned by ``artifact.download()``.

    A W&B run is started only to fetch the artifact. It is always finished
    again, even when the lookup or download raises, so a failed attempt does
    not leave an open run behind in the kernel.
    """
    run = wandb.init()
    try:
        artifact = run.use_artifact(artifact_id, type='model')
        return artifact.download()
    finally:
        wandb.finish()


In [9]:
artifact_dir = download_model(WANDB_ARTIFACT)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ec2-user/.netrc


[34m[1mwandb[0m: Downloading large artifact model-beto_pytorch_final:latest, 417.77MB. 7 files... 
[34m[1mwandb[0m:   7 of 7 files downloaded.  
Done. 0:0:10.8


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [10]:
# Test the downloaded model
def test(model_loc):
    """Run one sample sentence through the model stored at ``model_loc``.

    Loads the tokenizer and the token-classification model from ``model_loc``,
    wraps them in a TokenClassificationPipeline and returns the pipeline's
    predictions for a fixed Spanish sentence.

    NOTE: the endpoint-testing cell further down defines another function that
    is also called ``test``; once that cell runs, it replaces this one.
    """
    sample_pipeline = TokenClassificationPipeline(
        tokenizer=AutoTokenizer.from_pretrained(model_loc),
        model=AutoModelForTokenClassification.from_pretrained(model_loc),
    )
    return sample_pipeline('Sí, tengo algo de tiempos hoy.')

test(artifact_dir)

[{'entity': 'B-na',
  'score': 0.8623703,
  'index': 6,
  'word': 'tiempos',
  'start': 18,
  'end': 25}]

In [11]:
# Create the code folder and add the inference script
!mkdir {artifact_dir}/code

In [12]:
from transformers import AutoModelForTokenClassification, AutoTokenizer, TokenClassificationPipeline
import logging
import json
import re

# NOTE(review): this cell only defines the handler functions inside the
# notebook kernel. No visible cell writes them to a file under
# {artifact_dir}/code/, so confirm how the script ends up in the uploaded
# tarball.

# Content type that input_fn accepts for request payloads.
JSON_CONTENT_TYPE = 'application/json'
# Predictions must score strictly above this value to be reported by _translate.
THRESHOLD = 0.3
# Module-level logger; model_fn emits an INFO record after loading the model.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

def model_fn(model_dir):
    """Build the token-classification pipeline from the files in ``model_dir``.

    Args:
        model_dir: Directory holding the tokenizer and model files.

    Returns:
        A TokenClassificationPipeline wrapping the loaded model and tokenizer.
        predict_fn receives this object as its ``model`` argument.
    """
    gec_pipeline = TokenClassificationPipeline(
        tokenizer=AutoTokenizer.from_pretrained(model_dir),
        model=AutoModelForTokenClassification.from_pretrained(model_dir),
    )
    logger.info(f"model_fn:: Loaded model from model_dir={model_dir}")
    return gec_pipeline

def input_fn(serialized_input_data, content_type=JSON_CONTENT_TYPE):
    """Decode the request body and return the sentence to analyse.

    Args:
        serialized_input_data: JSON document containing a ``"line"`` key.
        content_type: Content type of the request; only ``application/json``
            is supported.

    Returns:
        The value stored under ``"line"`` in the decoded payload.

    Raises:
        ValueError: If ``content_type`` is not ``application/json``.
        KeyError: If the payload has no ``"line"`` key.
    """
    if content_type != JSON_CONTENT_TYPE:
        # This is the request's Content-Type, not the Accept header. str()
        # keeps the message buildable when content_type is None.
        raise ValueError('Unsupported request ContentType: ' + str(content_type))

    input_data = json.loads(serialized_input_data)
    return input_data['line']

def _translate(data, model_ret):
    # Parse model output which has :
    # [{'entity': 'B-na',
    #   'score': 0.7923039,
    #   'index': 6,
    #   'word': 'tiempos',
    #   'start': 18,
    #   'end': 25}]

    # Split input to words
    words = re.findall(r'\b\w+\b|\[.*?\]\{.*?\}\<.*?\>|\S', data)
    lookup = {}
    for cur in model_ret:
        score = float(cur['score'])
        if score > THRESHOLD:
            start = int(cur['start'])
            end = int(cur['end'])
            word = data[start: end]
            cur['matched'] = False
            lookup[word] = cur

    # print(lookup)
    output = []
    for word in words:
        if word in lookup:
            cur['matched'] = True
            output.append({word: cur['entity']})
        else:
            output.append({word: 'O'})
    return output
        
def predict_fn(data, model):
    """Run the model on ``data`` and return the response as a JSON string.

    Args:
        data: The sentence returned by input_fn.
        model: The callable returned by model_fn.

    Returns:
        A JSON string with two keys: ``result`` (the per-word tags from
        _translate) and ``model_response`` (the raw predictions, themselves
        JSON-encoded as a string).
    """
    raw_predictions = model(data)
    per_word_tags = _translate(data, raw_predictions)

    # Stringify the numeric fields in place before dumping.
    # NOTE(review): presumably because the scores are not plain Python floats
    # and json.dumps would reject them — confirm.
    for prediction in raw_predictions:
        for field in ('score', 'index', 'start', 'end'):
            prediction[field] = str(prediction[field])

    return json.dumps({
        'result': per_word_tags,
        'model_response': json.dumps(raw_predictions),
    })

def output_fn(prediction, accept):
    """Return ``prediction`` unchanged as the response body.

    Args:
        prediction: The value produced by predict_fn (already a JSON string).
        accept: Requested response content type; not used.
    """
    return prediction


In [13]:
artifact_dir

'/home/ec2-user/SageMaker/model_gec_ram/deployment/artifacts/model-beto_pytorch_final:v14'

In [14]:
# Create the tar file and upload to s3
# 1) Archive the contents of artifact_dir into a local tarball; member paths
#    are relative to that directory.
!tar -czf gec-simple-model-hf-pytorch-custom-infer.tar.gz --directory={artifact_dir} .
# 2) Copy the tarball to the S3 location held in S3_ARTIFACT.
!aws s3 cp ./gec-simple-model-hf-pytorch-custom-infer.tar.gz {S3_ARTIFACT}
# 3) Delete the local tarball. This is a separate command, so it runs even if
#    the upload above failed.
!rm gec-simple-model-hf-pytorch-custom-infer.tar.gz

upload: ./gec-simple-model-hf-pytorch-custom-infer.tar.gz to s3://project-langbot-models/gec-simple-model-hf-pytorch-custom-infer.tar.gz


## Deploy Model

In [15]:
!pwd

/home/ec2-user/SageMaker/model_gec_ram/deployment


In [22]:
from sagemaker.huggingface.model import HuggingFaceModel

# Describe the model to host: the tarball in S3 plus the framework versions
# requested for the serving container.
# NOTE(review): the kernel reports transformers 4.35.2 while the container is
# pinned to 4.6.1 — confirm the artifact is meant to be served on that version.
huggingface_model = HuggingFaceModel(
    role=role,                          # IAM role with permissions to create an endpoint
    model_data=S3_WORKING_ARTIFACT,     # alternative: S3_ARTIFACT (tarball uploaded by this notebook)
    py_version='py36',                  # Python version used
    pytorch_version="1.7.1",            # PyTorch version used
    transformers_version="4.6.1",       # Transformers version used
)

# Deploy to SageMaker Inference; the returned predictor is the cell's output.
huggingface_model.deploy(
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
    endpoint_name=ENDPOINT_NAME,
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
----!

<sagemaker.huggingface.model.HuggingFacePredictor at 0x7f2fce863070>

## Test Endpoint

In [23]:
response = sm_client.list_endpoints()
print(response)

{'Endpoints': [{'EndpointName': 'sm-gec-aws', 'EndpointArn': 'arn:aws:sagemaker:us-west-2:571667364805:endpoint/sm-gec-aws', 'CreationTime': datetime.datetime(2023, 12, 4, 19, 19, 14, 337000, tzinfo=tzlocal()), 'LastModifiedTime': datetime.datetime(2023, 12, 4, 19, 21, 18, 177000, tzinfo=tzlocal()), 'EndpointStatus': 'InService'}, {'EndpointName': 'sm-llm-aws', 'EndpointArn': 'arn:aws:sagemaker:us-west-2:571667364805:endpoint/sm-llm-aws', 'CreationTime': datetime.datetime(2023, 12, 4, 18, 32, 42, 21000, tzinfo=tzlocal()), 'LastModifiedTime': datetime.datetime(2023, 12, 4, 18, 37, 23, 50000, tzinfo=tzlocal()), 'EndpointStatus': 'InService'}], 'ResponseMetadata': {'RequestId': '3e9dc959-4c7e-4108-a4ae-ecc36da2aaf7', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '3e9dc959-4c7e-4108-a4ae-ecc36da2aaf7', 'content-type': 'application/x-amz-json-1.1', 'content-length': '422', 'date': 'Mon, 04 Dec 2023 19:21:44 GMT'}, 'RetryAttempts': 0}}


In [24]:
# Block until the endpoint is "InService" instead of relying on a manual wait.
# The waiter raises if the endpoint ends up in a failed state.
sm_client.get_waiter('endpoint_in_service').wait(EndpointName=ENDPOINT_NAME)

# invoke endpoint by endpoint name
import json
sm_runtime = session.client("sagemaker-runtime", region_name=region)

content_type = "application/json"

def test(text, endpoint_name=ENDPOINT_NAME):
    """Send one sentence to the endpoint and print the raw response.

    Args:
        text: Sentence to check; sent as ``{"line": text}``, the payload shape
            the custom input_fn reads.
        endpoint_name: Endpoint to invoke. Defaults to ``ENDPOINT_NAME`` so the
            name is defined in one place (the setup cell).

    Prints the invoke_endpoint response dict followed by the response body
    bytes. Returns None.
    """
    data = {"line": text}

    response = sm_runtime.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType=content_type,
        Body=json.dumps(data),
    )
    print(response)
    print(response["Body"].read())


texts = [
    'Estoy bienes, gracias.',
    'Sí, tengo algo de tiempos hoy.',
    'Sí, necesito comprar un chaqueta.',
    'A las dieza.',
    'Hasta luega.'
]
# Plain loop: test() is called for its printed output, not for a result list.
for text in texts:
    test(text)

# Edge case: empty input line.
test('')
# pair 1
# Assistant: Hola, ¿cómo estás? (Hello, how are you?)
# user_input = "Estoy bienes, gracias."
# input_error = "the word, bienes, has a number disagreement error."
# next_question = "¿Estás libre hoy?"

# pair 2
# user_input = "Sí, tengo algo de tiempos hoy."
# input_error = "the word, tiempos, has a number disagreement error."
# next_question = "¿Quieres ir de compras conmigo?"

# pair 3
# user_input = "Sí, necesito comprar un chaqueta."
# input_error = "the word, un, has a gender disagreement error."
# next_question = "¿A qué hora te gustaría ir?"

# pair 4
# user_input = "A las dieza."
# input_error = "the word, dieza, has a gender disagreement error."
# next_question = "Vale, nos vemos luego."

# pair 5
# user_input = "Hasta luega."
# input_error = "the word, luega, has a gender disagreement error."
# next_question = "adiós!"

{'ResponseMetadata': {'RequestId': 'd1cef595-c5c5-46a1-bffb-e492b7f370f7', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'd1cef595-c5c5-46a1-bffb-e492b7f370f7', 'x-amzn-invoked-production-variant': 'AllTraffic', 'date': 'Mon, 04 Dec 2023 19:21:50 GMT', 'content-type': 'application/json', 'content-length': '266', 'connection': 'keep-alive'}, 'RetryAttempts': 0}, 'ContentType': 'application/json', 'InvokedProductionVariant': 'AllTraffic', 'Body': <botocore.response.StreamingBody object at 0x7f2fce81ead0>}
b'{"result": [{"Estoy": "O"}, {"bienes": "B-ga"}, {",": "O"}, {"gracias": "O"}, {".": "O"}], "model_response": "[{\\"word\\": \\"bienes\\", \\"score\\": \\"0.34608975052833557\\", \\"entity\\": \\"B-ga\\", \\"index\\": \\"2\\", \\"start\\": \\"6\\", \\"end\\": \\"12\\", \\"matched\\": true}]"}'
{'ResponseMetadata': {'RequestId': '44972f26-302b-42b9-aa3c-a52f524169f2', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '44972f26-302b-42b9-aa3c-a52f524169f2', 'x-amzn-

## **DEPRECATED** Trial 1 - Custom Deploy 

In [15]:
# set model name and endpoint configuration name
import time

# UTC timestamp suffix of the form -YYYY-MM-DD-HH-MM-SS. It is computed here
# but not appended to any of the names below.
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())

# The model, the endpoint configuration and the endpoint all share one name.
model_name = endpoint_config_name = endpoint_name = INFERENCE_ALGORITHM_NAME
print(model_name, endpoint_config_name, endpoint_name, sep="\n")

sm-gec-aws
sm-gec-aws
sm-gec-aws


In [17]:
model_name

'sm-gec-aws'

In [18]:
# Show which container image will be used (the tag in the setup cell is a
# GPU / CUDA 11.8 build of the Hugging Face PyTorch inference image).
print(MODEL_IMAGE_URL)

# set container config
container_config = {
    'Image': MODEL_IMAGE_URL,
    # Model tarball location; hard-coded here rather than taken from a constant.
    'ModelDataUrl': 's3://project-langbot-models/gec-simple-model-hf-pytorch.tar.gz',
    'Mode': 'SingleModel',
    'Environment': {
        # 'HF_MODEL_ID': HF_MODEL_ID,
        'HF_TASK' : HF_TASK,
        # NOTE(review): 20 presumably maps to Python's logging.INFO — confirm.
        'SAGEMAKER_CONTAINER_LOG_LEVEL' : '20',
        'SAGEMAKER_REGION' : region
    }
}
print(container_config)

# create model
# ... models console: https://console.aws.amazon.com/sagemaker/home?#/models
# Registers a SageMaker model named model_name that runs container_config
# under the notebook's execution role.
response = sm_client.create_model(
    ModelName=model_name,
    PrimaryContainer=container_config,
    ExecutionRoleArn=role, 
    EnableNetworkIsolation=False
)
print(response)

763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04
{'Image': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04', 'ModelDataUrl': 's3://project-langbot-models/gec-simple-model-hf-pytorch.tar.gz', 'Mode': 'SingleModel', 'Environment': {'HF_TASK': 'token-classification', 'SAGEMAKER_CONTAINER_LOG_LEVEL': '20', 'SAGEMAKER_REGION': 'us-west-2'}}
{'ModelArn': 'arn:aws:sagemaker:us-west-2:571667364805:model/sm-gec-aws', 'ResponseMetadata': {'RequestId': '59a40e8c-8459-4974-a461-c139ba8e5265', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '59a40e8c-8459-4974-a461-c139ba8e5265', 'content-type': 'application/x-amz-json-1.1', 'content-length': '72', 'date': 'Wed, 29 Nov 2023 05:21:58 GMT'}, 'RetryAttempts': 0}}


In [19]:
# create endpoint config
# ... endpoint configs console: https://console.aws.amazon.com/sagemaker/home?#/endpointConfig
# A single production variant ("AllTraffic") serves model_name on one instance.
# NOTE(review): instance_type is whatever an earlier cell last assigned (the
# setup cell sets "ml.m5.large"), while MODEL_IMAGE_URL is a GPU image —
# confirm the combination is intended.
endpoint_config_response = sm_client.create_endpoint_config(
   EndpointConfigName=endpoint_config_name,
   ProductionVariants=[
        {
            "ModelName": model_name,
            "VariantName": "AllTraffic",
            'InitialInstanceCount': 1,
            'InstanceType': instance_type,
            'EnableSSMAccess': False
        }
    ]
)
print(endpoint_config_response)

# Echo the name and ARN of the configuration that was just created.
print('Endpoint configuration name: {}'.format(endpoint_config_name))
print('Endpoint configuration arn:  {}'.format(endpoint_config_response['EndpointConfigArn']))

{'EndpointConfigArn': 'arn:aws:sagemaker:us-west-2:571667364805:endpoint-config/sm-gec-aws', 'ResponseMetadata': {'RequestId': '79a27733-b71e-4a4d-ac25-a20400f0c11a', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '79a27733-b71e-4a4d-ac25-a20400f0c11a', 'content-type': 'application/x-amz-json-1.1', 'content-length': '91', 'date': 'Wed, 29 Nov 2023 05:22:17 GMT'}, 'RetryAttempts': 0}}
Endpoint configuration name: sm-gec-aws
Endpoint configuration arn:  arn:aws:sagemaker:us-west-2:571667364805:endpoint-config/sm-gec-aws


In [20]:
# create endpoint
# ... endpoints console: https://console.aws.amazon.com/sagemaker/home?#/endpoints
# Requests creation of the endpoint from the configuration made in the previous
# cell. This cell does not wait for the endpoint to reach "InService".
endpoint_response = sm_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name
)
print(endpoint_response)

# Echo the name and ARN of the endpoint being created.
print('Endpoint name: {}'.format(endpoint_name))
print('Endpoint arn:  {}'.format(endpoint_response['EndpointArn']))

{'EndpointArn': 'arn:aws:sagemaker:us-west-2:571667364805:endpoint/sm-gec-aws', 'ResponseMetadata': {'RequestId': '96258e00-75ee-4ec6-9946-af03caf9ab79', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '96258e00-75ee-4ec6-9946-af03caf9ab79', 'content-type': 'application/x-amz-json-1.1', 'content-length': '78', 'date': 'Wed, 29 Nov 2023 05:22:21 GMT'}, 'RetryAttempts': 0}}
Endpoint name: sm-gec-aws
Endpoint arn:  arn:aws:sagemaker:us-west-2:571667364805:endpoint/sm-gec-aws


## **DEPRECATED** Trial 2: Use HuggingFace

In [None]:
!pip install sagemaker --upgrade

In [61]:
os.environ['HF_TRUST_REMOTE_CODE']='True'

In [7]:
from sagemaker.huggingface.model import HuggingFaceModel


# Container environment for deploying a model straight from the Hugging Face
# Hub. The previous first assignment of `hub` (with 'HF_MODEL_ID' set to
# 'dslim/bert-base-NER') was overwritten immediately and never used; swap that
# id in here to try a public NER model instead.
hub = {
  'HF_MODEL_ID': HF_MODEL_ID,
  'HF_TASK': HF_TASK
}

# 763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04

# Model definition backed by a tarball in S3. The `hub` dict defined above is
# only referenced by the commented-out variant below.
huggingface_model = HuggingFaceModel(
   model_data="s3://project-langbot-models/gec-simple-model-hf-pytorch.tar.gz",  # path to your trained SageMaker model
   role=role,                                            # IAM role with permissions to create an endpoint
   transformers_version="4.26",                           # Transformers version used
   pytorch_version="1.13",                                # PyTorch version used
   py_version='py39',                                    # Python version used
)

# # Using model hosted on HF
# huggingface_model = HuggingFaceModel(
#     transformers_version='4.26',
#     pytorch_version='1.13',
#     py_version='py39',
#     env=hub,
#     role=role,
# )

# deploy model to SageMaker Inference
# The endpoint name is shared with the other deployment attempts in this
# notebook (ENDPOINT_NAME); the instance type is hard-coded here.
huggingface_model.deploy(
    endpoint_name = ENDPOINT_NAME,
    initial_instance_count=1,
    instance_type='ml.m5.xlarge'
)

# # create Hugging Face Model Class
# huggingface_model = HuggingFaceModel(
#    model_data="s3://models/my-bert-model/model.tar.gz",  # path to your trained SageMaker model
#    role=role,                                            # IAM role with permissions to create an endpoint
#    transformers_version="4.28",                          # Transformers version used
#    pytorch_version="2.0.0",                              # PyTorch version used
#    py_version='py39',                                    # Python version used
# )

# # deploy model to SageMaker Inference
# predictor = huggingface_model.deploy(
#    initial_instance_count=1,
#    instance_type=instance_type
# )


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
-----!

<sagemaker.huggingface.model.HuggingFacePredictor at 0x7f9e67c66da0>

# **DEPRECATED** Old Implementation

## Imports and Setup

In [5]:
import sys
sys.path.append('../')

In [None]:
!pip install wandb tensorflow transformers

In [None]:
!pip show boto3
!pip install boto3 --upgrade

In [3]:
import os

In [None]:
%%sh
# pip uninstall -y -q awscli
# pip install awscli
# pip install q -U sagemaker-ssh-helper
# pip freeze | grep sagemaker-ssh-helper

In [35]:
import sys
from utils import Config
import boto3
from sagemaker import get_execution_role
from model_utils import download_simple_model
import os
import boto3
import sagemaker
import pprint

# Pretty-printer instance for inspecting nested API responses.
pp = pprint.PrettyPrinter(indent=1)

# Execution role, SageMaker session, and boto3 clients/resources used by the
# old implementation below. `role`, `region` and `account_id` repeat the
# lookups already made in the setup cell at the top of the notebook.
role = sagemaker.get_execution_role()
print(role)
sess = sagemaker.Session()
sm_boto3 = boto3.client("sagemaker")
s3 = boto3.resource('s3')
region = boto3.Session().region_name
account_id = boto3.client("sts").get_caller_identity()["Account"]

# The name of our algorithm -- i.e. the name of the inference container
# (same values as in the setup cell at the top of the notebook)
INFERENCE_ALGORITHM_NAME = "sm-gec-aws"
ENDPOINT_NAME = "sm-gec-aws"
IMAGE_URI_INFERENCE = (
    f"{account_id}.dkr.ecr.{region}.amazonaws.com/{INFERENCE_ALGORITHM_NAME}:latest"
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
arn:aws:iam::571667364805:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


## Model Useless - Inference Endpoint Configuration

## Simple GEC - Inference Endpoint Configuration

In [None]:
# NOTE(review): RUN_TO_DEPLOY is not assigned anywhere above this cell. Its only
# visible assignment is in the later "Simple GEC specific constants" cell, which
# also defines these same three constants, so this cell raises NameError on a
# fresh top-to-bottom run.
S3_BUCKET = 'project-langbot-models'
S3_DATA_KEY = f'gec_simple_model_{RUN_TO_DEPLOY}_weights.gz'
PRETRAINED_MODEL_DATA = 's3://{}/{}'.format(S3_BUCKET, S3_DATA_KEY)

In [None]:
!cat model-useless/Dockerfile-useless-inference

In [None]:
!pygmentize model-useless/code/useless-inference.py

In [62]:
!pwd

/home/ec2-user/SageMaker/model_gec_ram/deployment


In [63]:
! cd model-useless && sh build_and_push.sh {INFERENCE_ALGORITHM_NAME} Dockerfile-useless-inference && cd ..

ECR image fullname: 571667364805.dkr.ecr.us-west-2.amazonaws.com/sm-gec-aws:latest
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  15.87kB
Step 1/8 : ARG REGION=us-west-2
Step 2/8 : FROM 763104351884.dkr.ecr.$REGION.amazonaws.com/tensorflow-inference:2.12.1-gpu-py310-cu118-ubuntu20.04-sagemaker
 ---> fde65ca56ee6
Step 3/8 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> be571fb97baa
Step 4/8 : COPY /code /opt/ml/code
 ---> bf4d7aa16d06
Step 5/8 : ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code
 ---> Running in 7f49132b7e2a
Removing intermediate container 7f49132b7e2a
 ---> 74b39635edaf
Step 6/8 : ENV SAGEMAKER_PROGRAM useless-inference.py
 ---> Running in 954c5b329f81
Removing intermediate container 954c5b329f81
 ---> 58defa8f975b
Step 7/8 : RUN pip install --no-cache-dir --upgrade pip &&     pip insta

In [64]:
!pwd

/home/ec2-user/SageMaker/model_gec_ram/deployment


In [67]:
from sagemaker import Model
from sagemaker.tensorflow import TensorFlowModel
import sagemaker as sage

# SageMaker session passed to the model below.
sess = sage.Session()

# Rebinds the notebook-wide instance_type (the setup cell sets "ml.m5.large").
instance_type = "ml.p2.xlarge"

# Despite the variable name, this is a TensorFlowModel (a deployable model),
# not an estimator. It runs the custom ECR image with the pretrained weights.
# NOTE(review): entry_point is "useless_inference.py" (underscore), while the
# Dockerfile output and the pygmentize cell refer to "useless-inference.py"
# (hyphen) — confirm which filename exists in model-useless/code.
estimator = TensorFlowModel(
    model_data=PRETRAINED_MODEL_DATA,
    source_dir="model-useless/code",
    role=role, 
    entry_point="useless_inference.py",
    image_uri=IMAGE_URI_INFERENCE,
    sagemaker_session=sess)

# estimator = Model(
#     image_uri=IMAGE_URI_INFERENCE,
#     model_data=PRETRAINED_MODEL_DATA,
#     role=role,
#     source_dir="model-useless/code",
#     entry_point="useless-inference.py",
#     sagemaker_session=sess
# )

# ssh_wrapper = SSHEstimatorWrapper.create(estimator, connection_wait_time_seconds=0, local_user_id=local_user_id, log_to_stdout=True)

# ssh_wrapper = SSHModelWrapper.create(estimator, connection_wait_time_seconds=0)

# deploy the model
# One instance of instance_type; wait=True blocks until deployment finishes.
predictor = estimator.deploy(1, instance_type, endpoint_name=ENDPOINT_NAME, wait=True)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
-------!

### Imports And Setup

In [None]:
!pip install --upgrade tensorflow sagemaker wandb

In [36]:
# Simple GEC specific constants
# The wandb run from which to get model weights
RUN_TO_DEPLOY = '3emhdbgu'
# Bucket that holds the exported weights
S3_BUCKET = 'project-langbot-models'
# Object key of the weights archive; embeds the run id
S3_DATA_KEY = f'gec_simple_model_{RUN_TO_DEPLOY}_weights.gz'
# Full s3:// URI of the weights archive
PRETRAINED_MODEL_DATA = f's3://{S3_BUCKET}/{S3_DATA_KEY}'

### Push Model Weights To S3 From WandB

In [21]:
# Upload the model weights from WandB to S3 bucket
def copy_weights_to_s3(runid, download=True):
    """Copy the weights of one W&B run into the project's S3 bucket.

    Args:
        runid: Key into ``Config().SIMPLE_MODEL_RUNS`` identifying the run.
            It also determines the S3 object key
            (``gec_simple_model_<runid>_weights.gz``), so weights are never
            stored under another run's name.
        download: When True, fetch the weights with ``download_simple_model``
            first. When False, an existing ``downloads/model_weights.gz`` is
            uploaded as is.

    Raises:
        KeyError: If ``runid`` is not present in ``SIMPLE_MODEL_RUNS``.
    """
    # Resolve the run reference even when download=False, so an unknown runid
    # still fails before anything is uploaded.
    run_ref = Config().SIMPLE_MODEL_RUNS[runid]
    weights_filename = 'model_weights.gz'
    if download:
        # NOTE(review): download_simple_model presumably writes to
        # downloads/<weights_filename> — confirm against model_utils.
        download_simple_model(run_ref, weights_filename)

    # Push the downloaded weights to s3 bucket
    s3_key = f'gec_simple_model_{runid}_weights.gz'
    s3.meta.client.upload_file(f'downloads/{weights_filename}', S3_BUCKET, s3_key)

In [22]:
copy_weights_to_s3(RUN_TO_DEPLOY, download=True)

Found <Run langbot/langbot_gec_plain_top_performers/3emhdbgu (finished)> to load artifact from
Downloading model_weights.gz
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [46]:
# Confirm model weights are there on S3
print(PRETRAINED_MODEL_DATA)
weights_object = s3.Bucket(S3_BUCKET).Object(S3_DATA_KEY)
# Building the resource object makes no request. load() issues a HEAD request
# and raises botocore.exceptions.ClientError if the key does not exist.
weights_object.load()
weights_object

s3://project-langbot-models/gec_simple_model_3emhdbgu_weights.gz


s3.Object(bucket_name='project-langbot-models', key='gec_simple_model_3emhdbgu_weights.gz')

### DockerFile

In [37]:
!cat model-simple/Dockerfile-inference

# Based on
# https://github.com/aws/amazon-sagemaker-examples/blob/main/advanced_functionality/tensorflow_iris_byom/tensorflow_BYOM_iris.ipynb

ARG REGION=us-west-2

# SageMaker TF image for INFERENCE
FROM 763104351884.dkr.ecr.$REGION.amazonaws.com/tensorflow-inference:2.12.1-gpu-py310-cu118-ubuntu20.04-sagemaker

ENV PATH="/opt/ml/code:${PATH}"

# /opt/ml and all subdirectories are utilized by SageMaker, we use the /code subdirectory to store our user code.
COPY /gec /opt/ml/code

# this environment variable is used by the SageMaker TensorFlow container to determine our user code directory.
ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code

# this environment variable is used by the SageMaker Tensorflow container to determine our program entry point
# for training and serving.
# For more information: https://github.com/aws/sagemaker-tensorflow-training-toolkit
ENV SAGEMAKER_PROGRAM gec-simple-inference.py

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir prot

### Inference script - gec-simple-inference.py

In [None]:
!pygmentize model-simple/gec/gec-simple-inference.py

### Build and Push Container

In [9]:
!pwd

/home/ec2-user/SageMaker/model_gec_ram/deployment


In [39]:
! cd model-simple && sh build_and_push.sh {INFERENCE_ALGORITHM_NAME} Dockerfile-inference && cd ..

ECR image fullname: 571667364805.dkr.ecr.us-west-2.amazonaws.com/sm-gec-aws:latest
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  72.19kB
Step 1/8 : ARG REGION=us-west-2
Step 2/8 : FROM 763104351884.dkr.ecr.$REGION.amazonaws.com/tensorflow-inference:2.12.1-gpu-py310-cu118-ubuntu20.04-sagemaker
 ---> fde65ca56ee6
Step 3/8 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> be571fb97baa
Step 4/8 : COPY /gec /opt/ml/code
 ---> Using cache
 ---> 1385fbe8293b
Step 5/8 : ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code
 ---> Using cache
 ---> 2da6d66a094c
Step 6/8 : ENV SAGEMAKER_PROGRAM gec-simple-inference.py
 ---> Using cache
 ---> b7f1e2dd5f29
Step 7/8 : RUN pip install --no-cache-dir --upgrade pip &&     pip install --no-cache-dir protobuf==3.20.* &&     pip install --no-cache-dir nvgpu smdebug transformer

### Create Inference Endpoint

In [48]:
!pwd

/home/ec2-user/SageMaker/model_gec_ram/deployment


In [56]:
from sagemaker import Model
from sagemaker.tensorflow import TensorFlowModel  # imported, but not used by this cell
import sagemaker as sage

# Real (non-local) SageMaker session through which the model is created and deployed.
sess = sage.Session()

# Instance type hosting the endpoint.
# "ml.m5.xlarge" has no GPU and will trigger an error; "ml.g4dn.xlarge" was the
# other candidate noted for this cell.
instance_type = "ml.p2.xlarge"

# Generic SageMaker model: custom inference image + pre-trained artifacts + the
# entry-point script shipped from the local source directory.
estimator = Model(
    sagemaker_session=sess,
    role=role,
    image_uri=IMAGE_URI_INFERENCE,
    model_data=PRETRAINED_MODEL_DATA,
    entry_point="gec-simple-inference.py",
    source_dir="model-simple/gec",
)

# Deploy a single instance behind ENDPOINT_NAME and block until deployment finishes.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    endpoint_name=ENDPOINT_NAME,
    wait=True,
)



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
--------!

## GEC With Explanation - Inference Endpoint Configuration

### Prerequisites

The pre-trained NER model should be pickled and uploaded to the correct S3 bucket as a tarball.

### Imports and Setup

In [1]:
# S3 location of the pre-trained model archive that is deployed in this section.
BUCKET = "project-langbot-models"
KEY = "gec_cows_l2h_small.gz"
PRETRAINED_MODEL_DATA = f"s3://{BUCKET}/{KEY}"

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


### Check Saved Model

In [2]:
# Verify that the pre-trained model archive really exists in S3 before deploying.
print(PRETRAINED_MODEL_DATA)

# Building the Object resource is lazy and performs no request on its own, so it
# would "succeed" even for a missing key. load() issues a HEAD request and raises
# botocore.exceptions.ClientError if the object is missing or not accessible.
model_object = boto3.Session().resource("s3").Bucket(BUCKET).Object(KEY)
model_object.load()

# Last expression of the cell: display the (now verified) object reference.
model_object

s3://project-langbot-models/gec_cows_l2h_small.gz


s3.Object(bucket_name='project-langbot-models', key='gec_cows_l2h_small.gz')

### Dockerfile

In [31]:
# Print the Dockerfile used to build the inference image for this section.
# NOTE(review): this reads from `model1/container/`, while the later cells in this
# section use `container/...` relative paths — confirm which location is current.
!cat model1/container/Dockerfile-inference

# Based on
# https://github.com/awslabs/amazon-sagemaker-examples/master/advanced_functionality/pytorch_extending_our_containers/pytorch_extending_our_containers.ipynb

ARG REGION=us-west-2

# SageMaker PyTorch image for INFERENCE
FROM 763104351884.dkr.ecr.$REGION.amazonaws.com/pytorch-inference:1.12.1-gpu-py38-cu113-ubuntu20.04-sagemaker

ENV PATH="/opt/ml/code:${PATH}"

# /opt/ml and all subdirectories are utilized by SageMaker, we use the /code subdirectory to store our user code.
COPY /gec /opt/ml/code

# this environment variable is used by the SageMaker PyTorch container to determine our user code directory.
ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code

# this environment variable is used by the SageMaker PyTorch container to determine our program entry point
# for training and serving.
# For more information: https://github.com/aws/sagemaker-pytorch-container
ENV SAGEMAKER_PROGRAM gec-inference.py

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir n

### Inference script - gec-inference.py

In [4]:
# Display the inference entry-point script with syntax highlighting
# (pygmentize is the command-line tool that ships with Pygments).
!pygmentize container/gec/gec-inference.py

[34mimport[39;49;00m [04m[36mlogging[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mtorch[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mjson[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mpickle[39;49;00m[37m[39;49;00m
[37m[39;49;00m
JSON_CONTENT_TYPE = [33m'[39;49;00m[33mapplication/json[39;49;00m[33m'[39;49;00m[37m[39;49;00m
[37m[39;49;00m
logger = logging.getLogger([31m__name__[39;49;00m)[37m[39;49;00m
logger.setLevel(logging.DEBUG)[37m[39;49;00m
[37m[39;49;00m
[37m[39;49;00m
[34mdef[39;49;00m [32mmodel_fn[39;49;00m(model_dir):[37m[39;49;00m
    logger.info([33mf[39;49;00m[33m"[39;49;00m[33minside model_fn, model_dir= [39;49;00m[33m{[39;49;00mmodel_dir[33m}[39;49;00m[33m"[39;49;00m)[37m[39;49;00m
    device = [33m"[39;49;00m[33mcuda[39;49;00m[33m"[39;49;00m [34mif[39;49;00m torch.cuda.is_available() [34melse[39;49;00m [33m

### Build and Push Container

In [5]:
# Run build_and_push.sh from inside container/ with Dockerfile-inference; the
# logged output shows an ECR login followed by a Docker build.
# IPython substitutes {INFERENCE_ALGORITHM_NAME} with the value of the Python
# variable of that name before the command is executed.
! cd container && sh build_and_push.sh {INFERENCE_ALGORITHM_NAME} Dockerfile-inference

ECR image fullname: 571667364805.dkr.ecr.us-west-2.amazonaws.com/sm-gec-aws:latest
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  17.92kB
Step 1/8 : ARG REGION=us-west-2
Step 2/8 : FROM 763104351884.dkr.ecr.$REGION.amazonaws.com/pytorch-inference:1.12.1-gpu-py38-cu113-ubuntu20.04-sagemaker
 ---> cc486ae090f7
Step 3/8 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> ed6c9f68b5de
Step 4/8 : COPY /gec /opt/ml/code
 ---> Using cache
 ---> 923b5fcc86b5
Step 5/8 : ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code
 ---> Using cache
 ---> fd25ac165e77
Step 6/8 : ENV SAGEMAKER_PROGRAM gec-inference.py
 ---> Using cache
 ---> 58bc40662a62
Step 7/8 : RUN pip install --no-cache-dir --upgrade pip &&     pip install --no-cache-dir numba==0.53.1 protobuf==3.20.* && pip install --no-cache-dir simpletransformers==0.64.3

### Create Inference Endpoint

In [6]:
from sagemaker import Model

import sagemaker as sage

# Real (non-local) SageMaker session through which the model is created and deployed.
sess = sage.Session()

# Instance type hosting the endpoint.
# "ml.m5.xlarge" has no GPU and will trigger an error; "ml.g4dn.xlarge" was the
# other candidate noted for this cell.
instance_type = "ml.p2.xlarge"

# Generic SageMaker model: custom inference image + pre-trained artifacts + the
# entry-point script shipped from the local source directory.
estimator = Model(
    sagemaker_session=sess,
    role=role,
    image_uri=IMAGE_URI_INFERENCE,
    model_data=PRETRAINED_MODEL_DATA,
    entry_point="gec-inference.py",
    source_dir="container/gec",
)

# Deploy a single instance behind ENDPOINT_NAME.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    endpoint_name=ENDPOINT_NAME,
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
-------!

# Optional cleanup of the created endpoint
The created endpoint can be deleted with the code below.

This part represents the end of the notebook.

In [25]:
import boto3

# Low-level SageMaker API client used for the teardown calls below.
client = boto3.client("sagemaker")
# Look up the endpoint configuration by passing ENDPOINT_NAME as the config name.
# NOTE(review): this assumes the config was created under the same name as the
# endpoint — confirm against how the endpoint was deployed.
response = client.describe_endpoint_config(EndpointConfigName=ENDPOINT_NAME)
# Name of the model behind the first production variant; only the commented-out
# delete_model call below would use it.
model_name = response["ProductionVariants"][0]["ModelName"]
# Delete the endpoint itself. As the last expression of the cell, the API
# response is what the cell displays.
client.delete_endpoint(EndpointName=ENDPOINT_NAME)
# The endpoint configuration and the model are left in place; uncomment the
# lines below to remove them as well.
# client.delete_endpoint_config(EndpointConfigName=ENDPOINT_NAME)
# client.delete_model(ModelName=model_name)

{'ResponseMetadata': {'RequestId': '9150081a-5ab9-42a7-bfa1-28533741574d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '9150081a-5ab9-42a7-bfa1-28533741574d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 04 Dec 2023 19:24:39 GMT'},
  'RetryAttempts': 0}}