In [1]:
# install AWS packages: boto3 (AWS SDK client used below) and the sagemaker SDK
# %pip targets the environment of the running kernel; as the output notes,
# a kernel restart may be needed before the updated packages can be imported
%pip install boto3
%pip install sagemaker

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# AWS region passed to the boto3 clients created in this notebook
region_name = 'us-west-2'
# named credentials profile, used instead of entering credentials directly
profile_name = 'default'

In [3]:
# Build a SageMaker client from the named profile and smoke-test it:
# a successful list_endpoints() call shows that credentials and region work.
import boto3

session = boto3.Session(profile_name=profile_name)
sm_client = session.client(service_name='sagemaker', region_name=region_name)
response = sm_client.list_endpoints()
print(response)

{'Endpoints': [], 'ResponseMetadata': {'RequestId': 'f90666d4-1a09-4244-b11e-00ba21083ef4', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'f90666d4-1a09-4244-b11e-00ba21083ef4', 'content-type': 'application/x-amz-json-1.1', 'content-length': '16', 'date': 'Wed, 20 Jul 2022 03:10:19 GMT'}, 'RetryAttempts': 0}}


In [4]:
# Derive the model, endpoint-config and endpoint names from one base name
# plus a shared UTC timestamp suffix (format: -YYYY-MM-DD-HH-MM-SS).
import time

ml_model_name = "multibert-text-classification"
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())

model_name = f"{ml_model_name}-model{timestamp}"
endpoint_config_name = f"{ml_model_name}-epc{timestamp}"
endpoint_name = f"{ml_model_name}-ep{timestamp}"

# one name per line, in the order: model, endpoint config, endpoint
print(model_name, endpoint_config_name, endpoint_name, sep='\n')

multibert-text-classification-model-2022-07-20-03-10-32
multibert-text-classification-epc-2022-07-20-03-10-32
multibert-text-classification-ep-2022-07-20-03-10-32


In [5]:
# set sagemaker execution role
# NOTE(review): `sagemaker` is imported here but not referenced by any cell
# visible in this notebook; all API calls go through the boto3 clients.
import sagemaker
# create a sagemaker execution role via the AWS SageMaker console, then paste in the arn here
# (this ARN is passed as ExecutionRoleArn to create_model further down)
role = 'arn:aws:iam::113969896847:role/service-role/AmazonSageMaker-ExecutionRole-20220714T204241'

In [6]:
# Deep Learning Container (DLC) images available for inference are listed at:
# https://github.com/aws/deep-learning-containers/blob/master/available_images.md
# The Hugging Face image below was replaced with the PyTorch DLC image; kept for reference:
#model_image_url="763104351884.dkr.ecr."+region_name+".amazonaws.com/"+\
#                "huggingface-pytorch-inference:1.9-transformers4.12-cpu-py38-ubuntu20.04"
model_image_url = (
    f"763104351884.dkr.ecr.{region_name}.amazonaws.com/"
    "pytorch-inference:1.11.0-cpu-py38-ubuntu20.04-sagemaker"
)
print(model_image_url)


# MODEL ARTIFACT LOCATION: change this if you want to upload/deploy a new model
model_data_url = (
    's3://sagemaker-us-west-2-113969896847/'
    'pytorch-training-2022-07-19-05-38-58-148/output/model.tar.gz'
)

# environment variables handed to the inference container
container_environment = {
#   'HF_MODEL_ID': 'distilbert-base-uncased-finetuned-sst-2-english',
#   'HF_TASK' : 'text-classification',
    'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
    'SAGEMAKER_REGION': region_name,
}

# container definition: image + model artifact + environment
container_config = {
    'Image': model_image_url,
    'Mode': 'SingleModel',
    'ModelDataUrl': model_data_url,
    'Environment': container_environment,
}
print(container_config)

# register the model with SageMaker
# ... models console: https://console.aws.amazon.com/sagemaker/home?#/models
response = sm_client.create_model(
    ModelName=model_name,
    PrimaryContainer=container_config,
    ExecutionRoleArn=role,
    EnableNetworkIsolation=False,
)
print(response)

763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:1.11.0-cpu-py38-ubuntu20.04-sagemaker
{'Image': '763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:1.11.0-cpu-py38-ubuntu20.04-sagemaker', 'Mode': 'SingleModel', 'ModelDataUrl': 's3://sagemaker-us-west-2-113969896847/pytorch-training-2022-07-19-05-38-58-148/output/model.tar.gz', 'Environment': {'SAGEMAKER_CONTAINER_LOG_LEVEL': '20', 'SAGEMAKER_REGION': 'us-west-2'}}
{'ModelArn': 'arn:aws:sagemaker:us-west-2:113969896847:model/multibert-text-classification-model-2022-07-20-03-10-32', 'ResponseMetadata': {'RequestId': '28eb5584-dcc0-4ada-8848-b73886a1f55c', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '28eb5584-dcc0-4ada-8848-b73886a1f55c', 'content-type': 'application/x-amz-json-1.1', 'content-length': '117', 'date': 'Wed, 20 Jul 2022 03:17:02 GMT'}, 'RetryAttempts': 0}}


In [7]:
# create a serverless endpoint config that routes all traffic to the model above
# ... endpoint configs console: https://console.aws.amazon.com/sagemaker/home?#/endpointConfig
serverless_variant = {
    "ModelName": model_name,
    "VariantName": "AllTraffic",
    # MemorySizeInMB and MaxConcurrency are specified in the serverless config object
    "ServerlessConfig": {
        "MemorySizeInMB": 3072,
        "MaxConcurrency": 10,
    },
}

endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[serverless_variant],
)
print(endpoint_config_response)

endpoint_config_arn = endpoint_config_response['EndpointConfigArn']
print(f'Endpoint configuration name: {endpoint_config_name}')
print(f'Endpoint configuration arn:  {endpoint_config_arn}')

{'EndpointConfigArn': 'arn:aws:sagemaker:us-west-2:113969896847:endpoint-config/multibert-text-classification-epc-2022-07-20-03-10-32', 'ResponseMetadata': {'RequestId': '72496bda-370f-421a-a4d5-ab62eb249175', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '72496bda-370f-421a-a4d5-ab62eb249175', 'content-type': 'application/x-amz-json-1.1', 'content-length': '134', 'date': 'Wed, 20 Jul 2022 03:20:32 GMT'}, 'RetryAttempts': 0}}
Endpoint configuration name: multibert-text-classification-epc-2022-07-20-03-10-32
Endpoint configuration arn:  arn:aws:sagemaker:us-west-2:113969896847:endpoint-config/multibert-text-classification-epc-2022-07-20-03-10-32


In [8]:
# start creating the endpoint from the endpoint config defined above
# ... endpoints console: https://console.aws.amazon.com/sagemaker/home?#/endpoints
create_endpoint_args = {
    "EndpointName": endpoint_name,
    "EndpointConfigName": endpoint_config_name,
}
endpoint_response = sm_client.create_endpoint(**create_endpoint_args)
print(endpoint_response)

endpoint_arn = endpoint_response['EndpointArn']
print(f'Endpoint name: {endpoint_name}')
print(f'Endpoint arn:  {endpoint_arn}')

{'EndpointArn': 'arn:aws:sagemaker:us-west-2:113969896847:endpoint/multibert-text-classification-ep-2022-07-20-03-10-32', 'ResponseMetadata': {'RequestId': '8b90b4a3-1cf9-4454-bcdc-88664e2a269a', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '8b90b4a3-1cf9-4454-bcdc-88664e2a269a', 'content-type': 'application/x-amz-json-1.1', 'content-length': '120', 'date': 'Wed, 20 Jul 2022 03:23:07 GMT'}, 'RetryAttempts': 0}}
Endpoint name: multibert-text-classification-ep-2022-07-20-03-10-32
Endpoint arn:  arn:aws:sagemaker:us-west-2:113969896847:endpoint/multibert-text-classification-ep-2022-07-20-03-10-32


In [9]:
# Invoke the endpoint by name with a JSON payload and print the prediction.
#
# The endpoint must be "InService" before it can be invoked. Instead of relying
# on the reader to wait manually, block on the boto3 waiter so this cell also
# works under Restart & Run All. The waiter polls describe_endpoint and raises
# botocore's WaiterError if the endpoint fails or polling gives up.
import json

sm_client.get_waiter("endpoint_in_service").wait(EndpointName=endpoint_name)

# runtime client: invocations go through "sagemaker-runtime", not "sagemaker"
sm_runtime = session.client("sagemaker-runtime", region_name=region_name)

content_type = "application/json"

# request payload; the text to classify goes under the "inputs" key
# NOTE(review): the recorded run of this cell ended in a ReadTimeoutError. If
# that still happens once the endpoint is InService, the client read timeout
# and the endpoint's cold-start time are worth checking -- confirm.
data = {
   "inputs": "I love Amazon SageMaker Studio Lab!"
}

response = sm_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=json.dumps(data)
)
print(response)
# Body is a streaming object: read it once and decode the bytes as UTF-8 text
print(response["Body"].read().decode("utf-8"))

ReadTimeoutError: Read timeout on endpoint URL: "https://runtime.sagemaker.us-west-2.amazonaws.com/endpoints/multibert-text-classification-ep-2022-07-20-03-10-32/invocations"

In [None]:
# clean up: uncomment the following lines to delete the endpoint, its endpoint config, and the model created above
#sm_client.delete_endpoint(EndpointName=endpoint_name)
#sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
#sm_client.delete_model(ModelName=model_name)