In [1]:
# install pre-trained model packages
# Every version is pinned exactly. The `+cpu` torch/torchvision builds are
# looked up through the extra wheel listing passed with -f.
# NOTE: pip reports that a kernel restart may be needed before the freshly
# installed packages can be used.
%pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
%pip install huggingface_hub==0.1.0 
%pip install transformers==4.12

Looking in links: https://download.pytorch.org/whl/torch_stable.html
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Smoke-test the pre-trained model locally before deploying it.
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TextClassificationPipeline,
)

# The tokenizer and the model weights come from the same Hub checkpoint.
hf_model_id = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(hf_model_id)
model = AutoModelForSequenceClassification.from_pretrained(hf_model_id)

# return_all_scores=True asks the pipeline for a score per label rather than
# only the top label.
pipe = TextClassificationPipeline(
    tokenizer=tokenizer,
    model=model,
    return_all_scores=True,
)
pipe("I love Amazon SageMaker Studio Lab!")

[[{'label': 'NEGATIVE', 'score': 0.0005074660875834525},
  {'label': 'POSITIVE', 'score': 0.9994925260543823}]]

In [3]:
# install AWS packages
# boto3 supplies the SageMaker clients created in the following cells; sagemaker
# is imported in the cell that sets the execution role.
# NOTE(review): neither package is version-pinned, unlike the model packages
# installed at the top of the notebook — consider pinning for reproducibility.
%pip install boto3
%pip install sagemaker

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Note: you may need to restart the kernel to use updated packages.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Note: you may need to restart the kernel to use updated packages.


In [4]:
# AWS connection settings.
# A named profile is used so that no credentials have to be entered in the notebook.
region_name = "us-west-2"   # region handed to every boto3 client created later
profile_name = "default"    # profile handed to boto3.Session

In [5]:
# Open a boto3 session for the configured profile, then check that the
# SageMaker client works by listing the endpoints in the selected region.
import boto3

session = boto3.Session(profile_name=profile_name)
sm_client = session.client(service_name='sagemaker', region_name=region_name)

response = sm_client.list_endpoints()
print(response)

{'Endpoints': [], 'ResponseMetadata': {'RequestId': 'f99610c5-b3cd-489d-9481-0a0bab8ef9ab', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'f99610c5-b3cd-489d-9481-0a0bab8ef9ab', 'content-type': 'application/x-amz-json-1.1', 'content-length': '16', 'date': 'Thu, 14 Jul 2022 05:58:57 GMT'}, 'RetryAttempts': 0}}


In [6]:
# Derive the model, endpoint-config and endpoint names from one base name.
import time

ml_model_name = "distilbert-text-classification"

# UTC timestamp suffix (leading dash included) shared by all three names so
# the resources created in one run are easy to match up.
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())

model_name = f"{ml_model_name}-model{timestamp}"
endpoint_config_name = f"{ml_model_name}-epc{timestamp}"
endpoint_name = f"{ml_model_name}-ep{timestamp}"

# one name per line
print(model_name, endpoint_config_name, endpoint_name, sep="\n")

distilbert-text-classification-model-2022-07-14-05-58-59
distilbert-text-classification-epc-2022-07-14-05-58-59
distilbert-text-classification-ep-2022-07-14-05-58-59


In [7]:
# set sagemaker execution role
import os
import sagemaker

# Create a SageMaker execution role via the AWS SageMaker console, then either
# export its ARN as SAGEMAKER_EXECUTION_ROLE_ARN before starting the kernel or
# paste it in below as the fallback value. Reading the environment first means
# the notebook can be reused in another account without editing the code.
role = os.environ.get(
    'SAGEMAKER_EXECUTION_ROLE_ARN',
    'arn:aws:iam::056558984126:role/service-role/AmazonSageMaker-ExecutionRole-20220625T194668',
)

In [8]:
# Deep learning container (DLC) images that can be used here are listed at:
# https://github.com/aws/deep-learning-containers/blob/master/available_images.md
image_tag = "huggingface-pytorch-inference:1.9-transformers4.12-cpu-py38-ubuntu20.04"
model_image_url = f"763104351884.dkr.ecr.{region_name}.amazonaws.com/{image_tag}"
print(model_image_url)

# Container definition: the environment variables tell the container which
# Hub model and task to serve, plus the log level and region.
container_env = {
    'HF_MODEL_ID': 'distilbert-base-uncased-finetuned-sst-2-english',
    'HF_TASK': 'text-classification',
    'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
    'SAGEMAKER_REGION': region_name,
}
container_config = {
    'Image': model_image_url,
    'Mode': 'SingleModel',
    'Environment': container_env,
}
print(container_config)

# Register the model with SageMaker.
# ... models console: https://console.aws.amazon.com/sagemaker/home?#/models
create_model_args = {
    'ModelName': model_name,
    'PrimaryContainer': container_config,
    'ExecutionRoleArn': role,
    'EnableNetworkIsolation': False,
}
response = sm_client.create_model(**create_model_args)
print(response)

763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:1.9-transformers4.12-cpu-py38-ubuntu20.04
{'Image': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:1.9-transformers4.12-cpu-py38-ubuntu20.04', 'Mode': 'SingleModel', 'Environment': {'HF_MODEL_ID': 'distilbert-base-uncased-finetuned-sst-2-english', 'HF_TASK': 'text-classification', 'SAGEMAKER_CONTAINER_LOG_LEVEL': '20', 'SAGEMAKER_REGION': 'us-west-2'}}
{'ModelArn': 'arn:aws:sagemaker:us-west-2:056558984126:model/distilbert-text-classification-model-2022-07-14-05-58-59', 'ResponseMetadata': {'RequestId': '5b4d29a4-d73e-4d60-bcbf-4b6f928a3b0a', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '5b4d29a4-d73e-4d60-bcbf-4b6f928a3b0a', 'content-type': 'application/x-amz-json-1.1', 'content-length': '118', 'date': 'Thu, 14 Jul 2022 05:59:12 GMT'}, 'RetryAttempts': 0}}


In [9]:
# Create the endpoint configuration with a single serverless production variant.
# ... endpoint configs console: https://console.aws.amazon.com/sagemaker/home?#/endpointConfig
serverless_variant = {
    "ModelName": model_name,
    "VariantName": "AllTraffic",
    # The serverless config object carries MemorySizeInMB and MaxConcurrency.
    "ServerlessConfig": {"MemorySizeInMB": 4096, "MaxConcurrency": 10},
}
endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[serverless_variant],
)
print(endpoint_config_response)

print(f'Endpoint configuration name: {endpoint_config_name}')
print(f"Endpoint configuration arn:  {endpoint_config_response['EndpointConfigArn']}")

{'EndpointConfigArn': 'arn:aws:sagemaker:us-west-2:056558984126:endpoint-config/distilbert-text-classification-epc-2022-07-14-05-58-59', 'ResponseMetadata': {'RequestId': '65e2b5da-c438-49b2-9d30-7f55b3da5e46', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '65e2b5da-c438-49b2-9d30-7f55b3da5e46', 'content-type': 'application/x-amz-json-1.1', 'content-length': '135', 'date': 'Thu, 14 Jul 2022 05:59:14 GMT'}, 'RetryAttempts': 0}}
Endpoint configuration name: distilbert-text-classification-epc-2022-07-14-05-58-59
Endpoint configuration arn:  arn:aws:sagemaker:us-west-2:056558984126:endpoint-config/distilbert-text-classification-epc-2022-07-14-05-58-59


In [10]:
# Create the endpoint from the endpoint configuration defined earlier.
# ... endpoints console: https://console.aws.amazon.com/sagemaker/home?#/endpoints
create_endpoint_args = {
    'EndpointName': endpoint_name,
    'EndpointConfigName': endpoint_config_name,
}
endpoint_response = sm_client.create_endpoint(**create_endpoint_args)
print(endpoint_response)

print(f'Endpoint name: {endpoint_name}')
print(f"Endpoint arn:  {endpoint_response['EndpointArn']}")

{'EndpointArn': 'arn:aws:sagemaker:us-west-2:056558984126:endpoint/distilbert-text-classification-ep-2022-07-14-05-58-59', 'ResponseMetadata': {'RequestId': '0d64c774-6f6d-435f-a960-c7812fd8fd64', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '0d64c774-6f6d-435f-a960-c7812fd8fd64', 'content-type': 'application/x-amz-json-1.1', 'content-length': '121', 'date': 'Thu, 14 Jul 2022 05:59:19 GMT'}, 'RetryAttempts': 0}}
Endpoint name: distilbert-text-classification-ep-2022-07-14-05-58-59
Endpoint arn:  arn:aws:sagemaker:us-west-2:056558984126:endpoint/distilbert-text-classification-ep-2022-07-14-05-58-59


In [13]:
# Invoke the endpoint by endpoint name once it is "InService".
import json

# The endpoint must be in service before it can be invoked. Block on the
# boto3 waiter instead of relying on a manual pause, so the notebook can be run
# top to bottom. The waiter raises if the endpoint fails to come up.
sm_client.get_waiter('endpoint_in_service').wait(EndpointName=endpoint_name)

sm_runtime = session.client("sagemaker-runtime", region_name=region_name)

content_type = "application/json"

# request payload: the text to classify goes under "inputs"
data = {
   "inputs": "Hi, I am Dogbert."
}

response = sm_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=json.dumps(data)
)
print(response)
# The body is a streaming object: read and decode it to see the JSON prediction.
print(response["Body"].read().decode("utf-8"))

{'ResponseMetadata': {'RequestId': 'cd1dff89-cbea-4b74-8ca1-6dbfef30cd47', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'cd1dff89-cbea-4b74-8ca1-6dbfef30cd47', 'x-amzn-invoked-production-variant': 'AllTraffic', 'date': 'Thu, 14 Jul 2022 06:04:28 GMT', 'content-type': 'application/json', 'content-length': '48'}, 'RetryAttempts': 0}, 'ContentType': 'application/json', 'InvokedProductionVariant': 'AllTraffic', 'Body': <botocore.response.StreamingBody object at 0x7f4dd5815070>}
[{"label":"POSITIVE","score":0.997832715511322}]


In [None]:
# clean up: uncomment the following lines to delete the endpoint, its endpoint config, and the model
#sm_client.delete_endpoint(EndpointName=endpoint_name)
#sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
#sm_client.delete_model(ModelName=model_name)