In [1]:
import boto3
import json
import urllib
import sagemaker
import tarfile

In [3]:
from sagemaker import get_execution_role

In [31]:
# Retrieves the SageMaker Execution role
sagemaker_execution_role = get_execution_role()

### 1. S3 Bucket Creation (Model Artefacts Store)
Here we create an S3 bucket that will store the HuggingFace Transformers BERT model artefacts.

In [4]:
s3_resource = boto3.resource('s3')

In [5]:
bucket = 'sagemaker-endpoint-artefacts'
region = 'eu-west-1'

# Creates the S3 bucket
s3_resource.create_bucket(Bucket=bucket, 
                          CreateBucketConfiguration={'LocationConstraint': region})

s3.Bucket(name='sagemaker-bert-endpoint-artefacts')

### Model Artefact Preparation
In order to upload the BERT model artefacts to our S3 bucket, we need to retrieve them from HuggingFace. We can pull the following artefacts from Huggingface using the `urllib` library:
- config
- vocab
- model

After we have downloaded the artefacts, they must be zipped up into a tarball object as required by SageMaker. The `predictor.py` and `requirements.txt` also need to added. This will result in a `model.tar.gz` object.

In [6]:
# Creates a local model_artefacts directory
!mkdir ./local-sagemaker-endpoint-artefacts

# adds path
path = './local-sagemaker-endpoint-artefacts/'

In [7]:
# model artefacts URLs referencing HuggingFace's own S3 bucket
config = 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-config.json'
vocab = 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-vocab.txt'
model = 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-pytorch_model.bin'

# Downloads each model artefacts
urllib.request.urlretrieve(config, path+'config_file.json')
urllib.request.urlretrieve(vocab, path+'vocab.txt')
urllib.request.urlretrieve(model, path+'pytorch_model.bin')

('./local-sagemaker-bert-endpoint-artefacts/pytorch_model.bin',
 <http.client.HTTPMessage at 0x7f42c95bf588>)

In [8]:
# Zips the model artefacts up as a tarball object
with tarfile.open('./local-sagemaker-endpoint-artefacts/model.tar.gz', 'w:gz') as f:
    f.add('./local-sagemaker-endpoint-artefacts/config_file.json', arcname='./config_file.json')
    f.add('./local-sagemaker-endpoint-artefacts/vocab.txt', arcname='./vocab.txt')
    f.add('./local-sagemaker-endpoint-artefacts/pytorch_model.bin', arcname='./pytorch_model.bin')
    f.add('./predictor/requirements.txt', arcname='./code/requirements.txt')
    f.add('./predictor/predictor.py', arcname='./code/predictor.py')

### 2. Model Artefacts Upload
Now the model artefacts can be uploaded to the S3 bucket. These will be used by SageMaker to build the model and the endpoint.

In [9]:
# Initialises a sagemaker session
sagemaker_session = sagemaker.Session()

In [10]:
# Local model_artefacts directory path
local_artefact_path = './local-sagemaker-endpoint-artefacts/model.tar.gz'
model_prefix = 'bert-model'

# Uploads the model.tar.gz object to S3
model_artefact = sagemaker_session.upload_data(path=local_artefact_path,
                                               bucket=bucket,
                                               key_prefix=model_prefix)

In [11]:
# Displays the S3 path to the model tarball object
print(model_artefact)

s3://sagemaker-bert-endpoint-artefacts/bert-squad-model/model.tar.gz


### 3. SageMaker Endpoint Deployment & Testing
The SageMaker Model, Endpoint Configuration and Endpoint can be deployed using a CloudFormation stack. A CloudFormation template, `endpoint-deployment.yaml` is provided. Once the stack has successfully created, the model is endpoint is ready for inference. We can test the endpoint using a paragraph from the SQuAD dataset.

In [13]:
# Initializes the SageMaker runtime
sagemaker_runtime = boto3.client('sagemaker-runtime')

In [33]:
context = "Large-scale construction requires collaboration across multiple disciplines. An architect normally manages the job, and a construction manager, design engineer, construction engineer or project manager supervises it. For the successful execution of a project, effective planning is essential. Those involved with the design and execution of the infrastructure in question must consider zoning requirements, the environmental impact of the job, the successful scheduling, budgeting, construction-site safety, availability and transportation of building materials, logistics, inconvenience to the public caused by construction delays and bidding, etc. The largest construction projects are referred to as megaprojects."
print(context)

Large-scale construction requires collaboration across multiple disciplines. An architect normally manages the job, and a construction manager, design engineer, construction engineer or project manager supervises it. For the successful execution of a project, effective planning is essential. Those involved with the design and execution of the infrastructure in question must consider zoning requirements, the environmental impact of the job, the successful scheduling, budgeting, construction-site safety, availability and transportation of building materials, logistics, inconvenience to the public caused by construction delays and bidding, etc. The largest construction projects are referred to as megaprojects.


In [34]:
# Try your own question!
question = "Who normally oversees a construction job?"
print(question)

Who normally oversees a construction job?


In [35]:
# Creates a test event dict object
test_event = {
    "context": context,
    "question": question
}

{'context': 'Large-scale construction requires collaboration across multiple disciplines. An architect normally manages the job, and a construction manager, design engineer, construction engineer or project manager supervises it. For the successful execution of a project, effective planning is essential. Those involved with the design and execution of the infrastructure in question must consider zoning requirements, the environmental impact of the job, the successful scheduling, budgeting, construction-site safety, availability and transportation of building materials, logistics, inconvenience to the public caused by construction delays and bidding, etc. The largest construction projects are referred to as megaprojects.', 'question': 'Who normally oversees a construction job?'}


In [39]:
# Defines the SageMaker endpoint name
sagemaker_endpoint = 'bert-model-endpoint'

# Invokes the SageMaker endpoint
response = sagemaker_runtime.invoke_endpoint(EndpointName=sagemaker_endpoint,
                                             Body=json.dumps(test_event),
                                             ContentType='application/json')

# Captures the response body and should return an answer
response_body = response['Body'] 
print(response_body.read().decode("utf-8"))

"an architect"
