# 1. Download the pretrained model and upload it to S3

In [2]:
# !pip install Transformers

In [4]:
from transformers import BertForSequenceClassification

# Hugging Face hub identifier of the checkpoint to download.
pretrained_model = "bert-base-uncased"

# Load the checkpoint into a sequence-classification model and write it to ./model.
model = BertForSequenceClassification.from_pretrained(pretrained_model)
model.save_pretrained(save_directory="./model")

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [5]:
# Show the files that save_pretrained wrote into ./model.
!ls -rtlh ./model/
# Pack the model files at the root of the archive (no leading "model/" directory).
!cd model && tar czvf ../model.tar.gz *
# List the parent of the current working directory.
!cd ../ && ls -lrt

total 418M
-rw-rw-r-- 1 ec2-user ec2-user  678 Dec  6 10:22 config.json
-rw-rw-r-- 1 ec2-user ec2-user 418M Dec  6 10:22 pytorch_model.bin
config.json
pytorch_model.bin
total 28
drwx------ 2 root     root     16384 Nov 29 09:04 lost+found
-rw-rw-r-- 1 ec2-user ec2-user  1999 Nov 30 02:24 Untitled.ipynb
drwxrwxr-x 6 ec2-user ec2-user  4096 Nov 30 03:09 sagemaker_train_demo
drwxrwxr-x 7 ec2-user ec2-user  4096 Dec  6 10:23 amazon-sagemaker-bert-pytorch


In [8]:
import boto3
import os
import numpy as np
import pandas as pd
import sagemaker

# SageMaker session used to resolve the default bucket and, later, to deploy.
sagemaker_session = sagemaker.Session()

bucket = sagemaker_session.default_bucket()
prefix = "sagemaker/ivy-pretrained-pytorch-bert"

role = sagemaker.get_execution_role()
print(bucket)
print(role)

# S3 keys always use "/" as the separator, so build the key explicitly instead
# of with os.path.join, which uses the separator of the local OS.
key = "{}/model.tar.gz".format(prefix)

# Upload inside a context manager so the file handle is closed even if the
# upload raises.
with open("model.tar.gz", "rb") as fObj:
    boto3.Session().resource("s3").Bucket(bucket).Object(key).upload_fileobj(fObj)

# S3 URI of the uploaded archive; passed as model_data when creating the model.
s3_pretrain_model = "s3://{}/{}".format(bucket, key)
print(s3_pretrain_model)

sagemaker-us-east-1-420737321821
arn:aws:iam::420737321821:role/CUSPFE-SageMaker-ML-Team-Execution-Role-Test
s3://sagemaker-us-east-1-420737321821/sagemaker/ivy-pretrained-pytorch-bert/model.tar.gz


# 2. Prepare the Inference Script
To deploy a pretrained PyTorch model, we create a `PyTorchModel` object from the model archive uploaded to S3 and set its `entry_point` to the inference script.

An implementation of `model_fn` is required in the inference script. We are going to use the default implementations of `input_fn`, `predict_fn`, `output_fn` and `model_fn`.

# 3. Deploy With the pretrained model and inference script

## 3.1 Create Model Object

In [9]:
from sagemaker.pytorch.model import PyTorchModel

# Describe the model to SageMaker: the archive in S3 plus the inference code
# (code/deploy_ei.py) that is run inside the PyTorch serving container.
pytorch_model = PyTorchModel(
    entry_point="deploy_ei.py",
    source_dir="code",
    model_data=s3_pretrain_model,
    framework_version="1.3.1",
    py_version="py3",
    role=role,
    sagemaker_session=sagemaker_session,
)

## 3.2 Create inference endpoint

In [10]:
import time

# Timestamp-based endpoint name with every "." and "_" stripped out.
endpoint_name = (
    f"grammar-classification-{time.time()}-ep"
    .replace(".", "")
    .replace("_", "")
)
instance_type = "ml.t2.medium"  # alternative: 'ml.m5.large'

# wait=True makes deploy() block until the endpoint has been created.
# accelerator_type is left unset here.
predictor = pytorch_model.deploy(
    endpoint_name=endpoint_name,
    instance_type=instance_type,
    initial_instance_count=1,
    wait=True,
)


-------------!

In [11]:
# Send and receive JSON payloads when calling the endpoint through the SDK predictor.
predictor.serializer = sagemaker.serializers.JSONSerializer()
predictor.deserializer = sagemaker.deserializers.JSONDeserializer()

# Smoke-test the endpoint with a single sentence.
res = predictor.predict('Please remember to delete me when you are done.')
# argmax over axis 1 assumes the response is a (batch, classes) array of scores — TODO confirm.
print("Predicted class:", np.argmax(res, axis=1))

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (500) from primary with message "The provided filename /opt/ml/model/traced_bert.pt does not exist". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/grammar-classification-16387863074239173-ep in account 420737321821 for more information.

# 4. Test the endpoint
The code in this section can run outside SageMaker, for example in a Lambda function.

In [12]:
import boto3
# Low-level runtime client used to invoke the endpoint without the SageMaker SDK.
sagemaker_runtime_client = boto3.Session().client('runtime.sagemaker')

In [13]:
import json
def predict_grammer_text(text, endpoint=None, client=None):
    """Classify one sentence as grammatically correct or incorrect via the endpoint.

    The sentence is JSON-encoded and sent with ``invoke_endpoint``; the JSON
    response is printed and the index of its largest value is returned.

    Args:
        text: The sentence to classify.
        endpoint: Name of the endpoint to invoke. Defaults to the
            notebook-level ``endpoint_name``.
        client: Object exposing ``invoke_endpoint`` (e.g. a boto3
            SageMaker runtime client). Defaults to the notebook-level
            ``sagemaker_runtime_client``.

    Returns:
        The result of ``np.argmax`` over the response: 0 is reported as
        "incorrect", any other index as "correct".
    """
    # Fall back to the notebook-level globals so existing one-argument calls
    # keep working unchanged.
    if endpoint is None:
        endpoint = endpoint_name
    if client is None:
        client = sagemaker_runtime_client

    response = client.invoke_endpoint(
        EndpointName=endpoint,
        ContentType='application/json',
        Body=json.dumps(text),
    )
    result = json.loads(response['Body'].read())
    # NOTE(review): the recorded outputs contain negative values, so these look
    # like raw scores rather than normalised probabilities — confirm.
    print('Probabilities for all classes: ', result)
    # np.argmax flattens its input, so this assumes the response holds the
    # scores of a single sentence.
    predicted_class = np.argmax(result)
    if predicted_class == 0:
        print('Grammer incorrect!')
    else:
        print('Grammer correct.')
    return predicted_class

In [14]:
# Example sentence; the recorded run classified it as class 0 ("incorrect").
s = "I am a girl come from Chinese."
x = predict_grammer_text(s)
print(x)

Probabilities for all classes:  [[0.6232306361198425, -0.5857623815536499]]
Grammer incorrect!
0


In [15]:
# Example sentence; the recorded run classified it as class 1 ("correct").
s = "I am a girl from China."
x = predict_grammer_text(s)
print(x)

Probabilities for all classes:  [[-0.879428505897522, 1.862220287322998]]
Grammer correct.
1


# 5. Clean the endpoint
<b>Please remember to delete the endpoint if it was created just for practice.</b><br/>
The endpoint creates an EC2 instance in the backend to serve prediction requests. If we do not delete the endpoint, that instance keeps running in the backend and continues to generate cost.

In [19]:
# Delete the endpoint created by deploy().
resp=predictor.delete_endpoint()
# NOTE(review): type(x) refers to the prediction from the test cells and looks like leftover debugging output — confirm.
print(type(x), resp)

<class 'numpy.int64'> None


In [25]:
# Delete the SageMaker model that deploy() registered. The model object in this
# notebook is `pytorch_model`; the name `pytorch` is never defined, so the
# original call fails with NameError on a fresh kernel.
resp = pytorch_model.delete_model()
print(type(x), resp)

<class 'numpy.int64'> None
