# Precondition: the model has already been uploaded to S3, either by a training job or manually

In [None]:
# S3 URI of the packaged model artifact (model.tar.gz) that the endpoint will serve.
model_data = (
    "s3://sagemaker-us-east-1-420737321821"
    "/sagemaker/ivy-demo-pytorch-bert"
    "/pytorch-training-2021-11-29-14-48-31-535"
    "/output/model.tar.gz"
)

In [2]:
import os
import numpy as np
import pandas as pd
import sagemaker

# Key prefix for this demo's objects (not referenced again in the cells shown here).
prefix = "sagemaker/ivy-demo-pytorch-bert"

# A single SageMaker session is reused by the deploy and clean-up cells.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
role = sagemaker.get_execution_role()

# Echo the resolved bucket and role, one per line.
print(bucket, role, sep="\n")

sagemaker-us-east-1-420737321821
arn:aws:iam::420737321821:role/CUSPFE-SageMaker-ML-Team-Execution-Role-Test


# 1. Create a PyTorch endpoint with the prebuilt PyTorch framework container
- this creates the inference model and the endpoint configuration automatically
- it uses the AWS prebuilt PyTorch framework image, so we don't need to build our own Docker image, but we do need to provide the entry point script

In [5]:
from sagemaker.pytorch import PyTorchModel

# Instance type that will host the endpoint.
# Alternatives kept for reference (disabled):
# instance_type = 'ml.m5.large'
# accelerator_type = 'ml.eia2.xlarge'
instance_type = "ml.t2.medium"

In [10]:
import time

# Derive a unique endpoint name from the current Unix time; the '.' of the
# float timestamp is removed from the formatted name.
endpoint_name = (
    'grammar-classification-{}-ep'
    .format(time.time())
    .replace('.', '')
    .replace('_', '')
)
print(endpoint_name)

# Model definition: the artifact in S3 plus the entry point script located in
# the local `code` directory.
pytorch = PyTorchModel(
    entry_point='train_deploy.py',
    source_dir='code',
    model_data=model_data,
    role=role,
    py_version='py3',
    framework_version='1.3.1',
    sagemaker_session=sagemaker_session,
)

# wait=True is passed, so this call is expected to block until the endpoint
# has finished creating.
# Optional (disabled): accelerator_type=accelerator_type
predictor = pytorch.deploy(
    instance_type=instance_type,
    initial_instance_count=1,
    endpoint_name=endpoint_name,
    wait=True,
)

grammar-classification-16383498089475572-ep
----------!

# 2. Test the endpoint
This code can also run outside SageMaker, for example in a Lambda function.

In [12]:
import boto3
# Runtime client used to invoke the endpoint; built from a fresh boto3 session.
sagemaker_runtime_client = boto3.Session().client('runtime.sagemaker')

In [13]:
import json
def predict_grammer_text(text, endpoint=None):
    """Send one sentence to the deployed endpoint and report the predicted class.

    The sentence is JSON-encoded, posted through ``sagemaker_runtime_client``
    and the JSON response is decoded. The decoded values and a human-readable
    verdict are printed.

    NOTE(review): the printed label says "Probabilities", but the runs recorded
    in this notebook show negative values, so these look like unnormalised
    scores -- confirm against the entry point script.

    Args:
        text: The sentence to classify.
        endpoint: Name of the endpoint to invoke. When omitted, the
            notebook-level ``endpoint_name`` is used, so existing calls keep
            working; pass it explicitly when reusing this function where that
            global does not exist.

    Returns:
        The index returned by ``np.argmax`` over the decoded response. Index 0
        is reported as incorrect grammar, any other index as correct.
    """
    # Resolve the target lazily so the global is only needed when no override is given.
    target_endpoint = endpoint_name if endpoint is None else endpoint

    response = sagemaker_runtime_client.invoke_endpoint(
        EndpointName=target_endpoint,
        ContentType='application/json',
        Body=json.dumps(text)
    )
    result = json.loads(response['Body'].read())
    print('Probabilities for all classes: ', result)

    predicted_class = np.argmax(result)
    if predicted_class == 0:
        print('Grammer incorrect!')
    else:
        print('Grammer correct.')
    return predicted_class

In [14]:
# Sample sentence containing a grammatical error.
s = "I am a girl come from Chinese."
# Prints the endpoint's scores and verdict, and returns the predicted class index.
x = predict_grammer_text(s)
print(x)

Probabilities for all classes:  [[0.6232306361198425, -0.5857623815536499]]
Grammer incorrect!
0


In [15]:
# Grammatically correct sample sentence.
s = "I am a girl from China."
# Prints the endpoint's scores and verdict, and returns the predicted class index.
x = predict_grammer_text(s)
print(x)

Probabilities for all classes:  [[-0.879428505897522, 1.862220287322998]]
Grammer correct.
1


# 3. Clean up the endpoint
<b>Please remember to delete the endpoint if it was created just for practice.</b><br/>
The endpoint is served by an EC2 instance running in the background to handle prediction requests. If we don't delete the endpoint, that instance keeps running and continues to incur cost.

In [19]:
# Show which endpoint is about to be removed.
print(predictor.endpoint_name)

# Delete the endpoint itself.
resp = sagemaker_session.delete_endpoint(predictor.endpoint_name)
print(type(resp), resp)

# Delete the endpoint configuration, addressed by the same name as the endpoint.
resp = sagemaker_session.delete_endpoint_config(predictor.endpoint_name)
print(type(resp), resp)

<class 'numpy.int64'> None


In [25]:
# Delete the SageMaker model backing the endpoint. The model object is bound
# to `pytorch` by the deploy cell; no `pytorch_model` name is defined in this
# notebook, so referencing it fails on a fresh kernel.
print(pytorch.name)
resp = sagemaker_session.delete_model(model_name=pytorch.name)
print(type(resp), resp)

<class 'numpy.int64'> None
