# Chapter Summarizer Inference
Create a serverless endoint using a pre-baked docker container.

In [1]:
!pip install sagemaker botocore boto3 awscli --upgrade

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting sagemaker
  Downloading sagemaker-2.151.0.tar.gz (747 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m748.0/748.0 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting botocore
  Downloading botocore-1.29.122-py3-none-any.whl (10.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.7/10.7 MB[0m [31m58.7 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
Collecting boto3
  Downloading boto3-1.26.122-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.6/135.6 kB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
Collecting awscli
  Downloading awscli-1.27.122-py3-none-any.whl (4.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.1/4.1 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Collecting cloudpickle==2.2.1
  Downloading cloudpickle-2.2.

In [1]:
BASE_NAME = "chpater-sum-gpt3"
SERVE_IMG_URI = "957269117416.dkr.ecr.eu-central-1.amazonaws.com/chapter-sum-gpt3:latest"

## Define role, model name, and endpoint name

In [2]:
import boto3
import sagemaker
from sagemaker import Session
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer

sagemaker_role = sagemaker.get_execution_role()

model_name = f"{BASE_NAME}-model"
endpoint_name = f"{BASE_NAME}-endpoint"
print(f"Model name: {model_name}")
print(f"Endpoint name: {endpoint_name}")
print(f"Image URI: {SERVE_IMG_URI}")
print(f"Role: {sagemaker_role}")

Model name: chpater-sum-gpt3-apr23-model
Endpoint name: chpater-sum-gpt3-apr23-endpoint
Image URI: 957269117416.dkr.ecr.eu-central-1.amazonaws.com/chapter-sum-gpt3:latest
Role: arn:aws:iam::957269117416:role/service-role/AmazonSageMaker-ExecutionRole-20230121T194089


## Create a SageMaker model, serverless config, and serverless endpoint

In [17]:
from sagemaker.serverless import ServerlessInferenceConfig

#create a SM model using a pre-baked docker container
summarizer_model = Model(
    name=model_name,
    role=sagemaker_role,
    image_uri=SERVE_IMG_URI,
    predictor_cls=Predictor,
    #env = {'MODEL_SERVER_TIMEOUT' : '3000' }
)

# Specify MemorySizeInMB and MaxConcurrency in the serverless config object
serverless_config = ServerlessInferenceConfig(
  memory_size_in_mb=2048,
  max_concurrency=2,
)

# create an endpoint config and endpoint.
serverless_predictor = summarizer_model.deploy(
    endpoint_name=endpoint_name,
    serializer=JSONSerializer(),
    serverless_inference_config=serverless_config
)

print(f"\nCreated model: {summarizer_model.name}")
print(f"Created endpoint: {serverless_predictor.endpoint_name}")

--!
Created model: chpater-sum-gpt3-apr23-model
Created endpoint: chpater-sum-gpt3-apr23-endpoint


## Getting prediction from an endpoint

In [10]:
import json

sm_runtime= boto3.client('runtime.sagemaker')
input_path = "/home/ec2-user/SageMaker/chapter_summarization_api/src/summarizer/resources/chapter/01.txt"

with open(input_path, "r") as fp:
    long_text = fp.read()
    
    payload_json = json.dumps({'text': long_text})

    response = sm_runtime.invoke_endpoint(
        EndpointName=endpoint_name, 
        Body=payload_json.encode('utf-8'), 
        ContentType="application/json",
        Accept="application/json"
    )

    response_body = response['Body'].read().decode('utf-8')
    summary_text = json.loads(response_body)["summary"]
    print(f"Summary:\n{summary_text}")

Summary:
: Alice follows a White Rabbit into a rabbit-hole, and falls down a deep well. She is eventually rescued by a mouse, sees a beautiful garden, and finally escapes from the hall by unlocking a door with a key she finds on a small glass table. 

Alice found a key and a bottle of wine on her way to the little door, but she got lost trying to find the right way to shut it. She ate a cake to make herself grow, but didn't grow at all.


## Clean up Resources

In [3]:
sm = boto3.client("sagemaker")

response = sm.delete_endpoint(EndpointName=endpoint_name)
print(f"Delete endpoint response: {response}")

response = sm.delete_endpoint_config(EndpointConfigName=endpoint_name)
print(f"Delete endpoint configuration response: {response}")

response = sm.delete_model(ModelName=model_name)
print(f"Delete model response: {response}")

Delete endpoint response: {'ResponseMetadata': {'RequestId': '367ede45-8ba0-44e5-8071-1a8a0fb42a5d', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '367ede45-8ba0-44e5-8071-1a8a0fb42a5d', 'content-type': 'application/x-amz-json-1.1', 'content-length': '0', 'date': 'Sat, 29 Apr 2023 03:27:09 GMT'}, 'RetryAttempts': 0}}
Delete endpoint configuration response: {'ResponseMetadata': {'RequestId': 'b29c7836-9b0f-41f8-897e-81362b922eb2', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'b29c7836-9b0f-41f8-897e-81362b922eb2', 'content-type': 'application/x-amz-json-1.1', 'content-length': '0', 'date': 'Sat, 29 Apr 2023 03:27:09 GMT'}, 'RetryAttempts': 0}}
Delete model response: {'ResponseMetadata': {'RequestId': '901d3da8-c6f8-4121-8181-5c23338d0537', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '901d3da8-c6f8-4121-8181-5c23338d0537', 'content-type': 'application/x-amz-json-1.1', 'content-length': '0', 'date': 'Sat, 29 Apr 2023 03:27:09 GMT'}, 'RetryAttempts

In [1]:
!python --version

Python 3.8.13
