# Deploy and query FLAN-T5 XL on a SageMaker JumpStart endpoint

In [1]:
import sagemaker, boto3, json
from sagemaker.session import Session

# Create the SageMaker session object; it is used here to look up the caller's identity.
sagemaker_session = Session()
# ARN returned for the current caller. It is passed as `role` when the Model is constructed
# in the deploy cell.
# NOTE(review): presumably this resolves to an IAM *role* ARN when run inside SageMaker;
# confirm it is not a user ARN when running elsewhere.
aws_role = sagemaker_session.get_caller_identity_arn()

In [3]:
# Identifier and version of the JumpStart model used by the retrieve calls in the deploy cell.
# NOTE(review): "*" presumably selects the latest available version; pin an explicit
# version if reproducible results matter.
model_id = "huggingface-text2text-flan-t5-xl"
model_version = "*"

In [4]:
from sagemaker import image_uris, model_uris
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base


# Hosting configuration: the instance type for the endpoint and a name built by
# name_from_base from the model id. The same name is used for the model and the endpoint.
inference_instance_type = "ml.p3.2xlarge"
endpoint_name = name_from_base(f"jumpstart-example-{model_id}")

# Look up the inference container image for this model and instance type.
# This is the base HuggingFace container image for the model selected above.
deploy_image_uri = image_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    image_scope="inference",
    instance_type=inference_instance_type,
    region=None,
    framework=None,  # inferred automatically from model_id
)

# Look up the location of the model artifacts for inference.
model_uri = model_uris.retrieve(
    model_scope="inference",
    model_id=model_id,
    model_version=model_version,
)

# Describe the model: which container to run, which artifacts to load, and under which role.
model = Model(
    name=endpoint_name,
    role=aws_role,
    image_uri=deploy_image_uri,
    model_data=model_uri,
    predictor_cls=Predictor,
)

# Deploy the model to a single-instance endpoint. The Predictor class is passed so that
# inference can be run through the SageMaker API once the model is deployed via Model.
model_predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type=inference_instance_type,
    initial_instance_count=1,
    volume_size=30,
    predictor_cls=Predictor,
)

-----------!

In [5]:
# Formatting helpers for printed results: a line break and the ANSI escape
# sequences that turn bold text on and off.
newline = "\n"
bold = "\033[1m"
unbold = "\033[0m"


def query_endpoint(encoded_text, endpoint_name):
    """Invoke a SageMaker endpoint with a text payload and return the raw response.

    Args:
        encoded_text: Request body to send; callers in this notebook pass
            UTF-8 encoded bytes.
        endpoint_name: Name of the endpoint to invoke.

    Returns:
        The response object returned by ``invoke_endpoint``; its ``"Body"``
        entry is read by ``parse_response``.

    Note:
        A new boto3 runtime client is created on every call.
    """
    request = {
        "EndpointName": endpoint_name,
        "ContentType": "application/x-text",
        "Body": encoded_text,
    }
    return boto3.client("runtime.sagemaker").invoke_endpoint(**request)


def parse_response(query_response):
    """Extract the generated text from an endpoint response.

    Args:
        query_response: Mapping whose ``"Body"`` entry exposes a ``read()``
            method returning a JSON document.

    Returns:
        The value stored under the ``"generated_text"`` key of the decoded JSON.
    """
    raw_payload = query_response["Body"].read()
    return json.loads(raw_payload)["generated_text"]

In [6]:
# Example prompts covering translation, open-ended generation, a non-English
# question, and sentiment classification.
# `newline`, `bold` and `unbold` come from the previous cell, which also defines
# query_endpoint and parse_response; they are intentionally not redefined here.
text1 = "Translate to Spanish:  My name is Arthur"
text2 = "A step by step recipe to make bolognese pasta:"
text3 = "元気ですか？"
text4 = "Review: This moive is so great and once again dazzles and delights us\nthis movie review sentence negative or positive?"


# Send each prompt to the deployed endpoint (one request per prompt) and print
# the prompt together with the generated text, which is shown in bold.
for text in [text1, text2, text3, text4]:
    query_response = query_endpoint(text.encode("utf-8"), endpoint_name=endpoint_name)
    generated_text = parse_response(query_response)
    print(
        f"Inference:{newline}"
        f"input text: {text}{newline}"
        f"generated text: {bold}{generated_text}{unbold}{newline}"
    )


Inference:
input text: Translate to Spanish:  My name is Arthur
generated text: [1mYo es Arthur[0m

Inference:
input text: A step by step recipe to make bolognese pasta:
generated text: [1mIn a large saucepan, combine the ground beef, onion, garlic, tomato paste, tomato[0m

Inference:
input text: 元気ですか？
generated text: [1m?[0m

Inference:
input text: Review: This moive is so great and once again dazzles and delights us
this movie review sentence negative or positive?
generated text: [1mpositive[0m



In [7]:
# Clean up the resources behind `model_predictor` (returned by model.deploy in the deploy cell):
# first the model, then the endpoint.
# NOTE(review): the order presumably matters — delete_model likely needs the endpoint's
# configuration to find the model, so it runs before delete_endpoint; confirm against the
# Predictor documentation before reordering.
model_predictor.delete_model()
model_predictor.delete_endpoint()