In [2]:
import sagemaker
import boto3

sess = sagemaker.Session()
sagemaker_session_bucket = sess.default_bucket()
role = sagemaker.get_execution_role() 

In [3]:
import os
filePath = 'model/model.tar.gz'

if os.path.exists(filePath):
    os.remove(filePath)

In [4]:
model_name = "flan-t5-xxl"

In [5]:
%cd model

/home/ec2-user/SageMaker/deploy-flan-t5-sagemaker/model


In [6]:
!tar zcvf model.tar.gz *

code/
code/requirements.txt
code/.ipynb_checkpoints/
code/.ipynb_checkpoints/requirements-checkpoint.txt
code/.ipynb_checkpoints/inference-checkpoint.py
code/inference.py


In [7]:
s3_location = f"s3://{sess.default_bucket()}/{model_name}/model.tar.gz"

In [8]:
!aws s3 cp model.tar.gz $s3_location

upload: ./model.tar.gz to s3://sagemaker-us-east-1-905847418383/flan-t5-xxl/model.tar.gz


In [9]:
%cd ..

/home/ec2-user/SageMaker/deploy-flan-t5-sagemaker


In [10]:
from sagemaker.huggingface.model import HuggingFaceModel

huggingface_model = HuggingFaceModel(
    model_data=s3_location,
    role=role,
    transformers_version="4.17",
    pytorch_version="1.10",
    py_version='py38',
)

In [11]:
from sagemaker.utils import name_from_base

endpoint_name = name_from_base(model_name)

predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.4xlarge",
    endpoint_name=endpoint_name,
)

---------!

!!!NOTE: Even after the endpoint has been deployed, we still need to wait 1-2 minutes before we can start using it. That's because the model is downloading from the HF Model Hub and due to its size it won't be quite finished when the endpoint is deployed.

In [12]:
predictor.endpoint_name

'flan-t5-xxl-2023-03-10-07-09-14-864'

In [13]:
prompt = """Answer the following question by reasoning step by step.
The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, how many apples do they have now?"""                                              

In [19]:
data = {
    "inputs": prompt,
    "min_length": 20,
    "max_length": 50,
    "do_sample": True,
    "temperature": 0.6,
}

res = predictor.predict(data=data)
print(res)

They used 20 apples, so they have 23 - 20 = 3 apples now. They bought 6, so they have 3 + 6 = 9 apples now. Therefore, the answer is 9.
