In [83]:
import sagemaker

# Session object used below to look up the default S3 bucket.
sess = sagemaker.Session()

# Execution role; it is passed to the model definition in a later cell.
role = sagemaker.get_execution_role()

# Bucket returned by the session; the packaged model archive is uploaded here.
sagemaker_session_bucket = sess.default_bucket()

In [85]:
import os
# Location of the archive left behind by a previous packaging run, relative to
# the notebook's working directory.
filePath = "model/model.tar.gz"

# Drop a stale archive if one is present -- presumably so that the
# `tar ... *` run inside `model/` in the packaging cell does not pick up the
# old tarball.
if os.path.exists(filePath):
    os.unlink(filePath)

In [86]:
# Package the contents of ./model and upload the archive to S3.
# NOTE(review): this cell changes the working directory; if a step fails
# part-way the kernel stays inside `model/` until `%cd ..` is run manually.

# Used as the S3 key prefix here and as the base of the endpoint name later.
model_name = "flan-ul2"
# Enter the model directory so archive members are stored relative to it.
%cd model
# Create a gzipped tarball of everything matched by `*`
# (a shell glob does not match dot-files by default).
!tar zcvf model.tar.gz *
# Destination object: s3://<session default bucket>/<model_name>/model.tar.gz
s3_location = f"s3://{sagemaker_session_bucket}/{model_name}/model.tar.gz"
# Upload the archive; `$s3_location` is expanded from the Python variable above.
!aws s3 cp model.tar.gz $s3_location
# Go back up to the parent directory.
%cd ..

In [92]:
from sagemaker.huggingface.model import HuggingFaceModel

# Describe the model to SageMaker: where the packaged archive lives, which
# role to use, and which Python / PyTorch / Transformers versions to request.
huggingface_model = HuggingFaceModel(
    role=role,
    model_data=s3_location,  # archive uploaded by the packaging cell
    py_version="py38",
    pytorch_version="1.10",
    transformers_version="4.17",
)

In [93]:
from sagemaker.utils import name_from_base

# Build the endpoint name from the model name (the recorded output below
# shows a timestamp-like suffix appended to "flan-ul2").
endpoint_name = name_from_base(model_name)

# Deploy the model behind a single-instance endpoint and keep the returned
# predictor for the inference cells that follow.
predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.g5.4xlarge",
    initial_instance_count=1,
)

----------!

NOTE: Even after the endpoint has been deployed, wait 1-2 minutes before you start using it. The model is still being downloaded from the Hugging Face Model Hub, and because of its size the download will not have quite finished by the time the endpoint is reported as deployed.

In [94]:
# Show the name of the endpoint this predictor is attached to.
predictor.endpoint_name

'flan-ul2-2023-03-11-13-15-42-386'

In [95]:
# Two-line prompt: a "reason step by step" instruction followed by the
# word problem, separated by a single newline.
prompt = (
    "Answer the following question by reasoning step by step.\n"
    "The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, "
    "how many apples do they have now?"
)

In [96]:
# Request payload: the prompt plus the generation settings, all sent as
# top-level keys of a single dict.
data = dict(
    inputs=prompt,
    min_length=20,
    max_length=50,
    do_sample=True,
    temperature=0.6,
)

# Send the payload to the deployed endpoint and print whatever comes back.
res = predictor.predict(data=data)
print(res)

They have 23 - 20 = 3 apples left after using some for lunch. They now have 3 + 6 = 9 apples. Therefore, the final answer is 9.
