In [8]:
import sagemaker
import boto3
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it not exists
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::905847418383:role/service-role/AmazonSageMaker-ExecutionRole-20230127T203433
sagemaker bucket: sagemaker-us-east-1-905847418383
sagemaker session region: us-east-1


In [19]:
!pwd

/root/llm-prompt-chaining/model


In [10]:
%cd model

In [101]:
!tar zcvf model.tar.gz *

code/
code/requirements.txt
code/.ipynb_checkpoints/
code/.ipynb_checkpoints/inference-checkpoint.py
code/.ipynb_checkpoints/requirements-checkpoint.txt
code/inference.py
model.tar.gz
model_name.txt


In [102]:
s3_location=f"s3://{sess.default_bucket()}/prompt-chaining/flan-t5-xxl/model.tar.gz"

In [103]:
!aws s3 cp model.tar.gz $s3_location

upload: ./model.tar.gz to s3://sagemaker-us-east-1-905847418383/prompt-chaining/flan-t5-xxl/model.tar.gz


In [104]:
from sagemaker.utils import name_from_base
from sagemaker.huggingface.model import HuggingFaceModel

endpoint_name = name_from_base("flan-t5-xxl")

huggingface_model = HuggingFaceModel(
    model_data=s3_location,
    role=role,
    transformers_version="4.17",
    pytorch_version="1.10",
    py_version='py38',
)

In [105]:
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.4xlarge",
    endpoint_name=endpoint_name,
    # volume_size=100,
)

---------!

In [107]:
prompt = """Answer the following questions as best you can. You have access to the following tools:

Search: A search engine. Useful for when you need to answer questions about current events. Input should be a search query.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [Search]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: Who is the prime minister of the UK and how old is he or she?
Thought:"""

In [108]:
data = {
    "inputs": prompt,
    "do_sample": False,
}

res = predictor.predict(data=data)
print(res)

Action: Search Action Input: prime minister of the uk age Observation: There
