# Set up a Large Language Model

In [None]:
import sagemaker
import boto3
# Bootstrap session, used here only to look up the default bucket.
sess = sagemaker.Session()

# Bucket SageMaker uses for uploading data, models and logs.
# Leave as None to fall back to the session's default bucket; SageMaker
# creates that bucket automatically if it does not exist yet.
sagemaker_session_bucket = None
if sess is not None and sagemaker_session_bucket is None:
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the execution role ARN. When get_execution_role() raises
# ValueError, fall back to looking the role up by name through IAM.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role_info = iam.get_role(RoleName='sagemaker_execution_role')
    role = role_info['Role']['Arn']

# Re-create the session so it is explicitly bound to the chosen bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Echo the resolved configuration for a quick sanity check.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")


# Deploy a SageMaker endpoint for Flan-T5

## Write an inference file

In [None]:
import boto3
import json
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Environment passed to the serving container.
# Model ids are listed on the Hub: https://huggingface.co/models
hub = {
    'HF_MODEL_ID': 'google/flan-t5-xl',
    # json.dumps(1) produces the string "1".
    'SM_NUM_GPUS': json.dumps(1),
}

# URI of the Hugging Face LLM serving image, pinned to version 0.8.2.
llm_image_uri = get_huggingface_llm_image_uri("huggingface", version="0.8.2")

# Model definition: serving image + container environment + execution role.
huggingface_model = HuggingFaceModel(
    image_uri=llm_image_uri,
    env=hub,
    role=role,
)

# Deploy the model to a SageMaker inference endpoint on a single
# ml.g5.2xlarge instance.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge",
    # NOTE(review): presumably seconds allowed for the container to pass its
    # startup health check -- confirm against the SDK documentation.
    container_startup_health_check_timeout=300,
)
  

In [None]:
# Keep the deployed endpoint's name so later cells can invoke it by name.
ENDPOINT_NAME = predictor.endpoint_name
# Bare expression: the notebook shows the value as the cell output.
ENDPOINT_NAME

## Test your endpoint

In [None]:
import json
import boto3
# Runtime client used to call the deployed endpoint.
sagemaker_client = boto3.client('sagemaker-runtime')

# Prompt sent to the model: an instruction line followed by the text to
# summarize.
payload = """Summarize the following text:
deployment so you can scale to thousands of ML models in production. SageMaker Pipelines comes with a Python SDK that connects to SageMaker Studio so you can take advantage of a visual interface to build each step of the workflow. Then using a single API, you can connect each step to create a complete workflow. SageMaker Pipelines takes care of managing data between steps, packaging the code recipes, and orchestrating their execution, reducing months of coding to a few hours. Every time a workflow executes, a complete record of the data processed and actions taken is kept so data scientists and ML developers can quickly debug problems.
"""

# The request body is a JSON document carrying the prompt under "inputs".
request_body = json.dumps({"inputs": payload})
response = sagemaker_client.invoke_endpoint(
    EndpointName=ENDPOINT_NAME,
    ContentType='application/json',
    Body=request_body,
)

# Read the response body, decode it to text and parse it as JSON.
raw_body = response['Body'].read()
result = json.loads(raw_body.decode())

In [None]:
# Bare expression: the notebook shows the parsed endpoint response as the cell output.
result