### Install requirements

In [None]:
!pip install sagemaker --quiet --upgrade --force-reinstall

### JumpStart example

In [None]:
from sagemaker.jumpstart.model import JumpStartModel

In [None]:
# Build a JumpStartModel for the given JumpStart model id.
model_jumpstart = JumpStartModel(
    model_id="huggingface-llm-falcon-7b-bf16",
)

In [None]:
# Deploy the model on the requested instance type and keep the returned predictor for inference calls.
predictor_jumpstart = model_jumpstart.deploy(
    instance_type="ml.g5.2xlarge",
)

In [None]:
# Question sent to the deployed model.
prompt = "What are the places to see in Rome?"

# Request body: the prompt plus the text-generation parameters.
payload = dict(
    inputs=prompt,
    parameters=dict(
        do_sample=True,
        top_p=0.9,
        temperature=0.4,
        max_new_tokens=256,
        stop=["<|endoftext|>", "</s>"],
    ),
)

# Call the endpoint and print the generated text of the first returned item.
response = predictor_jumpstart.predict(payload)
print(response[0]["generated_text"])

##### Deleting the model and endpoint

In [None]:
# Clean up the resources created by the JumpStart example: the model first, then the endpoint.
# NOTE(review): the order looks deliberate — presumably delete_model() resolves the model
# through the endpoint's configuration; confirm against the SDK docs before reordering.
predictor_jumpstart.delete_model()
predictor_jumpstart.delete_endpoint()

### HuggingFace Model example

In [None]:
import sagemaker
import boto3
import json
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the IAM role ARN used by the model below.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # Fallback when get_execution_role() raises ValueError:
    # look the role up by name through IAM and take its ARN.
    iam = boto3.client("iam")
    role_description = iam.get_role(RoleName="sagemaker_execution_role")
    role = role_description["Role"]["Arn"]

In [None]:
# Environment settings handed to the model via `env`: the model id and the GPU count
# (serialized to a string with json.dumps).
hub = dict(
    HF_MODEL_ID="tiiuae/falcon-7b",
    SM_NUM_GPUS=json.dumps(1),
)

# Model definition using the Hugging Face LLM image URI for backend "huggingface", version "0.8.2".
model_huggingface = HuggingFaceModel(
    role=role,
    env=hub,
    image_uri=get_huggingface_llm_image_uri("huggingface", version="0.8.2"),
)

In [None]:
# Deploy on a single ml.g5.2xlarge instance with the container startup
# health-check timeout set to 300, and keep the returned predictor.
predictor_huggingface = model_huggingface.deploy(
    instance_type="ml.g5.2xlarge",
    initial_instance_count=1,
    container_startup_health_check_timeout=300,
)

##### Deleting the model and endpoint

In [None]:
# Clean up the resources created by the HuggingFace example: the model first, then the endpoint.
# NOTE(review): the order looks deliberate — presumably delete_model() resolves the model
# through the endpoint's configuration; confirm against the SDK docs before reordering.
predictor_huggingface.delete_model()
predictor_huggingface.delete_endpoint()

### SageMaker Custom Docker image example

In [None]:
from sagemaker import image_uris, model_uris, script_uris, hyperparameters
from sagemaker.jumpstart.notebook_utils import list_jumpstart_models
from sagemaker.predictor import Predictor
from sagemaker.model import Model
from sagemaker.session import Session

# Create a SageMaker session and use it to obtain the caller identity ARN,
# which is passed as `role` to the Model created further down.
sagemaker_session = Session()
aws_role = sagemaker_session.get_caller_identity_arn()

In [None]:
# Look up the inference image URI for the model id on the target instance type.
deploy_image_uri = image_uris.retrieve(
    model_id="huggingface-llm-falcon-7b-bf16",
    model_version="*",
    image_scope="inference",
    instance_type="ml.g5.2xlarge",
    framework=None,  # automatically inferred from model_id
    region=None,
)

# Look up the inference model URI for the same model id and version.
model_uri = model_uris.retrieve(
    model_id="huggingface-llm-falcon-7b-bf16",
    model_version="*",
    model_scope="inference",
)

In [None]:
# Display the retrieved image URI as the cell output.
deploy_image_uri

In [None]:
# Display the retrieved model URI as the cell output.
model_uri

In [None]:
# Generic Model built from the retrieved image URI and model URI, with the
# environment variables below passed via `env`.
model_custom = Model(
    role=aws_role,
    image_uri=deploy_image_uri,
    model_data=model_uri,
    predictor_cls=Predictor,
    env=dict(
        SAGEMAKER_MODEL_SERVER_WORKERS="1",
        TS_DEFAULT_WORKERS_PER_MODEL="1",
        HF_MODEL_ID="tiiuae/falcon-7b",
        HF_TASK="text-generation",
    ),
)

In [None]:
# Deploy the model on a single ml.g5.2xlarge instance and keep the returned predictor.
predictor_custom = model_custom.deploy(
    instance_type="ml.g5.2xlarge",
    initial_instance_count=1,
    predictor_cls=Predictor,
)

##### Deleting the model and endpoint

In [None]:
# Clean up the resources created by this example: the model first, then the endpoint.
# NOTE(review): the order looks deliberate — presumably delete_model() resolves the model
# through the endpoint's configuration; confirm against the SDK docs before reordering.
predictor_custom.delete_model()
predictor_custom.delete_endpoint()