In [None]:
import sagemaker
import boto3

# Resolve the IAM role ARN that the SageMaker calls below will use.
# First ask the SageMaker SDK for the execution role; if it cannot provide one
# (signalled by ValueError), fall back to looking the role up in IAM by name.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role_description = iam.get_role(RoleName='sagemaker_execution_role')
    role = role_description['Role']['Arn']

print(f"sagemaker role arn: {role}")

In [None]:
from sagemaker.huggingface import HuggingFaceModel

# Hub Model configuration. https://huggingface.co/models
hub = {
  'HF_MODEL_ID':'stabilityai/stablelm-zephyr-3b', # model_id from hf.co/models
  'HF_TASK':'question-answering' # NLP task you want to use for predictions
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   env=hub,
   role=role, # iam role with permissions to create an Endpoint
   transformers_version="4.26", # transformers version used
   pytorch_version="1.13", # pytorch version used
   py_version="py39", # python version of the DLC
)

In [None]:
# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
   initial_instance_count=1,
   instance_type="ml.m5.xlarge"
)

In [None]:
# example request, you always need to define "inputs"
data = {
"inputs": {
    "question": "What is used for inference?",
    "context": "My Name is Philipp and I live in Nuremberg. This model is used with sagemaker for inference."
    }
}

# request
predictor.predict(data)

In [None]:
# delete endpoint
# Tear down what `deploy()` created: first the SageMaker model, then the endpoint.
# NOTE(review): keep delete_model() before delete_endpoint() - presumably the SDK
# resolves the model through the still-existing endpoint; confirm before reordering.
# After this cell the endpoint can no longer be invoked.
predictor.delete_model()
predictor.delete_endpoint()

In [None]:
import boto3
import json

# Call the endpoint through the low-level boto3 runtime client instead of the
# SageMaker SDK predictor object.
# NOTE: an endpoint with this name must exist when the cell runs. In this
# notebook the cell sits after the cleanup cell, so run it before deleting
# the endpoint.
ENDPOINT_NAME = 'stablelm-zephyr-3b-endpoint'

# Runtime client used only for inference calls
sagemaker_runtime = boto3.client('sagemaker-runtime')

# Request payload, sent as a JSON document
input_data = {"inputs": "Your input text here"}
request_body = json.dumps(input_data)

# Send the request
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=ENDPOINT_NAME,
    ContentType='application/json',
    Body=request_body,
)

# The body is a stream: read it fully, then decode the bytes as UTF-8 text
raw_bytes = response['Body'].read()
result = raw_bytes.decode('utf-8')
print(result)