# Overview

SageMaker Inference Recommender is a new capability of SageMaker that reduces the time required to get machine learning (ML) models in production by automating performance benchmarking and load testing models across SageMaker ML instances. You can use Inference Recommender to deploy your model to a real-time inference endpoint that delivers the best performance at the lowest cost.

Inference recommender

In [2]:
%%capture
! pip install --upgrade sagemaker

# Model training and evaluation

### Data exploration 
refer to the link below for additional information about the data set 
[Regression with Amazon SageMaker Linear Learner algorithm](https://sagemaker-examples.readthedocs.io/en/latest/introduction_to_amazon_algorithms/linear_learner_abalone/Linear_Learner_Regression_csv_format.html#Regression-with-Amazon-SageMaker-Linear-Learner-algorithm)

In [3]:
import os
import boto3
import re
import sagemaker
import time
import numpy as np
import pandas as pd
from datetime import datetime, timedelta


role = sagemaker.get_execution_role()
region = boto3.Session().region_name
sm_client = boto3.client('sagemaker', region_name=region)

# S3 bucket for training data.
# Feel free to specify a different bucket and prefix.
data_bucket = f"sagemaker-example-files-prod-{region}"
data_prefix = "datasets/tabular/uci_abalone"


# S3 bucket for saving code and model artifacts.
# Feel free to specify a different bucket and prefix
output_bucket = sagemaker.Session().default_bucket()
output_prefix = "sagemaker/DEMO-linear-learner-abalone-regression"
xgb_output_prefix = "sagemaker/DEMO-xgb-abalone-regression"

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [4]:
s3 = boto3.client("s3")

FILE_TRAIN = "abalone_dataset1_train.csv"
FILE_TEST = "abalone_dataset1_test.csv"
FILE_VALIDATION = "abalone_dataset1_validation.csv"

# downloading the train, test, and validation files from data_bucket
s3.download_file(data_bucket, f"{data_prefix}/train_csv/{FILE_TRAIN}", FILE_TRAIN)
s3.download_file(data_bucket, f"{data_prefix}/test_csv/{FILE_TEST}", FILE_TEST)
s3.download_file(data_bucket, f"{data_prefix}/validation_csv/{FILE_VALIDATION}", FILE_VALIDATION)
s3.upload_file(FILE_TRAIN, output_bucket, f"{output_prefix}/train/{FILE_TRAIN}")
s3.upload_file(FILE_TEST, output_bucket, f"{output_prefix}/test/{FILE_TEST}")
s3.upload_file(FILE_VALIDATION, output_bucket, f"{output_prefix}/validation/{FILE_VALIDATION}")

In [5]:
df = pd.read_csv(
    FILE_TRAIN,
    sep=",",
    encoding="latin1",
    names=[
        "age",
        "sex",
        "Length",
        "Diameter",
        "Height",
        "Whole.weight",
        "Shucked.weight",
        "Viscera.weight",
        "Shell.weight",
    ],
)
print(df.head(1))

   age  sex  Length  Diameter  Height  Whole.weight  Shucked.weight  \
0    8    2   0.615      0.48    0.16        1.2525           0.585   

   Viscera.weight  Shell.weight  
0          0.2595          0.33  


### Configure train and validation channel

In [6]:
# creating the inputs for the fit() function with the training and validation location
s3_train_data = f"s3://{output_bucket}/{output_prefix}/train"
print(f"training files will be taken from: {s3_train_data}")
s3_validation_data = f"s3://{output_bucket}/{output_prefix}/validation"
print(f"validation files will be taken from: {s3_validation_data}")
output_location = f"s3://{output_bucket}/{output_prefix}/output"
print(f"training artifacts output location: {output_location}")

# generating the session.s3_input() format for fit() accepted by the sdk
train_data = sagemaker.inputs.TrainingInput(
    s3_train_data,
    distribution="FullyReplicated",
    content_type="text/csv",
    s3_data_type="S3Prefix",
    record_wrapping=None,
    compression=None,
)
validation_data = sagemaker.inputs.TrainingInput(
    s3_validation_data,
    distribution="FullyReplicated",
    content_type="text/csv",
    s3_data_type="S3Prefix",
    record_wrapping=None,
    compression=None,
)

training files will be taken from: s3://sagemaker-us-east-1-220653215381/sagemaker/DEMO-linear-learner-abalone-regression/train
validation files will be taken from: s3://sagemaker-us-east-1-220653215381/sagemaker/DEMO-linear-learner-abalone-regression/validation
training artifacts output location: s3://sagemaker-us-east-1-220653215381/sagemaker/DEMO-linear-learner-abalone-regression/output


### Configure the linear estimator

In [7]:
# getting the linear learner image according to the region
from sagemaker.image_uris import retrieve

container = retrieve("linear-learner", boto3.Session().region_name, version="1")
print(container)
deploy_amt_model = False

382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:1


In [8]:
%%time
import boto3
import sagemaker
from time import gmtime, strftime

sess = sagemaker.Session()

job_name = "DEMO-linear-learner-abalone-regression-" + strftime("%Y%m%d-%H-%M-%S", gmtime())
print("Training job", job_name)

linear = sagemaker.estimator.Estimator(
    container,
    role,
    input_mode="File",
    instance_count=1,
    instance_type="ml.m4.xlarge",
    output_path=output_location,
    sagemaker_session=sess,
)

linear.set_hyperparameters(
    feature_dim=8,
    epochs=16,
    wd=0.01,
    loss="absolute_loss",
    predictor_type="regressor",
    normalize_data=True,
    optimizer="adam",
    mini_batch_size=100,
    lr_scheduler_step=100,
    lr_scheduler_factor=0.99,
    lr_scheduler_minimum_lr=0.0001,
    learning_rate=0.1,
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
Training job DEMO-linear-learner-abalone-regression-20231116-03-40-51
CPU times: user 33.1 ms, sys: 3.45 ms, total: 36.6 ms
Wall time: 36.4 ms


### Train the linear model

In [9]:
%%time
linear.fit(inputs={"train": train_data, "validation": validation_data}, job_name=job_name)

INFO:sagemaker:Creating training-job with name: DEMO-linear-learner-abalone-regression-20231116-03-40-51


2023-11-16 03:41:06 Starting - Starting the training job...
2023-11-16 03:41:30 Starting - Preparing the instances for training.........
2023-11-16 03:42:47 Downloading - Downloading input data...
2023-11-16 03:43:30 Training - Downloading the training image......
2023-11-16 03:44:31 Training - Training image download completed. Training in progress....[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[11/16/2023 03:44:50 INFO 140126792275776] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'o

In [35]:
linear_model_uri = linear.model_data
print(linear_model_uri)

s3://sagemaker-us-east-1-220653215381/sagemaker/DEMO-linear-learner-abalone-regression/output/DEMO-linear-learner-abalone-regression-20231115-23-08-45/output/model.tar.gz


### Configure the xgboost estimator

In [10]:
from sagemaker.xgboost.estimator import XGBoost

xgb_container = sagemaker.image_uris.retrieve("xgboost", sess.boto_region_name, "1.7-1")
display(xgb_container)

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


'683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1'

In [11]:
xgb_job_name = "xgboost-abalone-regression-" + strftime("%Y%m%d-%H-%M-%S", gmtime())
print("Training job", job_name)

xgb = sagemaker.estimator.Estimator(
    xgb_container,
    role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path="s3://{}/{}/output".format(output_bucket, xgb_output_prefix),
    sagemaker_session=sess,
)
xgb.set_hyperparameters(
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.8,
    verbosity=0,
    objective="reg:squarederror",
    num_round=50,
)

Training job DEMO-linear-learner-abalone-regression-20231116-03-40-51


Train the XGBoost model

In [12]:
xgb.fit(inputs={"train": train_data, "validation": validation_data}, job_name=xgb_job_name)

INFO:sagemaker:Creating training-job with name: xgboost-abalone-regression-20231116-03-47-31


2023-11-16 03:47:38 Starting - Starting the training job...
2023-11-16 03:47:53 Starting - Preparing the instances for training......
2023-11-16 03:49:08 Downloading - Downloading input data......
2023-11-16 03:50:04 Training - Training image download completed. Training in progress...[34m[2023-11-16 03:50:12.197 ip-10-2-107-168.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2023-11-16 03:50:12.231 ip-10-2-107-168.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2023-11-16:03:50:12:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2023-11-16:03:50:12:INFO] Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34m[2023-11-16:03:50:12:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2023-11-16:03:50:12:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2023-11-16:03:50:12:INFO] Determined 0 GPU(s) available on t

In [14]:
xgb_model_uri = xgb.model_data
print(xgb_model_uri)

s3://sagemaker-us-east-1-220653215381/sagemaker/DEMO-xgb-abalone-regression/output/xgboost-abalone-regression-20231116-03-47-31/output/model.tar.gz


### Training with automatic model tuning (HPO)

In [40]:
import time
from sagemaker.tuner import IntegerParameter, ContinuousParameter
from sagemaker.tuner import HyperparameterTuner

job_name = "DEMO-ll-aba-" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
print("Tuning job name: ", job_name)

# Linear Learner tunable hyper parameters can be found here https://docs.aws.amazon.com/sagemaker/latest/dg/linear-learner-tuning.html
hyperparameter_ranges = {
    "wd": ContinuousParameter(1e-7, 1, scaling_type="Auto"),
    "learning_rate": ContinuousParameter(1e-5, 1, scaling_type="Auto"),
    "mini_batch_size": IntegerParameter(100, 2000, scaling_type="Auto"),
}

# Increase the total number of training jobs run by AMT, for increased accuracy (and training time).
max_jobs = 6
# Change parallel training jobs run by AMT to reduce total training time, constrained by your account limits.
# if max_jobs=max_parallel_jobs then Bayesian search turns to Random.
max_parallel_jobs = 2


hp_tuner = HyperparameterTuner(
    linear,
    "validation:mse",
    hyperparameter_ranges,
    max_jobs=max_jobs,
    max_parallel_jobs=max_parallel_jobs,
    objective_type="Minimize",
)


# Launch a SageMaker Tuning job to search for the best hyperparameters
hp_tuner.fit(inputs={"train": train_data, "validation": validation_data}, job_name=job_name)

INFO:sagemaker:Creating hyperparameter tuning job with name: DEMO-ll-aba-2023-11-15-23-16-47


Tuning job name:  DEMO-ll-aba-2023-11-15-23-16-47
...................................................................................!


# Deploy model

### Create model definition

In [15]:
model_name = f"xgb-abalone-{datetime.now():%Y-%m-%d-%H-%M-%S}"
print(model_name)

##sess.create_model(name=model_name, container_defs={"Image": xgb_container, "ModelDataUrl": xgb_model_uri})
create_model_response = sm_client.create_model(ModelName = model_name,ExecutionRoleArn = role,PrimaryContainer = {'Image': xgb_container,'ModelDataUrl': xgb_model_uri})

xgb-abalone-2023-11-16-03-52-54


### Create endpoint

In [16]:
from sagemaker.session import production_variant

# Create an endpoint with the 2 production variants
endpoint_name = f"xgb-Abalone-{datetime.now():%Y-%m-%d-%H-%M-%S}"
 
xgbvariant = production_variant(
	model_name=model_name,
	instance_type="ml.c5.xlarge",
	initial_instance_count=1,
	variant_name="xgb-variant",
	initial_weight=1
	)

endpoint_response = sess.endpoint_from_production_variants(
	name=endpoint_name,
	production_variants=[xgbvariant]
	)

INFO:sagemaker:Creating endpoint-config with name xgb-Abalone-2023-11-16-03-54-28
INFO:sagemaker:Creating endpoint with name xgb-Abalone-2023-11-16-03-54-28


----!

In [17]:
## Check the status of the endpoint of update 
print ("The current status of the endpoint is: {}".format(sm_client.describe_endpoint(EndpointName = endpoint_name)["EndpointStatus"]))

The current status of the endpoint is: InService


### Test the endpoint 

In [44]:
#### Prepare data for inference

In [18]:
import pandas as pd
df = pd.read_csv(f"{FILE_TRAIN}")
df2 = df.iloc[:, 1:]

df2.to_csv('abalone_inference_data.csv', index=False)

In [19]:
sm_runtime = boto3.Session().client("sagemaker-runtime")
#send-test-data
print(f"Sending test traffic to the endpoint {endpoint_name}. \nPlease wait...")

error_count = 0
for i in range(0,1):
    with open("abalone_inference_data.csv", "r") as f:
        for row in f:
            print(".", end="", flush=True)
            payload = row.rstrip("\n")
            try:
                response = sm_runtime.invoke_endpoint(
                    EndpointName=endpoint_name, ContentType="text/csv", Body=payload
                )
                response["Body"].read()
                print(".", end="", flush=True)
            except Exception as e:
                print("E", end="", flush=True)
                error_count += 1
                if error_count >= 5:
                    break
            time.sleep(0.1)
f.close()
print("Done!")

Sending test traffic to the endpoint xgb-Abalone-2023-11-16-03-54-28. 
Please wait...
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

# Create Archive of Sample Workload
We need to create an archive that contains individual files that Inference Recommender can send to your SageMaker Endpoints. Inference Recommender will randomly sample files from this archive so make sure it contains a similar distribution of payloads you'd expect in production. Note that your inference code must be able to read in the file formats from the sample payload.

Here we are only adding a single CSV file for the example. In your own use case(s), it's recommended to add a variety of samples that is representative of your payloads.

In [20]:
from sagemaker.s3 import S3Downloader, S3Uploader
# tar inference data and upload to S3, which we will use for the payload for the inference recommender
!tar -czvf ./abalone_inference_data.tar.gz ./abalone_inference_data.csv
inference_prefix = 'inference-data'
# Upload models
inference_data_uri = S3Uploader.upload(local_path="abalone_inference_data.tar.gz", desired_s3_uri=f"s3://{output_bucket}/{xgb_output_prefix}/{inference_prefix}")
print(inference_data_uri)

./abalone_inference_data.csv
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
s3://sagemaker-us-east-1-220653215381/sagemaker/DEMO-xgb-abalone-regression/inference-data/abalone_inference_data.tar.gz


# Inference recommender

## Default inference recommender jobs

### Configure parameters for default inference recommender jobs
Inference Recommender uses metadata about your ML model to recommend the best instance types and endpoint configurations for deployment. You can provide as much or as little information as you'd like but the more information you provide, the better your recommendations will be.

In [21]:
list_model_metadata_response=sm_client.list_model_metadata()
for model_metadata in list_model_metadata_response['ModelMetadataSummaries']:
    if model_metadata['Framework'] == 'XGBOOST':
        print(model_metadata)

{'Domain': 'MACHINE_LEARNING', 'Framework': 'XGBOOST', 'Task': 'CLASSIFICATION', 'Model': 'xgboost', 'FrameworkVersion': '1.0-1'}
{'Domain': 'MACHINE_LEARNING', 'Framework': 'XGBOOST', 'Task': 'REGRESSION', 'Model': 'xgboost', 'FrameworkVersion': '1.3-1'}


In [22]:
## Uncomment if you did not store the domain and task in a previous step
ml_domain = 'MACHINE_LEARNING'
ml_task = 'REGRESSION'

## Uncomment if you did not store the framework and framework version in a previous step
framework = 'XGBOOST'
framework_version = '1.0-1'

# The name of the ML model as standardized by common model zoos
nearest_model_name = 'xgboost'

# The supported MIME types for input and output data. In this example, 
# we are using images as input
input_content_type='text/csv'

In [23]:
# Create a dictionary to use as input for creating an inference recommendation job
container_config = {
        "Domain": ml_domain,
        "Framework": framework, 
        "FrameworkVersion": framework_version,
        "NearestModelName": nearest_model_name,
        "PayloadConfig": { 
            "SamplePayloadUrl": inference_data_uri,
            "SupportedContentTypes": [ input_content_type ]
         },
        "Task": ml_task,
        }

### Create default inference recommender jobs
Inference recommendations (Default job type) run a set of load tests on the recommended instance types. You can also load test for a serverless endpoint..

In [24]:
## We'll using inference recommender for one of the producttion variants
recommender_model_name = model_name

# Provide a unique job name for SageMaker Inference Recommender job
job_name = f'abalone-recommender-job-name-{datetime.now():%Y-%m-%d-%H-%M-%S}'

# Inference Recommender job type. Set to Default to get an initial recommendation
job_type = 'Default'

sm_client.create_inference_recommendations_job(
    JobName = job_name,
    JobType = job_type,
    RoleArn = role,
    # Provide only one of model package ARN or model name, not both. 
    # If you would like to create an inference recommendations job with a model name,
    # uncomment ModelName and ContainerConfig, and comment out ModelPackageVersionArn.
    InputConfig = {
        'ModelName': recommender_model_name,
        'ContainerConfig': container_config
        
    }
    
)

{'JobArn': 'arn:aws:sagemaker:us-east-1:220653215381:inference-recommendations-job/abalone-recommender-job-name-2023-11-16-04-08-25',
 'ResponseMetadata': {'RequestId': 'ae416f2a-08e6-42b3-8e24-4202f9c97611',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ae416f2a-08e6-42b3-8e24-4202f9c97611',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '132',
   'date': 'Thu, 16 Nov 2023 04:08:26 GMT'},
  'RetryAttempts': 0}}

## Advanced Inference recommender job
Endpoint recommendations (Advanced job type) are based on a custom load test where you select your desired ML instances or a serverless endpoint, provide a custom traffic pattern, and provide requirements for latency and throughput based on your production requirements. This job takes an average of 2 hours to complete depending on the job duration set and the total number of inference configurations tested.

### Configure advanced recommender job
Configure and run an advanced Inference Recommender job to perform a custom load test to simulate user interactions with the application. This helps you find the right configurations to satisfy latency, concurrency, and cost for this use case.

In [25]:
from sagemaker.parameter import CategoricalParameter 
from sagemaker.inference_recommender.inference_recommender_mixin import (  
    Phase,  
    ModelLatencyThreshold 
) 

hyperparameter_ranges = [ 
    { 
        "instance_types": CategoricalParameter(["ml.m5.2xlarge", "ml.m5.4xlarge", "ml.g4dn.xlarge"]), 
        'OMP_NUM_THREADS': CategoricalParameter(['3','4','5']), 
    } 
] 

phases = [ 
    Phase(duration_in_seconds=120, initial_number_of_users=2, spawn_rate=2),
] 

model_latency_thresholds = [ 
    ModelLatencyThreshold(percentile="P95", value_in_milliseconds=100) 
]

In [26]:
from sagemaker.model import Model
from sagemaker import image_uris
import sagemaker

model = Model(
    model_data=xgb_model_uri,
    role=role,
    image_uri = sagemaker.image_uris.retrieve(framework="xgboost", region=region, version="1.7-1", py_version="py3", 
                                              image_scope='inference'),
    sagemaker_session=sess
    )


INFO:sagemaker.image_uris:Ignoring unnecessary Python version: py3.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


## Run the recommender job
We recently released Python SDK support for Inference Recommender. You can now run default and advanced jobs using a single function: [right_size](https://sagemaker.readthedocs.io/en/stable/api/inference/model.html#sagemaker.model.Model.right_size). Based on the parameters of the function call, Inference Recommender infers if it should run default or advanced jobs. This greatly simplifies the use of Inference Recommender using the Python SDK. To run the Inference Recommender job, complete the following steps

In [None]:

advanced_job_name=f"advanced-abalone-recommender-job-{str(round(time.time()))}"

model.right_size( 
    sample_payload_url=inference_data_uri, 
    supported_content_types=["text/csv"], 
    framework= framework, 
    job_duration_in_seconds=7200, 
    hyperparameter_ranges=hyperparameter_ranges, 
    phases=phases, # TrafficPattern 
    max_invocations=30000, # StoppingConditions 
    model_latency_thresholds=model_latency_thresholds,
    job_name=advanced_job_name
)


INFO:sagemaker:Advanced Job parameters were specified. Running Advanced job...
INFO:sagemaker:Creating model with name: sagemaker-xgboost-2023-11-16-04-09-11-338
INFO:sagemaker:Creating Inference Recommendations job with name: advanced-abalone-recommender-job-1700107751


Fetching logs from CloudWatch...
Retrieved logStream: advanced-abalone-recommender-job-1700107751/execution from logGroup: /aws/sagemaker/InferenceRecommendationsJobs
[INFO] Starting Inference Recommendations Job advanced-abalone-recommender-job-1700107751
[INFO] Job type is LOAD_TEST
[INFO] Model name provided; retrieving model details from model: sagemaker-xgboost-2023-11-16-04-09-11-338
[INFO] Validating existence of sample payload
[INFO] Successfully validated existence of sample payload. Validating size of sample payload
[INFO] Successfully validated size of sample payload.
[INFO] Successfully retrieved model details.
[INFO] Validating job inputs
[INFO] Image url is: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1
[INFO] Image type is: ALGORITHM
[INFO] SupportedRealtimeInferenceInstanceTypes not provided, using default list.
[INFO] Provided SupportedRealtimeInferenceInstanceTypes are: [ml.c4.2xlarge, ml.c4.4xlarge, ml.c4.8xlarge, ml.c4.large, ml.c4.xlarge, ml.

### Check the status of the inference recommendation job

In [28]:
## Check the status of the inference recommendation
import pprint
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

finished = False
while not finished:
    inference_recommender_job = sm_client.describe_inference_recommendations_job(
        JobName=job_name
    )
    if inference_recommender_job["Status"] in ["COMPLETED", "STOPPED", "FAILED"]:
        finished = True
    else:
        print("In progress")
        time.sleep(300)

if inference_recommender_job["Status"] == "FAILED":
    print("Inference recommender job failed ")
    print("Failed Reason: {}".format(inference_recommender_job["FailureReason"]))
else:
    print("Inference recommender job completed")

Inference recommender job completed


### View inference recommender job output

In [29]:
data = [
    {**x["EndpointConfiguration"], **x["ModelConfiguration"], **x["Metrics"]}
    for x in inference_recommender_job["InferenceRecommendations"]
]
df = pd.DataFrame(data)
dropFilter = df.filter(["VariantName"])
df.drop(dropFilter, inplace=True, axis=1)
pd.set_option("max_colwidth", 400)
df.head()

Unnamed: 0,EndpointName,InstanceType,InitialInstanceCount,EnvironmentParameters,CostPerHour,CostPerInference,MaxInvocations,ModelLatency,CpuUtilization,MemoryUtilization,ServerlessConfig,CompilationJobName
0,abalone-recommender-job-name-2023-11-16--WiVRAaKECJ7NzaVqegdR,ml.c5.large,1.0,"[{'Key': 'SAGEMAKER_MODEL_SERVER_WORKERS', 'ValueType': 'String', 'Value': '2'}, {'Key': 'OMP_NUM_THREADS', 'ValueType': 'String', 'Value': '1'}]",0.102,6.561173e-07,2591,208,182.044006,10.1236,,
1,abalone-recommender-job-name-2023-11-16--dDaCsnrT1JJMql88OXcG,ml.g4dn.xlarge,1.0,"[{'Key': 'SAGEMAKER_MODEL_SERVER_WORKERS', 'ValueType': 'String', 'Value': '1'}, {'Key': 'OMP_NUM_THREADS', 'ValueType': 'String', 'Value': '1'}]",0.736,5.629493e-06,2179,96,102.608002,1.57255,,
2,abalone-recommender-job-name-2023-11-16--wZGBSldNeoWODfXaKN42,ml.c5.xlarge,1.0,"[{'Key': 'SAGEMAKER_MODEL_SERVER_WORKERS', 'ValueType': 'String', 'Value': '4'}, {'Key': 'OMP_NUM_THREADS', 'ValueType': 'String', 'Value': '1'}]",0.204,6.874242e-07,4946,143,361.787994,7.49921,,
3,abalone-recommender-job-name-2023-11-16--ZMe9xifzimxvk4R7tjNy,,,[],0.36,9.615384e-06,624,35,,8.650984,"{'MemorySizeInMB': 5120, 'MaxConcurrency': 1}",
4,abalone-recommender-job-name-2023-11-16--ke5M16q1pXNPEYeVPlA3,ml.c5.2xlarge,1.0,"[{'Key': 'SAGEMAKER_MODEL_SERVER_WORKERS', 'ValueType': 'String', 'Value': '8'}, {'Key': 'OMP_NUM_THREADS', 'ValueType': 'String', 'Value': '1'}]",0.408,6.535948e-07,10404,155,708.270996,6.51583,,


## Cleanup

In [83]:
sm_client.delete_endpoint(EndpointName=endpoint_name)

{'ResponseMetadata': {'RequestId': 'd875cf62-5320-4c23-ac3e-3d96bc19a2c7',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd875cf62-5320-4c23-ac3e-3d96bc19a2c7',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Wed, 08 Nov 2023 19:30:51 GMT'},
  'RetryAttempts': 0}}