# Real Time Inference Endpoint

In [None]:
!pip install -U pandas pandas-profiling scikit-learn sagemaker

## Create the inference script
- Since the model has been trained with good accuracy we can deploy it.
- First we need to write the 4 functions for model inference in a .py script
- Sagemaker API documentation: https://sagemaker.readthedocs.io/en/stable/api/index.html

In [22]:
%%writefile serve.py

import os
import joblib
import pandas as pd

# Load the Model
def model_fn(model_dir):
    model_file_name = "pipeline_model.joblib"
    pipeline_model = joblib.load(os.path.join(model_dir, model_file_name))
    
    return pipeline_model

# Load the input data
def input_fn(request_body, request_content_type):
    """An input_fn that loads a pickled numpy array"""
    if request_content_type == "application/json":
        input_object = pd.read_json(request_body, lines=True)
        
        return input_object
    else:
        raise ValueError("Only application/json content type supported!")

def predict_fn(input_object, pipeline_model):
    predictions = pipeline_model.predict(input_object)
    pred_probs = pipeline_model.predict_proba(input_object)
    
    prediction_object = pd.DataFrame(
        {
            "prediction": predictions.tolist(),
            "pred_prob_class0": pred_probs[:, 0].tolist(),
            "pred_prob_class1": pred_probs[:, 1].tolist()
        }
    )
    
    return prediction_object

def output_fn(prediction_object, request_content_type):
    return_object = prediction_object.to_json(orient="records", lines=True)
    
    return return_object

Overwriting serve.py


In [2]:
%%writefile requirements.txt
pandas
numpy

Overwriting requirements.txt


## Real Time Endpoint Deployment

In [3]:
# Create the deployment
from sagemaker.sklearn.model import SKLearnModel
from sagemaker import get_execution_role

training_job_name = "knn-pipeline-tuner-220611-0916-006-77833ec6"
model_artifact = f"s3://sagemaker-us-east-1-298138509966/{training_job_name}/output/model.tar.gz"
endpoint_name = "heart-disease-knn-pipeline-model"

model = SKLearnModel(
    name=endpoint_name,
    framework_version="1.0-1",
    entry_point="serve.py",
    dependencies=["requirements.txt"],
    model_data=model_artifact,
    role=get_execution_role(),
)

In [4]:
# Deploy!
predictor = model.deploy(instance_type="ml.t2.medium", initial_instance_count=1)

--------!

In [5]:
endpoint_name = predictor.endpoint_name
print("Endpoint name:")
print(f"{endpoint_name}")

Endpoint name:
heart-disease-knn-pipeline-model-2022-06-12-05-22-47-198


## Invoke the model
- boto3 documentation: https://boto3.amazonaws.com/v1/documentation/api/latest/index.html

In [6]:
# Load some data that we want to make predictions on
import pandas as pd
test_df = pd.read_csv("s3://sagemaker-us-east-1-298138509966/sagemaker/heart_disease/test.csv")

X_test = test_df.drop("target", axis=1)
y_test = test_df["target"]

# Get two rows to make predictions on
X_pred = X_test.head(2).to_json(orient="records", lines=True)
X_pred

'{"age":45.0,"sex":0.0,"cp":2.0,"trestbps":112.0,"chol":160.0,"fbs":0.0,"restecg":0.0,"thalach":138.0,"exang":0.0,"oldpeak":0.0,"slope":2.0,"ca":0.0,"thal":3.0}\n{"age":52.0,"sex":1.0,"cp":4.0,"trestbps":112.0,"chol":230.0,"fbs":0.0,"restecg":0.0,"thalach":160.0,"exang":0.0,"oldpeak":0.0,"slope":1.0,"ca":1.0,"thal":3.0}\n'

In [16]:
# Submit to the endpoint
import boto3
import json
sm_runtime = boto3.client("sagemaker-runtime")

response = sm_runtime.invoke_endpoint(EndpointName=endpoint_name, 
                                      Body=X_pred, 
                                      ContentType="application/json", 
                                      Accept="application/json")

In [17]:
# Decode the response from the endpoint
response_body = response['Body']
response_str = response_body.read().decode('utf-8')
response_df = pd.read_json(response_str, lines=True)

response_df

Unnamed: 0,prediction,pred_prob_class0,pred_prob_class1
0,0,1.0,0.0
1,1,0.333333,0.666667


## Cleanup
- Delete the endpoint
- Delete the endpoint config
- Delete the model

In [19]:
import boto3

def cleanup(endpoint_name):
    sm_client = boto3.client("sagemaker")

    # Get the model name from endpoint_name
    response = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_name)

    # Delete the endpoint
    sm_client.delete_endpoint(EndpointName=endpoint_name)

    # Delete the endpoint config
    endpoint_config_name = response['EndpointConfigName']
    sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)                        

    # Delete the model
    model_name = response['ProductionVariants'][0]['ModelName']
    sm_client.delete_model(ModelName=model_name)

In [20]:
# Run the cleanup
cleanup(endpoint_name)

# Multi-Model Endpoint
- You can deploy more than one model to same physical machine to save on costs!

In [3]:
# Create the multi-model deployment
from sagemaker.sklearn.model import SKLearnModel
from sagemaker.multidatamodel import MultiDataModel
from sagemaker import Session, get_execution_role

mme_name = "heart-disease-models"
mme_model_data_prefix = f"s3://sagemaker-us-east-1-298138509966/{mme_name}"

# Define the base model
base_model = SKLearnModel(
    framework_version="1.0-1",
    entry_point="serve.py",
    dependencies=["requirements.txt"],
    model_data=None,
    role=get_execution_role(),
    sagemaker_session = Session()
)

# Define the multi-model
mme = MultiDataModel(
    name = mme_name,
    model_data_prefix = mme_model_data_prefix,
    model = base_model,
    sagemaker_session = Session()
)

In [11]:
# Deploy the empty multi-model!
predictor = mme.deploy(instance_type="ml.t2.large", initial_instance_count=1)

-----------------!

In [12]:
# Add the first model to the multi-model
training_job_name = "knn-pipeline-tuner-220611-0916-006-77833ec6"
model_artifact = f"s3://sagemaker-us-east-1-298138509966/{training_job_name}/output/model.tar.gz"
model_name = f"{model_artifact.split('/')[-3]}_1.tar.gz"

mme.add_model(model_data_source=model_artifact, model_data_path=model_name)

# Add the second model to the multi-model
training_job_name = "knn-pipeline-tuner-220611-0916-006-77833ec6"
model_artifact = f"s3://sagemaker-us-east-1-298138509966/{training_job_name}/output/model.tar.gz"
model_name = f"{model_artifact.split('/')[-3]}_2.tar.gz"

mme.add_model(model_data_source=model_artifact, model_data_path=model_name)

's3://sagemaker-us-east-1-298138509966/heart-disease-models/knn-pipeline-run-2022-06-11-03-03-10-696_2.tar.gz'

In [13]:
# Check if all the models have been deployed
list(mme.list_models())

['/knn-pipeline-run-2022-06-11-03-03-10-696_1.tar.gz',
 '/knn-pipeline-run-2022-06-11-03-03-10-696_2.tar.gz']

## Invoke the models
- boto3 documentation: https://boto3.amazonaws.com/v1/documentation/api/latest/index.html

In [7]:
# Load some data that we want to make predictions on
import pandas as pd
import json
test_df = pd.read_csv("s3://sagemaker-us-east-1-298138509966/sagemaker/heart_disease/test.csv")

X_test = test_df.drop("target", axis=1)
y_test = test_df["target"]

# Get two rows to make predictions on
X_pred = X_test.head(2).to_json(orient="records", lines=True)
X_pred

'[{"age": 45.0, "sex": 0.0, "cp": 2.0, "trestbps": 112.0, "chol": 160.0, "fbs": 0.0, "restecg": 0.0, "thalach": 138.0, "exang": 0.0, "oldpeak": 0.0, "slope": 2.0, "ca": 0.0, "thal": 3.0}, {"age": 52.0, "sex": 1.0, "cp": 4.0, "trestbps": 112.0, "chol": 230.0, "fbs": 0.0, "restecg": 0.0, "thalach": 160.0, "exang": 0.0, "oldpeak": 0.0, "slope": 1.0, "ca": 1.0, "thal": 3.0}]'

In [17]:
# Submit to the endpoint
import boto3
import json
sm_runtime = boto3.client("sagemaker-runtime")
endpoint_name = mme_name

# Predictions from each model
for model_name in mme.list_models():
    response = sm_runtime.invoke_endpoint(EndpointName=endpoint_name, 
                                          TargetModel=model_name,
                                          Body=X_pred, 
                                          ContentType="application/json", 
                                          Accept="application/json")

    # Decode the response from the endpoint
    response_body = response['Body']
    response_str = response_body.read().decode('utf-8')
    response_dict = json.loads(response_str)
    
    print(model_name)
    print(response_dict)

/knn-pipeline-run-2022-06-11-03-03-10-696_1.tar.gz
{'predictions': [0, 1], 'pred_prob': [[1.0, 0.0], [0.4, 0.6]]}
/knn-pipeline-run-2022-06-11-03-03-10-696_2.tar.gz
{'predictions': [0, 1], 'pred_prob': [[1.0, 0.0], [0.4, 0.6]]}


## Cleanup
- Delete the endpoint
- Delete the endpoint config
- Delete the model

In [18]:
# Run the cleanup
cleanup(endpoint_name)

{'ResponseMetadata': {'RequestId': 'f4d731a1-7bdf-4231-a7f9-0a85bc714ed1',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f4d731a1-7bdf-4231-a7f9-0a85bc714ed1',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Sat, 11 Jun 2022 03:35:27 GMT'},
  'RetryAttempts': 0}}

# Serverless

In [4]:
# Create the deployment
from sagemaker.sklearn.model import SKLearnModel
from sagemaker import get_execution_role

training_job_name = "knn-pipeline-tuner-220611-0916-006-77833ec6"
model_artifact = f"s3://sagemaker-us-east-1-298138509966/{training_job_name}/output/model.tar.gz"
endpoint_name = "heart-disease-knn-pipeline-model"

model = SKLearnModel(
    name=endpoint_name,
    framework_version="1.0-1",
    entry_point="serve.py",
    dependencies=["requirements.txt"],
    model_data=model_artifact,
    role=get_execution_role(),
)

In [5]:
# NEW! Create a config for serverless inference
from sagemaker.serverless import ServerlessInferenceConfig
serverless_config = ServerlessInferenceConfig(memory_size_in_mb=1024, max_concurrency=3)

In [6]:
# NEW! Deploy!
predictor = model.deploy(serverless_inference_config=serverless_config)

Using already existing model: heart-disease-knn-pipeline-model


-----------------*

UnexpectedStatusException: Error hosting endpoint heart-disease-knn-pipeline-model-2022-06-12-04-54-34-401: Failed. Reason: Received server error (0) from model with message "An error occurred while handling request as the model process exited.". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/heart-disease-knn-pipeline-model-2022-06-12-04-54-34-401 in account 298138509966 for more information..

In [None]:
endpoint_name = predictor.endpoint_name
print("Endpoint name:")
print(f"{endpoint_name}")

## Invoke the model
- boto3 documentation: https://boto3.amazonaws.com/v1/documentation/api/latest/index.html

In [8]:
# Load some data that we want to make predictions on
import pandas as pd
import json
test_df = pd.read_csv("s3://sagemaker-us-east-1-298138509966/sagemaker/heart_disease/test.csv")

X_test = test_df.drop("target", axis=1)
y_test = test_df["target"]

# Get two rows to make predictions on
X_pred = X_test.head(2).to_json(orient="records", lines=True)
X_pred

'[{"age": 45.0, "sex": 0.0, "cp": 2.0, "trestbps": 112.0, "chol": 160.0, "fbs": 0.0, "restecg": 0.0, "thalach": 138.0, "exang": 0.0, "oldpeak": 0.0, "slope": 2.0, "ca": 0.0, "thal": 3.0}, {"age": 52.0, "sex": 1.0, "cp": 4.0, "trestbps": 112.0, "chol": 230.0, "fbs": 0.0, "restecg": 0.0, "thalach": 160.0, "exang": 0.0, "oldpeak": 0.0, "slope": 1.0, "ca": 1.0, "thal": 3.0}]'

In [9]:
# Submit to the endpoint
import boto3
import json
sm_runtime = boto3.client("sagemaker-runtime")

response = sm_runtime.invoke_endpoint(EndpointName=endpoint_name, 
                                      Body=X_pred, 
                                      ContentType="application/json", 
                                      Accept="application/json")

In [10]:
# Decode the response from the endpoint
response_body = response['Body']
response_str = response_body.read().decode('utf-8')
response_dict = json.loads(response_str)

print(response_dict)

{'predictions': [0, 1], 'pred_prob': [[1.0, 0.0], [0.3333333333333333, 0.6666666666666666]]}


## Cleanup
- Delete the endpoint
- Delete the endpoint config
- Delete the model

In [None]:
# Run the cleanup
cleanup(endpoint_name)