# Real Time Inference Endpoint

## Deploy as Real Time Inference
- Since the model has been trained with good accuracy, we can deploy it.
- First, we need to write the four model inference functions (`model_fn`, `input_fn`, `predict_fn`, `output_fn`) in a `.py` script.
- SageMaker API documentation: https://sagemaker.readthedocs.io/en/stable/api/index.html

In [1]:
%%writefile serve.py

import os
import json
import joblib
import pandas as pd
import numpy as np

# Load the Model
def model_fn(model_dir):
    """Deserialize the fitted pipeline stored in ``model_dir``.

    Args:
        model_dir: Directory that contains ``pipeline_knn_model.joblib``.

    Returns:
        The object produced by ``joblib.load`` for that file.
    """
    # joblib files are pickle-based: only load artifacts from a trusted source.
    artifact_path = os.path.join(model_dir, "pipeline_knn_model.joblib")
    return joblib.load(artifact_path)

# Load the input data
def input_fn(request_body, request_content_type):
    """Parse a JSON request payload into a pandas DataFrame.

    Args:
        request_body: JSON document (``str`` or ``bytes``) in any layout that
            ``pd.DataFrame`` accepts, e.g. a list of ``{column: value}``
            records.
        request_content_type: MIME type of the request. Only
            ``application/json`` is supported; media-type parameters such as
            ``; charset=utf-8`` and letter case are ignored.

    Returns:
        pd.DataFrame built from the decoded payload.

    Raises:
        ValueError: If the content type is anything other than JSON.
    """
    # Compare only the bare media type so "application/json; charset=utf-8"
    # is treated the same as "application/json".
    media_type = (request_content_type or "").split(";", 1)[0].strip().lower()

    if media_type == "application/json":
        data = json.loads(request_body)
        return pd.DataFrame(data)

    # Fail loudly instead of returning None, which would only surface later
    # as an obscure error inside predict_fn.
    raise ValueError(
        f"Unsupported content type: {request_content_type!r}. "
        "Expected 'application/json'."
    )

def predict_fn(input_object, pipeline_knn_model):
    """Run the model on the parsed input and package the results.

    Args:
        input_object: Data passed straight to the model's ``predict`` and
            ``predict_proba`` methods.
        pipeline_knn_model: Fitted estimator exposing ``predict`` and
            ``predict_proba``.

    Returns:
        dict with two keys, both converted via ``.tolist()``:
        ``"predictions"`` (output of ``predict``) and ``"pred_prob"``
        (output of ``predict_proba``).
    """
    return {
        "predictions": pipeline_knn_model.predict(input_object).tolist(),
        "pred_prob": pipeline_knn_model.predict_proba(input_object).tolist(),
    }

def output_fn(prediction_object, request_content_type):
    """Serialize the prediction result to a JSON string.

    Args:
        prediction_object: JSON-serializable object to send back.
        request_content_type: Not used; the result is always JSON.
            NOTE(review): the serving framework presumably passes the
            response "Accept" type in this position -- confirm.

    Returns:
        str: ``prediction_object`` encoded with ``json.dumps``.
    """
    return json.dumps(prediction_object)

Overwriting serve.py


In [2]:
%%writefile requirements.txt
pandas
numpy

Overwriting requirements.txt


In [3]:
# Deploy the model: wrap the trained artifact in an SKLearnModel and stand up
# a real-time endpoint that serves it with the handlers from serve.py.
from sagemaker.sklearn.model import SKLearnModel
from sagemaker import get_execution_role

# S3 location of the trained model archive to serve.
model_artifact = "s3://sagemaker-us-east-1-298138509966/knn-pipeline-run-2022-06-10-12-07-17-866/output/model.tar.gz"
# NOTE: despite its name, this value is passed below as the *model* name
# (`name=`), not to `deploy()`. The endpoint name printed in the next cell
# carries an additional timestamp suffix.
endpoint_name = "heart-disease-knn-pipeline-model"

model = SKLearnModel(
    name=endpoint_name,
    framework_version="0.23-1",
    entry_point="serve.py",  # inference handlers written by the %%writefile cell
    # NOTE(review): `dependencies` presumably only copies these paths into the
    # container; confirm whether requirements.txt is actually pip-installed.
    dependencies=["requirements.txt"],
    model_data=model_artifact,
    role=get_execution_role(),
)

# Creates the endpoint on a single ml.m5.large instance; `predictor` is used
# by the next cell to read back the endpoint name.
predictor = model.deploy(instance_type="ml.m5.large", initial_instance_count=1)

Using already existing model: heart-disease-knn-pipeline-model


------!

In [4]:
# Read back the name of the endpoint that deploy() created and show it
endpoint_name = predictor.endpoint_name
print("Endpoint name:", endpoint_name, sep="\n")

Endpoint name:
heart-disease-knn-pipeline-model-2022-06-10-12-29-40-108


## Invoke the model
- boto3 documentation: https://boto3.amazonaws.com/v1/documentation/api/latest/index.html

In [28]:
# Fetch the held-out test split and prepare a small JSON payload from it
import json
import pandas as pd

test_df = pd.read_csv("s3://sagemaker-us-east-1-298138509966/sagemaker/heart_disease/test.csv")

# Separate the label column from the feature columns
y_test = test_df["target"]
X_test = test_df.drop(columns=["target"])

# First two feature rows -> list of {column: value} records -> JSON string
X_pred = json.dumps(X_test.head(2).to_dict(orient="records"))
X_pred

[{'age': 45.0, 'sex': 0.0, 'cp': 2.0, 'trestbps': 112.0, 'chol': 160.0, 'fbs': 0.0, 'restecg': 0.0, 'thalach': 138.0, 'exang': 0.0, 'oldpeak': 0.0, 'slope': 2.0, 'ca': 0.0, 'thal': 3.0}, {'age': 52.0, 'sex': 1.0, 'cp': 4.0, 'trestbps': 112.0, 'chol': 230.0, 'fbs': 0.0, 'restecg': 0.0, 'thalach': 160.0, 'exang': 0.0, 'oldpeak': 0.0, 'slope': 1.0, 'ca': 1.0, 'thal': 3.0}]


'[{"age": 45.0, "sex": 0.0, "cp": 2.0, "trestbps": 112.0, "chol": 160.0, "fbs": 0.0, "restecg": 0.0, "thalach": 138.0, "exang": 0.0, "oldpeak": 0.0, "slope": 2.0, "ca": 0.0, "thal": 3.0}, {"age": 52.0, "sex": 1.0, "cp": 4.0, "trestbps": 112.0, "chol": 230.0, "fbs": 0.0, "restecg": 0.0, "thalach": 160.0, "exang": 0.0, "oldpeak": 0.0, "slope": 1.0, "ca": 1.0, "thal": 3.0}]'

In [20]:
# Send the JSON payload to the deployed endpoint through the runtime API
import json
import boto3

endpoint_name = "heart-disease-knn-pipeline-model-2022-06-10-12-29-40-108"
sm_runtime = boto3.client("sagemaker-runtime")

# Request and response are both JSON
response = sm_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=X_pred,
    ContentType="application/json",
    Accept="application/json",
)

In [21]:
# Turn the endpoint's response stream into a Python dict:
# read the body, decode it as UTF-8 text, then parse the JSON.
response_body = response["Body"]
response_str = str(response_body.read(), "utf-8")
response_dict = json.loads(response_str)

print(response_dict)

{'predictions': [0, 1], 'pred_prob': [[1.0, 0.0], [0.4, 0.6]]}


## Cleanup
- Delete the model
- Delete the endpoint

In [None]:
# Tear down everything the deployment created: endpoint, endpoint
# configuration and model.
import boto3

sm_client = boto3.client("sagemaker")
endpoint_name = "heart-disease-knn-pipeline-model-2022-06-10-12-29-40-108"

# Look up the endpoint configuration from the endpoint itself rather than
# assuming it shares the endpoint's name.
endpoint_description = sm_client.describe_endpoint(EndpointName=endpoint_name)
endpoint_config_name = endpoint_description["EndpointConfigName"]

# Get the model name from the endpoint configuration. This must happen before
# anything is deleted, because the lookup needs the configuration to exist.
response = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)
model_name = response["ProductionVariants"][0]["ModelName"]

# Delete the endpoint first: it is the resource that keeps an instance
# running, so it should go away even if a later step fails.
sm_client.delete_endpoint(EndpointName=endpoint_name)

# Delete the endpoint configuration so it is not left behind
sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)

# Delete the model
sm_client.delete_model(ModelName=model_name)