In [38]:
import boto3

# SageMaker control-plane client; the endpoint-config and endpoint cells reuse it.
sagemaker = boto3.client('sagemaker')

# Resource names shared by the model, endpoint-config and endpoint cells.
model_name = 'spark-regression-model-009'
endpoint_name = 'inference-server-009a'
endpoint_config_name = 'inference-server-009a-config'

## Create SageMaker Model
# NOTE: replace the <account number> and <bucket> placeholders before running.
role = 'arn:aws:iam::<account number>:role/sagemaker_role'
primary_container = dict(
    Image='<account number>.dkr.ecr.us-west-2.amazonaws.com/inference-server:latest',
    ModelDataUrl='s3://<bucket>/models/spark-regression-model/model.tgz',
)

# Register the model: container image + model artifact, run under `role`.
create_model_response = sagemaker.create_model(
    ModelName=model_name,
    PrimaryContainer=primary_container,
    ExecutionRoleArn=role,
)

print(create_model_response['ModelArn'])



In [39]:
## Create Endpoint Config
# Single production variant: one ml.m4.xlarge instance serving `model_name`.
production_variant = dict(
    VariantName='default-variant-name',
    ModelName=model_name,
    InitialInstanceCount=1,
    InstanceType='ml.m4.xlarge',
)

response = sagemaker.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)

print(response)



In [40]:
## Create SageMaker Endpoint
response = sagemaker.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
)

print(response)

# create_endpoint only *starts* provisioning and returns immediately. Block
# here until the endpoint reports InService so that a "Run All" does not reach
# the invoke_endpoint cell while the endpoint is still being created.
# The waiter raises botocore.exceptions.WaiterError if creation fails or
# does not finish within the waiter's polling budget.
sagemaker.get_waiter('endpoint_in_service').wait(EndpointName=endpoint_name)



In [52]:
%%time
## Test the Endpoint
import boto3, io, json

# Runtime (data-plane) client used to send inference requests to the endpoint.
client = boto3.client('sagemaker-runtime')

# JSON request body: a "schema" listing the column names/types followed by
# "rows" holding a single record to score.
# Bound to `payload` rather than `input` so the builtin input() is not shadowed
# for the remainder of the kernel session. The adjacent literals concatenate to
# exactly the same bytes as the original backslash-continued string.
payload = (
    '{"schema":{"fields":[{"name":"Price","type":"double"},{"name":"Mileage","type":"integer"}, '
    '{"name":"Make","type":"string"},{"name":"Model","type":"string"},{"name":"Trim","type":"string"}, '
    '{"name":"Type","type":"string"},{"name":"Cylinder","type":"integer"},{"name":"Liter","type":"double"}, '
    '{"name":"Doors","type":"integer"},{"name":"Cruise","type":"integer"},{"name":"Sound","type":"integer"}, '
    '{"name":"Leather","type":"integer"}]}, '
    '"rows":[[9041.9062544231,26191,"Chevrolet","AVEO","SVM Sedan 4D","Sedan",4,1.6,4,0,0,1]]}'
)

response = client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=payload,
    ContentType='application/json',
    Accept='application/json'
)

# The response body is a stream; read it fully and decode before parsing.
res_json = json.loads(response['Body'].read().decode("utf-8"))

print(json.dumps(res_json, indent=2))

{
  "schema": {
    "fields": [
      {
        "name": "prediction",
        "type": {
          "type": "basic",
          "base": "double",
          "isNullable": false
        }
      }
    ]
  },
  "rows": [
    [
      10236.175823272792
    ]
  ]
}
CPU times: user 16 ms, sys: 0 ns, total: 16 ms
Wall time: 85.1 ms
