This Jupyter notebook shows how to configure and deploy your own keyword-based topic model, using the Amazon SageMaker service as a general-purpose, serverless compute environment.

## Build and Push Container to ECR

In [1]:
# Run the build_and_push.sh script with josiah-topic-model as its only argument
# (presumably the image/repository name: it matches the repository in the image
# URI used when the model is created -- confirm against the script).
!bash build_and_push.sh josiah-topic-model

Login Succeeded
Sending build context to Docker daemon  128.5kB
Step 1/10 : FROM ubuntu:16.04
 ---> 0b1edfbffd27
Step 2/10 : MAINTAINER Amazon AI <sage-learner@amazon.com>
 ---> Using cache
 ---> 0b5849031ec8
Step 3/10 : RUN apt-get -y update && apt-get install -y --no-install-recommends          wget          python3          nginx          ca-certificates     && rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> 5411fbf3c61a
Step 4/10 : RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py &&     pip install numpy scipy scikit-learn pandas flask gevent gunicorn &&         (cd /usr/local/lib/python3.5/dist-packages/scipy/.libs; rm *; ln ../../numpy/.libs/* .) &&         rm -rf /root/.cache
 ---> Using cache
 ---> 603e52b400a1
Step 5/10 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> 2dbe0389fa1c
Step 6/10 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> 2a40daa24265
Step 7/10 : ENV PATH="/opt/program:${PATH}"
 ---> Using cache
 ---> 04e0e06b3d9d
Step 8/10 

## Create SageMaker Endpoint

Copy the URI of the image in the ECR repository that you have just created and paste it into `docker_image` below. It has the form `<account number>.dkr.ecr.us-east-1.amazonaws.com/<image name>:latest`.

In [2]:
import boto3
# boto3 client for the SageMaker API; used by the create/describe/delete cells.
sagemaker = boto3.client('sagemaker')

# One name is reused for the model, the endpoint configuration and the endpoint.
name = 'topic-model'
# Account-specific values: replace with your own image URI and execution-role ARN.
docker_image = '216321755658.dkr.ecr.us-east-1.amazonaws.com/josiah-topic-model:latest'
iam_role = 'arn:aws:iam::216321755658:role/service-role/AmazonSageMaker-ExecutionRole-20171204T150334'

In [3]:
# Register a model called `name`, backed by the container image `docker_image`,
# with `iam_role` as its execution role.
sagemaker.create_model(
    ModelName=name,
    ExecutionRoleArn=iam_role,
    PrimaryContainer={'Image': docker_image},
)

{'ModelArn': 'arn:aws:sagemaker:us-east-1:216321755658:model/topic-model',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '73',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Fri, 26 Oct 2018 02:19:46 GMT',
   'x-amzn-requestid': '48b4409c-c3e1-4517-8560-e543918a5584'},
  'HTTPStatusCode': 200,
  'RequestId': '48b4409c-c3e1-4517-8560-e543918a5584',
  'RetryAttempts': 0}}

In [4]:
# Endpoint configuration with a single production variant that serves the
# model `name` from one ml.m4.xlarge instance.
sagemaker.create_endpoint_config(
    EndpointConfigName=name,
    ProductionVariants=[
        {
            'VariantName': 'AllTraffic',
            'ModelName': name,
            'InstanceType': 'ml.m4.xlarge',
            'InitialInstanceCount': 1,
        },
    ],
)

{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-1:216321755658:endpoint-config/topic-model',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '92',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Fri, 26 Oct 2018 02:19:48 GMT',
   'x-amzn-requestid': 'd81f3712-3684-480c-9bcf-b53faa0c1d5e'},
  'HTTPStatusCode': 200,
  'RequestId': 'd81f3712-3684-480c-9bcf-b53faa0c1d5e',
  'RetryAttempts': 0}}

In [5]:
# Create the endpoint `name` from the endpoint configuration of the same name.
sagemaker.create_endpoint(EndpointName=name, EndpointConfigName=name)

{'EndpointArn': 'arn:aws:sagemaker:us-east-1:216321755658:endpoint/topic-model',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '79',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Fri, 26 Oct 2018 02:19:50 GMT',
   'x-amzn-requestid': 'a030279d-e186-4e81-b499-594705f389d9'},
  'HTTPStatusCode': 200,
  'RequestId': 'a030279d-e186-4e81-b499-594705f389d9',
  'RetryAttempts': 0}}

## Check on Status of SageMaker Endpoint

In [12]:
# Block until the endpoint has finished deploying, so that "Run All" does not
# reach the invocation cell while the endpoint is still being created. The
# waiter polls describe_endpoint and raises if the deployment fails.
sagemaker.get_waiter('endpoint_in_service').wait(EndpointName=name)

# Report the final status (expected: 'InService').
sagemaker.describe_endpoint(EndpointName=name)['EndpointStatus']

'InService'

## Send a sample payload

In [13]:
import pandas, boto3, io

class TopicExtractor:
    '''
    Sends clean text documents to an endpoint deployed using SageMaker and
    returns the topics the endpoint reports for each document.

    Arguments:
        input_data: data with each document as a row (e.g. a numpy array);
            anything accepted by ``pandas.DataFrame`` works
        endpoint_name: name of the endpoint, as a string
        client: optional ``sagemaker-runtime`` client to reuse; when omitted,
            one is created with ``boto3.client`` on the first ``extract`` call

    Attributes:
        topics: decoded response of the most recent ``extract`` call, or
            None if ``extract`` has not been called yet
    '''

    def __init__(self, input_data, endpoint_name, client=None):
        self.input_data = input_data
        self.endpoint_name = endpoint_name
        self.client = client
        self.topics = None

    def extract(self):
        '''
        Serialize the documents as CSV without header or index, invoke the
        endpoint, and return the response body decoded to a string. The
        result is also stored in ``self.topics``.
        '''
        # to_csv() with no target returns the CSV text directly.
        payload = pandas.DataFrame(self.input_data).to_csv(header=False, index=False)

        # Create the runtime client lazily so an injected client is never replaced.
        if self.client is None:
            self.client = boto3.client('sagemaker-runtime')

        response = self.client.invoke_endpoint(
            EndpointName=self.endpoint_name,
            Body=payload,
            ContentType='text/csv',
            # Accept must be a MIME type; ask for CSV, which is what the
            # endpoint returns.
            Accept='text/csv',
        )

        # Decode as UTF-8 (a superset of ASCII) so non-ASCII text in the
        # response does not raise UnicodeDecodeError.
        self.topics = response['Body'].read().decode('utf-8')
        return self.topics

In [14]:
# Load the sample documents (the file has no header row) as a numpy array.
a = pandas.read_csv('test_payload.csv', header=None).values
# Send them to the endpoint `name` and print the text it returns.
tm = TopicExtractor(a, name)
print(tm.extract())

Reservoir,Trap,Charge,Seal,Other,Topic
0.0,1.0,0.0,0.0,0.0,Trap
1.0,0.0,0.0,0.0,0.0,Reservoir
0.0,0.0,1.0,0.0,0.0,Charge
0.5,0.5,0.0,0.0,0.0,Reservoir
0.0,0.0,0.0,1.0,0.0,Seal
0.0,0.0,0.0,1.0,0.0,Seal
0.0,1.0,0.0,0.0,0.0,Trap
0.0,0.0,0.0,0.0,1.0,Other
0.0,1.0,0.0,0.0,0.0,Trap
0.5,0.0,0.0,0.5,0.0,Reservoir



## Delete the SageMaker Model, Endpoint Config, Endpoint

In [15]:
# Tear down in the reverse order of creation. The endpoint goes first because
# an endpoint configuration (and the model it references) should not be
# deleted while an endpoint is still using it.
sagemaker.delete_endpoint(EndpointName=name)
sagemaker.delete_endpoint_config(EndpointConfigName=name)
sagemaker.delete_model(ModelName=name)

{'ResponseMetadata': {'HTTPHeaders': {'content-length': '0',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Fri, 26 Oct 2018 02:25:00 GMT',
   'x-amzn-requestid': 'd4ecb834-c432-4761-b125-a5edd446fe86'},
  'HTTPStatusCode': 200,
  'RequestId': 'd4ecb834-c432-4761-b125-a5edd446fe86',
  'RetryAttempts': 0}}