In [1]:
import sagemaker
import boto3
import pandas as pd
from sagemaker import get_execution_role

In [2]:
# get region name
region = boto3.Session().region_name
print ('region -> {}'.format(region))

region -> eu-west-1


In [3]:
# initialize session
session = sagemaker.Session()

# bucket details
bucket = 'snowflake-getting-started'
prefix = 'bank-marketing'

# get execution role
role = get_execution_role()

sm = boto3.Session().client(service_name='sagemaker',region_name=region)

## AutoPilot Experiment Configurations

### Part 1 - Specify Input Data Config, Job Config, Output Data Config, Problem Type & Objective

In [4]:
input_data_config =[
    {
        'DataSource':{
            'S3DataSource':{
                'S3DataType':'S3Prefix',
                'S3Uri':'s3://{}/{}/train'.format(bucket,prefix)
            }
        },
        'TargetAttributeName':'Class'
    }
]

In [5]:
job_config = {
    'CompletionCriteria':{
      'MaxRuntimePerTrainingJobInSeconds': 600,
      'MaxAutoMLJobRuntimeInSeconds': 3600
    },
}

In [6]:
output_data_config = {
    'S3OutputPath' : 's3://{}/{}/autopilot-sdk-outputs'.format(bucket,prefix)
}

In [7]:
problem_type = 'BinaryClassification'
job_objective = {'MetricName':'F1'}

### Part 2 - Create AutoML Job

In [8]:
from time import gmtime, strftime, sleep
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())
 
auto_ml_job_name = 'bankmarketing-sdk-exp' + timestamp_suffix
print('AutoMLJobName: ' + auto_ml_job_name)
 
sm.create_auto_ml_job(AutoMLJobName=auto_ml_job_name,
                      InputDataConfig=input_data_config,
                      OutputDataConfig=output_data_config,
                      AutoMLJobConfig=job_config,
                      AutoMLJobObjective=job_objective,
                      ProblemType=problem_type,
                      RoleArn=role)

AutoMLJobName: bankmarketing-sdk-exp25-06-40-23


{'AutoMLJobArn': 'arn:aws:sagemaker:eu-west-1:951135073253:automl-job/bankmarketing-sdk-exp25-06-40-23',
 'ResponseMetadata': {'RequestId': 'eef55f37-0261-4f1b-91ae-971658a85a16',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'eef55f37-0261-4f1b-91ae-971658a85a16',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '103',
   'date': 'Sat, 25 Jul 2020 06:40:23 GMT'},
  'RetryAttempts': 0}}

### Part 3 - Monitor Job

    This code is generic in nature and works as is for all models & jobs

In [9]:
print ('JobStatus - Secondary Status')
print('------------------------------')
 
 
describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
print (describe_response['AutoMLJobStatus'] + " - " + describe_response['AutoMLJobSecondaryStatus'])
job_run_status = describe_response['AutoMLJobStatus']
    
while job_run_status not in ('Failed', 'Completed', 'Stopped'):
    describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_run_status = describe_response['AutoMLJobStatus']
    
    print (describe_response['AutoMLJobStatus'] + " - " + describe_response['AutoMLJobSecondaryStatus'])
    sleep(30)

JobStatus - Secondary Status
------------------------------
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEng

### Part 4 - Get Data Exploration Notebook, Candidate Definition Notebook & Name of best candidate model

    This code is generic and would work for all models

In [10]:
job = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
 
job_candidate_notebook = job['AutoMLJobArtifacts']['CandidateDefinitionNotebookLocation']
job_data_notebook = job['AutoMLJobArtifacts']['DataExplorationNotebookLocation']
job_best_candidate = job['BestCandidate']
job_best_candidate_name = job_best_candidate['CandidateName']
 
job_candidate_notebook
job_data_notebook
job_best_candidate_name

'tuning-job-1-b61fc32c210b4cc6b0-126-689c935f'

In [11]:
%%sh -s $job_candidate_notebook $job_data_notebook
 
aws s3 cp $1 .
aws s3 cp $2 .

download: s3://snowflake-getting-started/bank-marketing/autopilot-sdk-outputs/bankmarketing-sdk-exp25-06-40-23/sagemaker-automl-candidates/pr-1-cb780d5ff9564927abb52a64a4a0d94d059925d614aa4d78a69559cb3f/notebooks/SageMakerAutopilotCandidateDefinitionNotebook.ipynb to ./SageMakerAutopilotCandidateDefinitionNotebook.ipynb
download: s3://snowflake-getting-started/bank-marketing/autopilot-sdk-outputs/bankmarketing-sdk-exp25-06-40-23/sagemaker-automl-candidates/pr-1-cb780d5ff9564927abb52a64a4a0d94d059925d614aa4d78a69559cb3f/notebooks/SageMakerAutopilotDataExplorationNotebook.ipynb to ./SageMakerAutopilotDataExplorationNotebook.ipynb


### Part 5 - Create the model from the best candidate, deploy it and perform batch inferencing.

    Generic code would work for all jobs and models
    
    Once the model is created we have two options. Either to do Real Time Inferences OR Batch Based Inferences. For 'Real Time' inferences, we create an EndPointConfig and an EndPoint which basically deploys the model and exposes it as an API for integration. For 'Batch Based' inferences, we don't need to deploy the model, we need to create a TransformJob which reads data from S3 bucket, spins an instance and generates inferences.

#### Part 5.1 - Create Model Instance

In [34]:
model_name = 'automl-sdk-bank-marketing-best-model-' + timestamp_suffix

model = sm.create_model(Containers=job_best_candidate['InferenceContainers'],
                            ModelName=model_name,
                            ExecutionRoleArn=role)

print('Model ARN corresponding to the best candidate is : {}'.format(model['ModelArn']))

Model ARN corresponding to the best candidate is : arn:aws:sagemaker:eu-west-1:951135073253:model/automl-sdk-bank-marketing-best-model-25-06-40-23


#### Part 5.2 - Create End Point Config

In [18]:
endpoint_config_name = 'Bank-Mktg-EndpointConfig-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(endpoint_config_name)
create_endpoint_config_response = sm.create_endpoint_config(
    EndpointConfigName = endpoint_config_name,
    ProductionVariants=[{
        'InstanceType':'ml.m4.xlarge',
        'InitialVariantWeight':1,
        'InitialInstanceCount':1,
        'ModelName':model_name,
        'VariantName':'AllTraffic'}])

print("Endpoint Config Arn: " + create_endpoint_config_response['EndpointConfigArn'])

Bank-Mktg-EndpointConfig-2020-07-25-08-53-11
Endpoint Config Arn: arn:aws:sagemaker:eu-west-1:951135073253:endpoint-config/bank-mktg-endpointconfig-2020-07-25-08-53-11


#### Part 5.3 - Create EndPoint for RealTime Inferences

In [19]:
%%time
import time

endpoint_name = 'Bank-Mktg-Endpoint-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(endpoint_name)
create_endpoint_response = sm.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name)
print(create_endpoint_response['EndpointArn'])

resp = sm.describe_endpoint(EndpointName=endpoint_name)
status = resp['EndpointStatus']
print("Status: " + status)

while status=='Creating':
    time.sleep(60)
    resp = sm.describe_endpoint(EndpointName=endpoint_name)
    status = resp['EndpointStatus']
    print("Status: " + status)

print("Arn: " + resp['EndpointArn'])
print("Status: " + status)

Bank-Mktg-Endpoint-2020-07-25-08-54-19
arn:aws:sagemaker:eu-west-1:951135073253:endpoint/bank-mktg-endpoint-2020-07-25-08-54-19
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: InService
Arn: arn:aws:sagemaker:eu-west-1:951135073253:endpoint/bank-mktg-endpoint-2020-07-25-08-54-19
Status: InService
CPU times: user 118 ms, sys: 11.9 ms, total: 130 ms
Wall time: 8min 1s


#### 5.4 Real time Inferences

In [20]:
from sagemaker.predictor import json_serializer, json_deserializer, RealTimePredictor
from sagemaker.content_types import CONTENT_TYPE_CSV, CONTENT_TYPE_JSON

# initialize session
session = sagemaker.Session()

endpoint_name="Bank-Mktg-Endpoint-2020-07-25-08-54-19"
predictor = RealTimePredictor(endpoint=endpoint_name, 
                              sagemaker_session=session,
                              content_type=CONTENT_TYPE_CSV)

In [21]:
payload="43,technician,divorced,unknown,no,4389,no,no,telephone,2,jul,100,2,85,1,success"
predicted_value = predictor.predict(payload).decode('utf-8')

print (predicted_value)

1



### Part 6 - Bulk Inferencing - Transform Test Data Held in S3

    To do bulk inferencing we create a transform job which takes test data location, output location where inferences would be stored and finally the instance which needs to be used for inferecing.

In [35]:
transform_job_name = 'automl-sdk-bankmarketing-transform-3-' + timestamp_suffix

print ('Transform Job Name ->', transform_job_name)

transform_input = {
    #"ExperimentConfig": { 
    #  "ExperimentName": "BANKMARKETING-SDK-EXP25-06-40-23",
    #  "TrialComponentDisplayName": "bank-marketing-transform-job-1",
    #  "TrialName": "bank-marketing-transform-job-name"
    # },
    'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri':'s3://{}/{}/test'.format(bucket,prefix)
            }
        },
        'ContentType': 'text/csv',
        'CompressionType': 'None',
        'SplitType': 'Line'
    }

transform_output = {
        'S3OutputPath': 's3://{}/{}/inference-results'.format(bucket,prefix),
    }

transform_resources = {
        'InstanceType': 'ml.m4.xlarge',
        'InstanceCount': 1
    }

sm.create_transform_job(TransformJobName = transform_job_name,
                        ModelName = model_name,
                        TransformInput = transform_input,
                        TransformOutput = transform_output,
                        TransformResources = transform_resources
)

Transform Job Name -> automl-sdk-bankmarketing-transform-3-25-06-40-23


{'TransformJobArn': 'arn:aws:sagemaker:eu-west-1:951135073253:transform-job/automl-sdk-bankmarketing-transform-3-25-06-40-23',
 'ResponseMetadata': {'RequestId': '5f88d195-4765-4ed1-b378-cf533105f138',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '5f88d195-4765-4ed1-b378-cf533105f138',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '125',
   'date': 'Sat, 25 Jul 2020 10:24:29 GMT'},
  'RetryAttempts': 0}}

### Part 6.1 - Bulk Inferencing - Poll Job Status

    Generic code for all models & jobs

In [36]:
print ('JobStatus')
print('----------')
 
describe_response = sm.describe_transform_job(TransformJobName = transform_job_name)
job_run_status = describe_response['TransformJobStatus']
print (job_run_status)
 
while job_run_status not in ('Failed', 'Completed', 'Stopped'):
    describe_response = sm.describe_transform_job(TransformJobName = transform_job_name)
    job_run_status = describe_response['TransformJobStatus']
    print (job_run_status)
    sleep(30)

JobStatus
----------
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
Completed


### Part 6.2 - Download the inferencing outcomes.

    Download the inferences from the s3 bucket, once downloaded these results can be pushed back to snowflake for visualization etc.

In [50]:
# local directory where the inferences are downloaded
local_inference_results_path = 'inference_results'

session.download_data(path=local_inference_results_path,
                      bucket=bucket,
                      key_prefix=prefix+'/inference-results/test_data.csv.out')
print ('downloaded inferences from s3 bucket to local directory')

data = pd.read_csv(local_inference_results_path+'/test_data.csv.out', sep=';', engine='python')
pd.set_option('display.max_rows', 10)       
data

downloaded inferences from s3 bucket to local directory


Unnamed: 0,1
0,1
1,1
2,1
3,1
4,1
...,...
9037,1
9038,1
9039,2
9040,2


### Part 7 - Download the logs and upload them back to S3