In [18]:
import sagemaker
import boto3
import pandas as pd
from sagemaker import get_execution_role

In [19]:
# get region name
region = boto3.Session().region_name
print ('region -> {}'.format(region))

region -> eu-west-1


In [20]:
# initialize session
session = sagemaker.Session()

# bucket details
bucket = 'snowflake-getting-started'
prefix = 'bank-marketing'

# get execution role
role = get_execution_role()

sm = boto3.Session().client(service_name='sagemaker',region_name=region)

## AutoPilot Experiment Configurations

### Part 1 - Specify Input Data Config, Job Config, Output Data Config, Problem Type & Objective

In [21]:
input_data_config =[
    {
        'DataSource':{
            'S3DataSource':{
                'S3DataType':'S3Prefix',
                'S3Uri':'s3://{}/{}/train'.format(bucket,prefix)
            }
        },
        'TargetAttributeName':'Class'
    }
]

In [22]:
job_config = {
    'CompletionCriteria':{
      'MaxRuntimePerTrainingJobInSeconds': 600,
      'MaxAutoMLJobRuntimeInSeconds': 3600
    },
}

In [23]:
output_data_config = {
    'S3OutputPath' : 's3://{}/{}/autopilot-sdk-outputs'.format(bucket,prefix)
}

In [24]:
problem_type = 'BinaryClassification'
job_objective = {'MetricName':'F1'}

### Part 2 - Create AutoML Job

In [25]:
from time import gmtime, strftime, sleep
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())
 
auto_ml_job_name = 'bankmarketing-sdk-exp' + timestamp_suffix
print('AutoMLJobName: ' + auto_ml_job_name)
 
sm.create_auto_ml_job(AutoMLJobName=auto_ml_job_name,
                      InputDataConfig=input_data_config,
                      OutputDataConfig=output_data_config,
                      AutoMLJobConfig=job_config,
                      AutoMLJobObjective=job_objective,
                      ProblemType=problem_type,
                      RoleArn=role)

AutoMLJobName: bankmarketing-sdk-exp24-21-54-26


{'AutoMLJobArn': 'arn:aws:sagemaker:eu-west-1:951135073253:automl-job/bankmarketing-sdk-exp24-21-54-26',
 'ResponseMetadata': {'RequestId': '6b001e5f-da09-4206-aaa4-65cbc7227b74',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '6b001e5f-da09-4206-aaa4-65cbc7227b74',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '103',
   'date': 'Fri, 24 Jul 2020 21:54:26 GMT'},
  'RetryAttempts': 0}}

### Part 3 - Monitor Job

    This code is generic in nature and works as is for all models & jobs

In [None]:
print ('JobStatus - Secondary Status')
print('------------------------------')
 
 
describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
print (describe_response['AutoMLJobStatus'] + " - " + describe_response['AutoMLJobSecondaryStatus'])
job_run_status = describe_response['AutoMLJobStatus']
    
while job_run_status not in ('Failed', 'Completed', 'Stopped'):
    describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_run_status = describe_response['AutoMLJobStatus']
    
    print (describe_response['AutoMLJobStatus'] + " - " + describe_response['AutoMLJobSecondaryStatus'])
    sleep(30)

JobStatus - Secondary Status
------------------------------
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData


### Part 4 - Get Data Exploration Notebook, Candidate Definition Notebook & Name of best candidate model

    This code is generic and would work for all models

In [15]:
job = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
 
job_candidate_notebook = job['AutoMLJobArtifacts']['CandidateDefinitionNotebookLocation']
job_data_notebook = job['AutoMLJobArtifacts']['DataExplorationNotebookLocation']
job_best_candidate = job['BestCandidate']
job_best_candidate_name = job_best_candidate['CandidateName']
 
job_candidate_notebook
job_data_notebook
job_best_candidate_name

ResourceNotFound: An error occurred (ResourceNotFound) when calling the DescribeAutoMLJob operation: Amazon SageMaker can't find an AutoML job called bankmarketing-sdk-exp24-20-39-31.

In [16]:
%%sh -s $job_candidate_notebook $job_data_notebook
 
aws s3 cp $1 .
aws s3 cp $2 .


usage: aws s3 cp <LocalPath> <S3Uri> or <S3Uri> <LocalPath> or <S3Uri> <S3Uri>
Error: Invalid argument type

usage: aws s3 cp <LocalPath> <S3Uri> or <S3Uri> <LocalPath> or <S3Uri> <S3Uri>
Error: Invalid argument type


CalledProcessError: Command 'b' \naws s3 cp $1 .\naws s3 cp $2 .\n'' returned non-zero exit status 255.

### Part 5 - Create the model from the best candidate, deploy it and perform batch inferencing.

    Generic code would work for all jobs and models

In [None]:
model_name = 'automl-sdk-bank-marketing-model-' + timestamp_suffix

model = sm.create_model(Containers=job_best_candidate['InferenceContainers'],
                            ModelName=model_name,
                            ExecutionRoleArn=role)

print('Model ARN corresponding to the best candidate is : {}'.format(model['ModelArn']))

### Part 6 - Bulk Inferencing - Transform Test Data Held in S3

In [None]:
transform_job_name = 'automl-sdk-bankmarketing-transform-' + timestamp_suffix

transform_input = {
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri':'s3://{}/{}/test'.format(bucket,prefix)
            }
        },
        'ContentType': 'text/csv',
        'CompressionType': 'None',
        'SplitType': 'Line'
    }

transform_output = {
        'S3OutputPath': 's3://{}/{}/inference-results'.format(bucket,prefix),
    }

transform_resources = {
        'InstanceType': 'ml.m4.medium',
        'InstanceCount': 1
    }

sm.create_transform_job(TransformJobName = transform_job_name,
                        ModelName = model_name,
                        TransformInput = transform_input,
                        TransformOutput = transform_output,
                        TransformResources = transform_resources
)

### Part 6 - Poll Job Status

    Generic code for all models & jobs

In [None]:
print ('JobStatus')
print('----------')
 
describe_response = sm.describe_transform_job(TransformJobName = transform_job_name)
job_run_status = describe_response['TransformJobStatus']
print (job_run_status)
 
while job_run_status not in ('Failed', 'Completed', 'Stopped'):
    describe_response = sm.describe_transform_job(TransformJobName = transform_job_name)
    job_run_status = describe_response['TransformJobStatus']
    print (job_run_status)
    sleep(30)

In [None]:
s3_output_key = '{}/inference-results/test_data.csv.out'.format(prefix);
local_inference_results_path = 'inference_results.csv'
 
s3 = boto3.resource('s3')
inference_results_bucket = s3.Bucket(session.default_bucket())
 
inference_results_bucket.download_file(s3_output_key, local_inference_results_path);
 
data = pd.read_csv(local_inference_results_path, sep=';')
pd.set_option('display.max_rows', 10)       
data