In [1]:
# Import Libs #

In [2]:
import pandas as pd 
import boto3
from zipfile import ZipFile
import sagemaker
from sagemaker import get_execution_role
from time import gmtime, strftime, sleep


In [3]:
# Declaração de variaveis # 

In [51]:
prefix = 'sagemaker/autopilot-dm'
prefix_2 = 'datasets/'
session = sagemaker.Session()
s3 = boto3.client('s3')
region = 'us-east-1'
bucket = session.default_bucket()
sm = boto3.Session().client(service_name='sagemaker',region_name=region)
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())
role = get_execution_role()

# Dataset supervisionado # 
train = './train_data.csv'
# Dataset com coluna Y para identificar se o resultado foi o esperado # 
test = './test_data.csv'
# Dataset sem os clientes que nao querem participar das campanhas # 
inferencia = './infer_data.csv'




In [6]:
# Configuraçao JOB Autopilot # 

In [15]:
input_data_config = [{
      'DataSource': {
        'S3DataSource': {
          'S3DataType': 'S3Prefix',
          'S3Uri': 's3://{}/{}/train'.format(bucket,prefix)
        }
      },
      'TargetAttributeName': 'y'
    }
  ]

output_data_config = {
    'S3OutputPath': 's3://{}/{}/output'.format(bucket,prefix)
  }


In [None]:
# Criaçao Autopilot Job # 

In [28]:
from time import gmtime, strftime, sleep
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())

auto_ml_job_name = 'automl-sicoob-' + timestamp_suffix
print('AutoMLJobName: ' + auto_ml_job_name)

sm.create_auto_ml_job(AutoMLJobName=auto_ml_job_name,
                      InputDataConfig=input_data_config,
                      OutputDataConfig=output_data_config,
                      RoleArn=role)


AutoMLJobName: automl-sicoob-24-21-53-31


{'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:631486845952:automl-job/automl-sicoob-24-21-53-31',
 'ResponseMetadata': {'RequestId': '662f5077-996a-4fe6-b576-59bd16edc95b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '662f5077-996a-4fe6-b576-59bd16edc95b',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '96',
   'date': 'Mon, 24 Feb 2020 21:53:30 GMT'},
  'RetryAttempts': 0}}

In [None]:
print ('JobStatus - Secondary Status')
print('------------------------------')


describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
print (describe_response['AutoMLJobStatus'] + " - " + describe_response['AutoMLJobSecondaryStatus'])
job_run_status = describe_response['AutoMLJobStatus']
    
while job_run_status not in ('Failed', 'Completed', 'Stopped'):
    describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_run_status = describe_response['AutoMLJobStatus']
    
    print (describe_response['AutoMLJobStatus'] + " - " + describe_response['AutoMLJobSecondaryStatus'])
    sleep(30)


JobStatus - Secondary Status
------------------------------
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineer

In [17]:
# Identificação melhor candidato que o Autopilot determinou # 

In [30]:
best_candidate = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)['BestCandidate']
best_candidate_name = best_candidate['CandidateName']
print(best_candidate)
print('\n')
print("CandidateName: " + best_candidate_name)
print("FinalAutoMLJobObjectiveMetricName: " + best_candidate['FinalAutoMLJobObjectiveMetric']['MetricName'])
print("FinalAutoMLJobObjectiveMetricValue: " + str(best_candidate['FinalAutoMLJobObjectiveMetric']['Value']))


{'CandidateName': 'tuning-job-1-00690a1bdf174cc1b0-121-62ea64b9', 'FinalAutoMLJobObjectiveMetric': {'MetricName': 'validation:accuracy', 'Value': 0.9157689809799194}, 'ObjectiveStatus': 'Succeeded', 'CandidateSteps': [{'CandidateStepType': 'AWS::SageMaker::ProcessingJob', 'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:631486845952:processing-job/db-1-ddc01f74d2554c7d9abe9ff588874ccbd80ac325edcd41dd8d80517a26', 'CandidateStepName': 'db-1-ddc01f74d2554c7d9abe9ff588874ccbd80ac325edcd41dd8d80517a26'}, {'CandidateStepType': 'AWS::SageMaker::TrainingJob', 'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:631486845952:training-job/automl-sic-dpp8-1-a274656d200c438dbe68017313de2cb45e8837ec00254', 'CandidateStepName': 'automl-sic-dpp8-1-a274656d200c438dbe68017313de2cb45e8837ec00254'}, {'CandidateStepType': 'AWS::SageMaker::TransformJob', 'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:631486845952:transform-job/automl-sic-dpp8-csv-1-bf1ba1252f04444695576f0538e55431941d5c759', 'CandidateStepNa

In [None]:
# Criação do modelo utilizando o melhor candidato # 

In [31]:
model_name = 'automl-sicoob-model-' + timestamp_suffix

model = sm.create_model(Containers=best_candidate['InferenceContainers'],
                            ModelName=model_name,
                            ExecutionRoleArn=role)

print('Model ARN corresponding to the best candidate is : {}'.format(model['ModelArn']))


Model ARN corresponding to the best candidate is : arn:aws:sagemaker:us-east-1:631486845952:model/automl-sicoob-model-24-21-53-31


In [None]:
# Inferencia com o modelo agora baseado no melhor candidato que já foi identificado, resultado enviado para o S3 # 

In [77]:
transform_job_name = 'automl-sicoob-transform11-' + timestamp_suffix

transform_input = {
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri': 's3://{}/{}/test'.format(bucket,prefix)
            }
        },
        'ContentType': 'text/csv',
        'CompressionType': 'None',
        'SplitType': 'Line'
    }

transform_output = {
        'S3OutputPath': 's3://{}/{}/test-results.csv'.format(bucket,prefix),
    }

transform_resources = {
        'InstanceType': 'ml.m5.4xlarge',
        'InstanceCount': 1
    }

sm.create_transform_job(TransformJobName = transform_job_name,
                        ModelName = model_name,
                        TransformInput = transform_input,
                        TransformOutput = transform_output,
                        TransformResources = transform_resources
)
print(transform_output)

{'S3OutputPath': 's3://sagemaker-us-east-1-631486845952/sagemaker/autopilot-dm/test-results.csv'}


In [78]:
s3_output_key = '{}/inference-results/test_data.csv.out'.format(prefix);
local_inference_results_path = 'inference_results.csv'

s3 = boto3.resource('s3')
inference_results_bucket = s3.Bucket(session.default_bucket())

inference_results_bucket.download_file(s3_output_key, local_inference_results_path);

data = pd.read_csv(local_inference_results_path, sep=';')
pd.set_option('display.max_rows', 10)         # Keep the output on one page
data




Unnamed: 0,no
0,no
1,no
2,no
3,no
4,no
...,...
8232,yes
8233,yes
8234,no
8235,yes
