In [40]:
import io
import os
import pprint
import re
import time

import boto3
import numpy as np
import pandas as pd
import sagemaker
import sklearn
from sagemaker import get_execution_role

In [41]:
# Create a SageMaker SDK session object.
# NOTE(review): `sess` is not referenced by any other cell visible here — confirm it is still needed.
sess = sagemaker.Session()

In [42]:
# Resolve the execution role; it is passed below as RoleArn (training job)
# and ExecutionRoleArn (model creation).
role = get_execution_role()
# NOTE(review): the printed ARN includes the AWS account id — consider clearing
# this output before sharing the notebook.
print(role)

arn:aws:iam::750253866451:role/sage_role


In [43]:
# Bucket that holds the datasets and receives the model output.
bucket_name = "saurav-ml-sagemaker"

# Key prefixes (relative to the bucket) for each dataset split.
training_folder, test_folder, validation_folder = (
    "ChurnModel/{}/".format(split) for split in ("training", "test", "validation")
)

In [44]:
# Fully qualified S3 URIs built from the bucket name and the split prefixes.
s3_model_output_location = "s3://" + bucket_name + "/ChurnModel/model"
s3_training_file_location = "s3://" + bucket_name + "/" + training_folder
s3_test_file_location = "s3://" + bucket_name + "/" + test_folder
s3_validation_file_location = "s3://" + bucket_name + "/" + validation_folder

In [45]:
# Echo the resolved S3 locations, one per line, so they can be checked by eye.
print(
    s3_model_output_location,
    s3_training_file_location,
    s3_validation_file_location,
    s3_test_file_location,
    sep="\n",
)

s3://saurav-ml-sagemaker/ChurnModel/model
s3://saurav-ml-sagemaker/ChurnModel/training/
s3://saurav-ml-sagemaker/ChurnModel/validation/
s3://saurav-ml-sagemaker/ChurnModel/test/


In [46]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
from sagemaker.amazon.amazon_estimator import get_image_uri
# Look up the 'xgboost' image tagged 'latest' for the current boto3 session's region.
# NOTE(review): the output recorded under this cell mentions
# get_image_uri(region, 'xgboost', '0.90-1') — consider pinning an explicit version
# instead of 'latest'; confirm the serving/response format before switching.
container = get_image_uri(boto3.Session().region_name, 'xgboost', 'latest')

	get_image_uri(region, 'xgboost', '0.90-1').


In [57]:
# Job name = fixed prefix + UTC timestamp (presumably to keep each run's name unique).
job_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
xgboost_job = "Demo-xgboost{}".format(job_timestamp)
print("Job Name is " + xgboost_job)

Job Name is Demo-xgboost2020-03-29-08-25-20


In [58]:
def _csv_channel(channel_name, s3_uri, distribution):
    """Build one InputDataConfig entry for a text/csv S3Prefix channel.

    Args:
        channel_name: Value for "ChannelName".
        s3_uri: S3 prefix the channel reads from.
        distribution: Value for "S3DataDistributionType".

    Returns:
        dict: Channel description with compression and record wrapping set to "None".
    """
    return {
        "ChannelName": channel_name,
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": s3_uri,
                "S3DataDistributionType": distribution,
            }
        },
        "ContentType": "text/csv",
        "CompressionType": "None",
        "RecordWrapperType": "None",
    }


# Keyword arguments for sm.create_training_job(**linear_training_params).
# NOTE(review): the "linear_" prefix is kept because later cells use this name,
# even though the job trains the xgboost container.
linear_training_params = {
    "RoleArn": role,
    "TrainingJobName": xgboost_job,
    "AlgorithmSpecification": {
        "TrainingImage": container,
        "TrainingInputMode": "File",
    },
    "ResourceConfig": {
        "InstanceCount": 1,
        "InstanceType": "ml.c4.2xlarge",
        "VolumeSizeInGB": 10,
    },
    "InputDataConfig": [
        _csv_channel("train", s3_training_file_location, "ShardedByS3Key"),
        _csv_channel("validation", s3_validation_file_location, "FullyReplicated"),
    ],
    "OutputDataConfig": {"S3OutputPath": s3_model_output_location},
    # Hyperparameter values are passed as strings.
    "HyperParameters": {
        "max_depth": "5",
        "objective": "binary:logistic",
        "eta": "0.1",
        "num_round": "150",
    },
    # One hour, expressed in seconds.
    "StoppingCondition": {"MaxRuntimeInSeconds": 3600},
}

In [59]:
region = boto3.Session().region_name
sm = boto3.client('sagemaker')

# Submit the training job described by linear_training_params.
sm.create_training_job(**linear_training_params)

# Status immediately after submission (typically still in progress).
status = sm.describe_training_job(TrainingJobName=xgboost_job)['TrainingJobStatus']
print(status)

try:
    # Block until the job reaches a terminal state. The waiter may itself raise
    # if the job ends in a failure state, hence the `finally` below.
    sm.get_waiter('training_job_completed_or_stopped').wait(TrainingJobName=xgboost_job)
finally:
    # Re-read the job AFTER waiting: the status captured before the wait is stale
    # and would never equal 'Failed', so the failure reason was never reported.
    job_description = sm.describe_training_job(TrainingJobName=xgboost_job)
    status = job_description['TrainingJobStatus']
    print('Training job ended with status: {}'.format(status))
    if status == 'Failed':
        message = job_description['FailureReason']
        print('Training failed with the following error: {}'.format(message))
        raise Exception('Training job failed')

InProgress


In [62]:
# Look up where the training job wrote its model artifacts.
trained_job = sm.describe_training_job(TrainingJobName=xgboost_job)
model_artifacts_uri = trained_job['ModelArtifacts']['S3ModelArtifacts']

# Hosting container definition: same image as training plus the trained artifacts.
linear_hosting_container = dict(Image=container, ModelDataUrl=model_artifacts_uri)

# Register the model under the same name as the training job.
create_model_response = sm.create_model(
    ModelName=xgboost_job,
    ExecutionRoleArn=role,
    PrimaryContainer=linear_hosting_container,
)

print(create_model_response['ModelArn'])


arn:aws:sagemaker:us-east-1:750253866451:model/demo-xgboost2020-03-29-08-25-20


In [None]:
# Endpoint config name = fixed prefix + UTC timestamp.
config_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
linear_endpoint_config = 'DEMO-linear-endpoint-config-{}'.format(config_timestamp)
print(linear_endpoint_config)

# Single production variant serving the model registered as `xgboost_job`.
all_traffic_variant = {
    'InstanceType': 'ml.m4.xlarge',
    'InitialInstanceCount': 1,
    'ModelName': xgboost_job,
    'VariantName': 'AllTraffic',
}
create_endpoint_config_response = sm.create_endpoint_config(
    EndpointConfigName=linear_endpoint_config,
    ProductionVariants=[all_traffic_variant],
)

print("Endpoint Config Arn: {}".format(create_endpoint_config_response['EndpointConfigArn']))

In [None]:
# Endpoint name = fixed prefix + UTC timestamp (minute resolution).
linear_endpoint = 'DEMO-linear-endpoint-{}'.format(time.strftime("%Y%m%d%H%M", time.gmtime()))
print(linear_endpoint)

# Create the endpoint from the endpoint configuration defined earlier.
create_endpoint_response = sm.create_endpoint(
    EndpointName=linear_endpoint,
    EndpointConfigName=linear_endpoint_config,
)
print(create_endpoint_response['EndpointArn'])

# Status right after the create call.
resp = sm.describe_endpoint(EndpointName=linear_endpoint)
status = resp['EndpointStatus']
print("Status: {}".format(status))

# Block on the 'endpoint_in_service' waiter, then read the endpoint state again.
in_service_waiter = sm.get_waiter('endpoint_in_service')
in_service_waiter.wait(EndpointName=linear_endpoint)

resp = sm.describe_endpoint(EndpointName=linear_endpoint)
status = resp['EndpointStatus']
print("Arn: {}".format(resp['EndpointArn']))
print("Status: {}".format(status))

# Anything other than 'InService' at this point is treated as a failed deployment.
if not status == 'InService':
    raise Exception('Endpoint creation did not succeed')

In [None]:
# Predict: send the test features to the deployed endpoint and evaluate the scores.

In [None]:
def np2csv(arr):
    """Serialize an array to CSV text suitable for a text/csv request body.

    Values are written by ``np.savetxt`` with the ``%g`` format and a comma
    delimiter, one array row per line.

    Args:
        arr: Array-like accepted by ``np.savetxt``.

    Returns:
        str: The CSV text with trailing whitespace (including the final
        newline) stripped.
    """
    # Requires `io` from the notebook's import cell; the buffer is closed on exit.
    with io.BytesIO() as buffer:
        np.savetxt(buffer, arr, delimiter=',', fmt='%g')
        return buffer.getvalue().decode().rstrip()

In [None]:
runtime = boto3.client('runtime.sagemaker')

# NOTE(review): `test_X` is not defined in any cell visible here — it must be
# loaded before this cell for the notebook to survive Restart & Run All.
payload = np2csv(test_X)
response = runtime.invoke_endpoint(EndpointName=linear_endpoint,
                                   ContentType='text/csv',
                                   Body=payload)

# The endpoint serves the XGBoost image, which answers text/csv requests with
# plain delimited scores, not the {"predictions": [{"score": ...}]} JSON shape
# the original parsing expected. Split on commas and/or whitespace so both
# comma-separated and newline-separated responses are handled.
result = response['Body'].read().decode()
test_pred = np.array(
    [float(score) for score in re.split(r'[,\s]+', result.strip()) if score]
)

In [None]:
# Mean absolute error of the endpoint scores against the test labels.
test_mae_linear = np.mean(np.abs(test_y - test_pred))

# Baseline predictor: always guess the median of the training labels.
train_median = np.median(train_y)
test_mae_baseline = np.mean(np.abs(test_y - train_median))

for mae_label, mae_value in (
    ("Test MAE Baseline :", test_mae_baseline),
    ("Test MAE Linear:", test_mae_linear),
):
    print(mae_label, round(mae_value, 3))

In [None]:
# Threshold the scores at 0.5; adding 0 converts the boolean mask to integers.
test_pred_class = (test_pred > 0.5) + 0
# Baseline: predict the training-label median for every test row.
test_pred_baseline = np.full(len(test_y), np.median(train_y))

# Share of exact matches, expressed as a percentage.
prediction_accuracy = 100 * np.mean(test_y == test_pred_class)
baseline_accuracy = 100 * np.mean(test_y == test_pred_baseline)

print("Prediction Accuracy:", round(prediction_accuracy, 1), "%")
print("Baseline Accuracy:", round(baseline_accuracy, 1), "%")

In [None]:
# Delete the endpoint named by `linear_endpoint` once predictions are done.
sm.delete_endpoint(EndpointName=linear_endpoint)
