In [1]:
import boto3 
import sagemaker 

In [2]:
# IAM execution role attached to this SageMaker session; it is passed to the
# Estimator further down. The bare expression on the last line echoes the value.
role = sagemaker.get_execution_role()
role

'arn:aws:iam::781274082941:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole'

In [3]:
# Region of the default boto3 session; used below for the bucket's
# LocationConstraint and for looking up the XGBoost container image.
my_region = boto3.Session().region_name
my_region 

'us-east-2'

# Creating an S3 bucket

In [4]:
# Bucket that holds the train/test CSVs and the training output.
# NOTE(review): S3 bucket names are globally unique, so this literal must be
# changed when the notebook is run from another account.
bucket_name = 'sagemaker-demo-rksnrc-demo'
# High-level boto3 S3 resource; used by the bucket-creation cell.
s3 = boto3.resource('s3')
s3

s3.ServiceResource()

In [5]:
# Create the demo bucket. us-east-1 is the one region where the
# CreateBucketConfiguration argument is omitted; every other region passes
# itself as the LocationConstraint. Any failure is reported, not raised.
try:
    create_bucket_kwargs = {'Bucket': bucket_name}
    if my_region != 'us-east-1':
        create_bucket_kwargs['CreateBucketConfiguration'] = {
            'LocationConstraint': my_region
        }
    s3.create_bucket(**create_bucket_kwargs)
    print('S3 bucket created successfully')
except Exception as err:
    print('S3 error: ', err)

S3 bucket created successfully


# Loading the data 

In [6]:
import urllib.request
import numpy as np
import pandas as pd

In [7]:
# Location of the pre-cleaned bank marketing dataset used in this notebook.
data_url = (
    "https://d1.awsstatic.com/tmt"
    "/build-train-deploy-machine-learning-model-sagemaker/"
    "bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv"
)

# Fetch the CSV into the working directory; a failure is printed, not raised.
try:
    urllib.request.urlretrieve(data_url, "bank_clean.csv")
    print('Success: downloaded bank_clean.csv.')
except Exception as err:
    print('Data load error: ', err)

Success: downloaded bank_clean.csv.


In [8]:
# Read the downloaded CSV into a DataFrame, using the first column as the
# index. A failure is printed rather than raised, in which case `model_data`
# stays undefined.
try:
    model_data = pd.read_csv('./bank_clean.csv', index_col=0)
except Exception as err:
    print('Data load error: ', err)
else:
    print('Success: Data loaded into dataframe.')

Success: Data loaded into dataframe.


In [9]:
# Shuffle all rows with a fixed seed, then cut at 70% for a reproducible
# train/test split.
# Positional slicing with .iloc replaces np.split(DataFrame, ...): np.split
# goes through DataFrame.swapaxes, which pandas has deprecated. The rows
# selected are the same.
shuffled_data = model_data.sample(frac=1, random_state=1729)
split_index = int(0.7 * len(model_data))
train_data = shuffled_data.iloc[:split_index]
test_data = shuffled_data.iloc[split_index:]
print(train_data.shape, test_data.shape)

(28831, 61) (12357, 61)


# Storing the (training) data in S3 

In [10]:
import os

In [11]:
# Write the training set with the label ('y_yes') as the first column,
# followed by the features, with no header row and no index column.
train_features = train_data.drop(columns=['y_no', 'y_yes'])
train_table = pd.concat([train_data['y_yes'], train_features], axis=1)
train_table.to_csv('train.csv', index=False, header=False)

In [12]:
# Key prefix under which every artifact of this demo is stored in the bucket.
prefix = 'sagemaker/DEMO-xgboost-dm'
my_bucket = boto3.Session().resource('s3').Bucket(bucket_name)

# S3 object keys always use '/' as the delimiter, so the key is built
# explicitly rather than with os.path.join, which uses the OS path separator.
train_url = f'{prefix}/train/train.csv'
my_bucket.Object(train_url).upload_file('train.csv')

In [31]:
# Write the held-out set in the same layout as train.csv: label ('y_yes')
# first, then the features, with no header row and no index column.
test_features = test_data.drop(['y_no', 'y_yes'], axis=1)
pd.concat(
    [test_data['y_yes'], test_features],
    axis=1
).to_csv('test.csv', index=False, header=False)

# S3 object keys always use '/' as the delimiter, so the key is built
# explicitly rather than with os.path.join, which uses the OS path separator.
test_url = f'{prefix}/test/test.csv'
my_bucket.Object(test_url).upload_file('test.csv')

# Training a model 

In [13]:
# Look up the URI of the built-in XGBoost training image for this region.
# NOTE(review): the "latest" tag is believed to resolve to a legacy XGBoost
# image; consider pinning an explicit version - confirm against the SDK docs.
xgboost_container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=my_region,
    version="latest",
)

In [14]:
sess = sagemaker.Session()

# Where the training job writes its output inside the demo bucket.
training_output_path = f's3://{bucket_name}/{prefix}/output'

# Estimator describing the training job: container image, execution role,
# and a single ml.m4.xlarge training instance.
xgb_model = sagemaker.estimator.Estimator(
    image_uri=xgboost_container,
    role=role,
    instance_count=1,
    instance_type='ml.m4.xlarge',  # model training machine
    output_path=training_output_path,
    sagemaker_session=sess,
)

In [15]:
# XGBoost hyperparameters for the binary classification job, collected in one
# dict so they are easy to inspect or tweak before being applied.
xgb_hyperparameters = {
    'max_depth': 5,
    'eta': 0.2,
    'gamma': 4,
    'min_child_weight': 6,
    'subsample': 0.8,
    'silent': 0,
    'objective': 'binary:logistic',
    'num_round': 100,
}
xgb_model.set_hyperparameters(**xgb_hyperparameters)

In [16]:
# Training channel: every object under the train/ prefix, read as CSV.
train_s3_uri = f's3://{bucket_name}/{prefix}/train'
s3_input_train = sagemaker.inputs.TrainingInput(
    s3_data=train_s3_uri,
    content_type='csv',
)

In [19]:
# Launch the training job on the 'train' channel; logs=False suppresses the
# streamed container logs.
xgb_model.fit(
    inputs={'train': s3_input_train},
    logs=False,
)


2022-11-18 04:06:59 Starting - Starting the training job....
2022-11-18 04:07:24 Starting - Preparing the instances for training......................
2022-11-18 04:09:20 Downloading - Downloading input data....
2022-11-18 04:09:45 Training - Downloading the training image...
2022-11-18 04:10:05 Training - Training image download completed. Training in progress.....
2022-11-18 04:10:28 Uploading - Uploading generated training model...........
2022-11-18 04:11:28 Completed - Training job completed


# Deploy the model to a hosting service (real-time endpoint)

In [20]:
# Deploy the trained model behind an endpoint served by a single instance.
# The returned predictor is used for inference in the validation section and
# is torn down in the clean-up cell.
xgb_predictor = xgb_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge' # server for the hosting service 
)

------!

# Deploy the model with batch transform

In [23]:
# S3 locations referenced by the (currently disabled) batch transform cell.
# NOTE(review): nothing in this notebook uploads data under 'batch_inference',
# and the output points at the 'test/' prefix - verify the two paths are not
# swapped before enabling the batch transform.
batch_output = f's3://{bucket_name}/{prefix}/test/examples'
batch_input = f's3://{bucket_name}/{prefix}/batch_inference'

In [29]:
# Batch transform example, intentionally left disabled (nothing in this cell
# runs).
# NOTE(review): content_type='train/csv' below does not look like a valid MIME
# type; CSV input is normally declared as 'text/csv' - confirm before enabling.
# transformer = xgb_model.transformer(
#     instance_count=1, 
#     instance_type='ml.m4.2xlarge', 
#     output_path=batch_output, 
# )

# transformer.transform(
#     data=batch_input, 
#     data_type='S3Prefix',
#     content_type='train/csv', 
#     split_type='Line',
# )

# transformer.wait()

# Validate the model 

- **TODO:** How can the deployed model be accessed through `sagemaker.predictor.RealTimePredictor` (named `sagemaker.predictor.Predictor` in SageMaker Python SDK v2)?

In [52]:
from sagemaker.serializers import CSVSerializer
from sklearn.metrics import confusion_matrix

In [41]:
# Pull the uploaded test file back from S3 (overwrites the local test.csv).
my_bucket.download_file(Key=test_url, Filename='test.csv')

# Feature matrix for inference: the in-memory test frame without the two
# label columns, as a NumPy array.
test_features = test_data.drop(columns=['y_no', 'y_yes'])
test_data_array = test_features.to_numpy()

In [47]:
# Send the feature rows to the endpoint as CSV.
xgb_predictor.serializer = CSVSerializer()
predictions = xgb_predictor.predict(test_data_array)

# The response is raw bytes: drop the first byte, decode, and parse the
# comma-separated scores into a float array.
# NOTE(review): the reason for skipping the first byte is not visible here -
# confirm against the endpoint's actual response format.
predictions_text = predictions[1:].decode('utf-8')
predictions_array = np.fromstring(predictions_text, sep=',')

In [54]:
# Turn the scores into hard labels at a 0.5 threshold, then compare them with
# the true 'y_yes' labels.
predicted_labels = (predictions_array > 0.5).astype('int')
confusion_matrix(y_true=test_data['y_yes'], y_pred=predicted_labels)

array([[10769,   167],
       [ 1133,   288]])

# Exposing the endpoint to an internet-facing app


- Create an IAM role that the AWS Lambda service principal can assume. Give the role permissions to
call the Amazon SageMaker InvokeEndpoint API.
- Create a Lambda function that calls the Amazon SageMaker InvokeEndpoint API.
- Call the Lambda function from a mobile application.

# Clean up resources 

In [55]:
# Tear down the hosted endpoint together with its endpoint configuration.
xgb_predictor.delete_endpoint(delete_endpoint_config=True)

# Empty the demo bucket. The delete() call is left as the last expression so
# its response is shown as the cell output.
# NOTE(review): this removes the objects only; the bucket itself is not
# deleted by this cell.
bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)
objects_in_bucket = bucket_to_delete.objects.all()
objects_in_bucket.delete()

[{'ResponseMetadata': {'RequestId': 'H8EY1DA8RTBBFG3M',
   'HostId': '7zC1n21/Fu5sVrY9Nxq/LWxaFVK4yCd98m+HyryY/1ijqclwP5yWFIGQ6Ze1bj7atsIAHOE4JPU=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '7zC1n21/Fu5sVrY9Nxq/LWxaFVK4yCd98m+HyryY/1ijqclwP5yWFIGQ6Ze1bj7atsIAHOE4JPU=',
    'x-amz-request-id': 'H8EY1DA8RTBBFG3M',
    'date': 'Fri, 18 Nov 2022 04:58:01 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-11-18-04-06-59-761/rule-output/ProfilerReport-1668744419/profiler-output/profiler-report.ipynb'},
   {'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-11-18-04-06-59-761/output/model.tar.gz'},
   {'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-11-18-04-06-59-761/profiler-output/system/incremental/2022111804/1668744540.algo-1.json'},
   {'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2