In [1]:
pip install sagemaker -U

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
import sagemaker
session = sagemaker.Session()

# role = sagemaker.get_execution_role()
# print("RoleArn: {}".format(role))

In [3]:
region = sagemaker.Session().boto_region_name
print("AWS Region: {}".format(region))

role = sagemaker.get_execution_role()
print("RoleArn: {}".format(role))

AWS Region: us-east-1
RoleArn: arn:aws:iam::432547830124:role/service-role/AmazonSageMaker-ExecutionRole-20221229T184597


In [4]:
import boto3

In [5]:
bucket_name = sagemaker.Session().default_bucket()

In [6]:
# set an output path where the trained model will be saved
prefix = 'xgboost-as-a-built-in-algo'
output_path ='s3://{}/{}/output'.format(bucket_name, prefix)
print(output_path)

s3://sagemaker-us-east-1-432547830124/xgboost-as-a-built-in-algo/output


In [7]:
import pandas as pd
import urllib
try:
    urllib.request.urlretrieve ("https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv", "bank_clean.csv")
    print('Success: downloaded bank_clean.csv.')
except Exception as e:
    print('Data load error: ',e)

try:
    model_data = pd.read_csv('./bank_clean.csv',index_col=0)
    print('Success: Data loaded into dataframe.')
except Exception as e:
    print('Data load error: ',e)

Success: downloaded bank_clean.csv.
Success: Data loaded into dataframe.


In [8]:
### Train Test split

import numpy as np
train_data, test_data = np.split(model_data.sample(frac=1, random_state=1729), [int(0.7 * len(model_data))])
print(train_data.shape, test_data.shape)

(28831, 61) (12357, 61)


In [9]:

### Saving Train And Test Into Buckets
## We start with Train Data
import os
pd.concat([train_data['y_yes'], train_data.drop(['y_no', 'y_yes'], 
                                                axis=1)], 
                                                axis=1).to_csv('train.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')
#s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/train'.format(bucket_name, prefix), content_type='csv')

In [10]:
# Test Data Into Buckets
pd.concat([test_data['y_yes'], test_data.drop(['y_no', 'y_yes'], axis=1)], axis=1).to_csv('test.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'test/test.csv')).upload_file('test.csv')

In [11]:
s3_input_train = sagemaker.TrainingInput(s3_data='s3://{}/{}/train'.format(bucket_name, prefix), content_type='csv')

In [12]:
s3_input_test = sagemaker.TrainingInput(s3_data='s3://{}/{}/test'.format(bucket_name, prefix), content_type='csv')

In [13]:
s3_output_location='s3://{}/{}/{}'.format(bucket_name, prefix, 'xg_model')

In [14]:
container= sagemaker.image_uris.retrieve("xgboost", region, "1.2-1")

In [16]:
#print(container)
from sagemaker.debugger import Rule, rule_configs

s3_output_location='s3://{}/{}/{}'.format(bucket_name, prefix, 'xgboost_model')

xgb_model=sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    volume_size=5,
    output_path=s3_output_location,
    sagemaker_session=sagemaker.Session(),
    rules=[Rule.sagemaker(rule_configs.create_xgboost_report())]
)

In [17]:
xgb_model.set_hyperparameters(
    max_depth = 5,
    eta = 0.2,
    gamma = 4,
    min_child_weight = 6,
    subsample = 0.7,
    objective = "binary:logistic",
    num_round = 1000
)

In [18]:
xgb_model.fit({"train":s3_input_train, "validation":s3_input_test},wait=True)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2022-12-29-16-11-47-930


2022-12-29 16:11:48 Starting - Starting the training job...CreateXgboostReport: InProgress
...
2022-12-29 16:12:32 Starting - Preparing the instances for training.........
2022-12-29 16:14:12 Downloading - Downloading input data...
2022-12-29 16:14:37 Training - Downloading the training image...
2022-12-29 16:15:13 Training - Training image download completed. Training in progress...[34m[2022-12-29 16:15:26.129 ip-10-0-81-126.ec2.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined

In [19]:
rule_output_path = xgb_model.output_path + "/" + xgb_model.latest_training_job.job_name + "/rule-output"
! aws s3 ls {rule_output_path} --recursive

In [None]:
xgb_model.output_path

In [None]:
output_path

In [None]:
xgb_model.latest_training_job.job_name 

In [None]:
xgb_model

In [None]:
! aws s3 ls {rule_output_path} --recursive

In [None]:
# from IPython.display import FileLink, FileLinks
# display("Click link below to view the XGBoost Training report", FileLink("CreateXgboostReport/xgboost_report.html"))

In [20]:
xgb_model.model_data

's3://sagemaker-us-east-1-432547830124/xgboost-as-a-built-in-algo/xgboost_model/sagemaker-xgboost-2022-12-29-16-11-47-930/output/model.tar.gz'

In [22]:
from sagemaker.serializers import CSVSerializer

In [23]:
xgb_model.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    serializer=CSVSerializer()
)

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2022-12-29-16-19-07-866
INFO:sagemaker:Creating endpoint-config with name sagemaker-xgboost-2022-12-29-16-19-07-866
INFO:sagemaker:Creating endpoint with name sagemaker-xgboost-2022-12-29-16-19-07-866


--------------!

<sagemaker.predictor.Predictor at 0x7f635d234220>

In [26]:
'sagemaker-xgboost-2022-12-29-16-19-07-866'.endpoint_name

AttributeError: 'str' object has no attribute 'endpoint_name'

In [28]:
# Create a low-level SageMaker service client.
sagemaker_client = boto3.client('sagemaker')

# Delete endpoint
sagemaker_client.delete_endpoint(EndpointName='sagemaker-xgboost-2022-12-29-16-19-07-866')

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


{'ResponseMetadata': {'RequestId': '573141c7-f6bd-45f2-8bde-adc3601a148a',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '573141c7-f6bd-45f2-8bde-adc3601a148a',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Thu, 29 Dec 2022 16:42:06 GMT'},
  'RetryAttempts': 0}}

In [29]:
# Delete endpoint configuration
sagemaker_client.delete_endpoint_config(EndpointConfigName='sagemaker-xgboost-2022-12-29-16-19-07-866')
    

{'ResponseMetadata': {'RequestId': '19898e6d-9d82-4a56-bf08-f39568504f12',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '19898e6d-9d82-4a56-bf08-f39568504f12',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Thu, 29 Dec 2022 16:42:57 GMT'},
  'RetryAttempts': 0}}

In [30]:
sagemaker_client.delete_model(ModelName='sagemaker-xgboost-2022-12-29-16-19-07-866')

{'ResponseMetadata': {'RequestId': '0410022f-cc45-40dc-9acd-89434a3f37cc',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '0410022f-cc45-40dc-9acd-89434a3f37cc',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Thu, 29 Dec 2022 16:44:02 GMT'},
  'RetryAttempts': 0}}