In [None]:
import sagemaker
from sagemaker import get_execution_role

import numpy as np
import pandas as pandas

import boto3
import re

In [None]:
# S3 LOCATIONS FOR THE TRAINING DATA AND THE MODEL ARTIFACTS
# NOTE(review): the bucket name and key prefixes below look like placeholders;
# replace them with real values before running.

bucket_name = 's3...'

train_data = r'path'
val_data = r'path'

# Spelled the way the estimator cell reads it (output_path=s3_model_output_location).
s3_model_output_location = f's3://{bucket_name}/path to model'
s3_training_file_location = f's3://{bucket_name}/{train_data}'
s3_validation_file_location = f's3://{bucket_name}/{val_data}'

# Backward-compatible alias for the original, misspelled variable name.
s3_model_ouput_locaton = s3_model_output_location

print(s3_model_output_location)
print(s3_training_file_location)
print(s3_validation_file_location)

In [None]:
# Upload a local file to S3
def write_to_s3(filename, bucket, key):
    """Upload the local file `filename` to `bucket` under object key `key`.

    The file is opened in binary mode and streamed with `upload_fileobj`;
    whatever that call returns is passed back to the caller.
    """
    with open(filename, 'rb') as local_file:
        s3_resource = boto3.Session().resource('s3')
        target_object = s3_resource.Bucket(bucket).Object(key)
        return target_object.upload_fileobj(local_file)

In [None]:
# Upload each local CSV under the key prefix that its data channel reads from:
# the training file goes under `train_data` (the prefix behind
# s3_training_file_location) and the held-out file under `val_data`
# (the prefix behind s3_validation_file_location).
# The original uploaded train.csv under `val_data`, so the training prefix
# never received the training file.
write_to_s3(
    'train.csv',
    bucket_name,
    train_data + 'train.csv'
)

write_to_s3(
    'test.csv',
    bucket_name,
    val_data + 'test.csv'
)

In [None]:
# SageMaker session plus the IAM role the training job and endpoint run under.
sess = sagemaker.Session()
role = get_execution_role() #IAM role

# Look up the built-in XGBoost container image for the session's region.
# (The helper is `get_image_uri`; the previous `get_mage_uri` spelling does not
# exist and raised AttributeError.)
container = sagemaker.amazon.amazon_estimator.get_image_uri(
    sess.boto_region_name,
    "xgboost",
    "latest"
)

print(f' Sagemaker XGBoost Info : \n{container}  ({sess.boto_region_name})')

In [None]:
# Configure the XGBoost training job

# One ml.m4.xlarge training instance; model artifacts are written to
# s3_model_output_location.
estimator = sagemaker.estimator.Estimator(
    container,
    role,
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    output_path=s3_model_output_location,
    sagemaker_session=sess,
    base_job_name='v1-xgboost-bcancer',
)

# Hyperparameters collected in one place, then applied in a single call.
xgb_hyperparameters = {
    'max_depth': 3,
    'objective': "binary:logistic",
    'num_round': 500,
}
estimator.set_hyperparameters(**xgb_hyperparameters)

# Show the hyperparameters currently held by the local estimator object.
estimator.hyperparameters()


In [None]:
# Describe where the training and validation CSV data live in S3.
# The SDK class is `sagemaker.session.s3_input`; the previous
# `sagemaker.session.s3.input` attribute path does not resolve to it.

training_input_config = sagemaker.session.s3_input(
    s3_data=s3_training_file_location,
    content_type='csv',
    s3_data_type='S3Prefix'
)

validation_input_config = sagemaker.session.s3_input(
    s3_data=s3_validation_file_location,
    content_type='csv',
    s3_data_type='S3Prefix'
)

# Channel name -> input configuration, passed to estimator.fit().
data_channels = {
    'train':training_input_config,
    'validation':validation_input_config
}

In [None]:
# Print each channel's configuration, training first, then validation.
for channel_input in (training_input_config, validation_input_config):
    print(channel_input.config)

In [None]:
# Run the training job on the configured data channels.
estimator.fit(inputs=data_channels)

# Deploy the trained model to an endpoint with one ml.m4.xlarge instance.
endpoint_settings = {
    'initial_instance_count': 1,
    'instance_type': 'ml.m4.xlarge',
    'endpoint_name': 'v2-xgboost-bcancer',
}
predictor = estimator.deploy(**endpoint_settings)

In [None]:
from sagemaker.predictor import csv_serializer, json_serializer

# Requests are serialized with csv_serializer and labelled text/csv;
# no deserializer is set on the predictor.
predictor.content_type = 'text/csv'
predictor.serializer = csv_serializer
predictor.deserializer = None

In [None]:
# Attach a predictor to the real-time endpoint by its name.
endpoint_name = 'v2-xgboost-bcancer'
enpoint_name = endpoint_name  # backward-compatible alias for the original misspelled name

# RealTimePredictor takes the endpoint name via `endpoint`; the previous
# `endpoints=` keyword is not a parameter and raised TypeError.
predictor = sagemaker.predictor.RealTimePredictor(endpoint=endpoint_name)