In [1]:
import pandas as pd
import numpy as np
import boto3
import re
import sagemaker
from sagemaker import get_execution_role

### Upload data to S3

In [None]:
# S3 layout for this experiment: one bucket, with a separate key prefix per
# data split. Every folder ends with '/' so that `folder + filename` yields a
# valid object key and each prefix matches only its own split.
bucket_name = 'ml-sagemaker-practise'
training_folder = r'bikerental/training/'
validation_folder = r'bikerental/validation/'
test_folder = r'bikerental/test/'  # was misspelled 'bikerentral'

# Fully qualified s3:// URIs derived from the bucket and the prefixes above.
s3_model_output_location = r's3://{0}/bikerental/model'.format(bucket_name)
s3_training_file_location = r's3://{0}/{1}'.format(bucket_name, training_folder)
s3_validation_file_location = r's3://{0}/{1}'.format(bucket_name, validation_folder)
# Previously written as '{o}' followed by ',format(...)', which invoked the
# builtin format() with the folder as a format spec and raised ValueError.
s3_test_file_location = r's3://{0}/{1}'.format(bucket_name, test_folder)

In [None]:
# Echo every derived S3 URI, one per line, as a quick sanity check.
for s3_location in (
    s3_model_output_location,
    s3_training_file_location,
    s3_validation_file_location,
    s3_test_file_location,
):
    print(s3_location)


In [None]:
def write_to_s3(filename, bucket, key):
    """Upload a local file to an S3 object.

    Args:
        filename: Path of the local file to upload.
        bucket: Name of the destination S3 bucket.
        key: Object key to write inside the bucket.

    Returns:
        Whatever ``upload_fileobj`` returns for the upload.
    """
    # Open in binary mode so the bytes are streamed to S3 unchanged.
    with open(filename, 'rb') as local_file:
        s3_resource = boto3.Session().resource('s3')
        target_object = s3_resource.Bucket(bucket).Object(key)
        return target_object.upload_fileobj(local_file)

In [None]:
# Upload each split under its OWN prefix. Previously all three files were
# written to the training folder, which left the validation prefix empty and
# placed the validation/test rows inside the training channel's prefix.
dataset_uploads = (
    ('bikerental_train.csv', training_folder),
    ('bikerental_valid.csv', validation_folder),
    ('bikerental_test.csv', test_folder),
)

for local_csv, s3_folder in dataset_uploads:
    # Normalise the separator so the key is correct whether or not the
    # folder string already ends with '/'.
    write_to_s3(local_csv, bucket_name, f"{s3_folder.rstrip('/')}/{local_csv}")
            

### Train the XGBoost model

In [None]:
# Managed spot training configuration.
use_spot_instances = True
max_run = 3700  # passed to the estimator as max_run (training time limit)
# max_wait is only supplied when spot instances are requested.
max_wait = 7200 if use_spot_instances else None

# Used as the training base job name and as the endpoint name. SageMaker
# resource names accept only alphanumerics and hyphens, so the former
# 'xgboost_bikerental' (underscore) would be rejected by the service.
job_name = 'xgboost-bikerental'

# Checkpoints are only configured for spot training.
checkpoint_s3_uri = None
if use_spot_instances:
    checkpoint_s3_uri = f's3://{bucket_name}/bikerental/checkpoints/{job_name}'
print(f'checkpoint_uri:{checkpoint_s3_uri}')
    

In [None]:
# Establish a SageMaker session with AWS (constructed with default arguments).
sess=sagemaker.Session()

In [None]:
# Look up the execution role for this environment; it is handed to the
# estimator when the training job is configured. Printed for visibility.
role=get_execution_role()
print(role)

In [None]:
# Resolve the XGBoost container image URI for the session's region,
# pinned to framework version 1.2-2.
container = sagemaker.image_uris.retrieve("xgboost", sess.boto_region_name, version="1.2-2")
# The closing quote was missing here, which made the whole cell a SyntaxError.
print(f"xgboost container {container}")

In [None]:
# Training job settings: a single ml.m5.xlarge instance, model artifacts
# written to the model output location, plus the spot/checkpoint options
# chosen in the configuration cell.
estimator_settings = dict(
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=s3_model_output_location,
    sagemaker_session=sess,
    base_job_name=job_name,
    use_spot_instances=use_spot_instances,
    max_run=max_run,
    max_wait=max_wait,
    checkpoint_s3_uri=checkpoint_s3_uri,
)

# Container image and role are positional; everything else is by keyword.
estimator = sagemaker.estimator.Estimator(container, role, **estimator_settings)
    

In [None]:
# Hyperparameters forwarded to the training container.
xgboost_hyperparameters = {
    "max_depth": 5,
    "objective": "reg:squarederror",
    "eta": 0.1,
    "num_round": 150,
}
estimator.set_hyperparameters(**xgboost_hyperparameters)

In [None]:
# Show the hyperparameters currently set on the estimator as the cell output.
estimator.hyperparameters()

In [None]:
# Both channels read CSV objects found under an S3 prefix, so the shared
# options are declared once and reused.
csv_prefix_options = {'content_type': 'csv', 's3_data_type': 'S3Prefix'}

training_input_config = sagemaker.session.TrainingInput(
    s3_data=s3_training_file_location, **csv_prefix_options
)
validation_input_config = sagemaker.session.TrainingInput(
    s3_data=s3_validation_file_location, **csv_prefix_options
)

# Channel name -> input configuration, as handed to estimator.fit().
data_channels = dict(train=training_input_config, validation=validation_input_config)

In [None]:
# Print both channel configurations, training first.
for channel_config in (training_input_config, validation_input_config):
    print(channel_config)

In [None]:
# Start training using the 'train' and 'validation' channels in data_channels.
estimator.fit(data_channels)

### Deploy the model and run predictions

In [None]:
# Deploy the trained model to an endpoint that reuses the job name,
# backed by a single ml.m5.xlarge instance.
predictor = estimator.deploy(
    endpoint_name=job_name,
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
)

In [None]:
from sagemaker.serializers import CSVSerializer

# A single sample row of feature values to score against the endpoint.
sample_rows = [[3, 0, 1, 2, 28.7, 33.335, 79, 12.998, 2011, 7, 7, 3]]

# Send request payloads as CSV.
predictor.serializer = CSVSerializer()
predictor.predict(sample_rows)