### Train bike rental dataset with two versions of XGBoost: 0.90-2 and 1.2-2

In [2]:
import numpy as np
import pandas as pd

import boto3 
import re

import sagemaker
from sagemaker import get_execution_role

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
### Import s3 bucket name as environment variable

import os

# Read ./.env with plain Python instead of shelling out to `cat`, so the cell
# does not depend on a POSIX shell and a missing file fails with a clear
# FileNotFoundError.
with open('./.env') as env_file:
    env_vars = env_file.read().splitlines()

for var in env_vars:
    var = var.strip()
    # Skip blank lines, comments, and anything that is not a KEY=VALUE pair;
    # unpacking such a line would otherwise raise ValueError.
    if not var or var.startswith('#') or '=' not in var:
        continue
    # Split on the first '=' only, so values that themselves contain '='
    # are kept intact.
    key, value = var.split('=', 1)
    os.environ[key] = value

### Upload Data to S3

In [4]:
# S3 bucket plus the key prefixes and URIs used by the rest of the notebook

bucket_name = os.environ['BUCKET_NAME']

# Key prefixes (relative to the bucket) for each data split
training_folder = 'bikerental/training/'
validation_folder = 'bikerental/validation/'
test_folder = 'bikerental/test/'

# Where the trained model artifacts are written
s3_model_output_location = 's3://{0}/bikerental/model'.format(bucket_name)

# Full S3 URIs for the three data splits, built from the prefixes above
s3_training_file_location, s3_validation_file_location, s3_test_file_location = (
    's3://{0}/{1}'.format(bucket_name, folder)
    for folder in (training_folder, validation_folder, test_folder)
)

In [5]:
# Data channels for the training algorithm
# content type can be libsvm or csv for XGBoost

# Both channels read CSV objects found under an S3 prefix
channel_options = {'content_type': 'csv', 's3_data_type': 'S3Prefix'}

training_input_config = sagemaker.session.TrainingInput(
    s3_data=s3_training_file_location,
    **channel_options
)

validation_input_config = sagemaker.session.TrainingInput(
    s3_data=s3_validation_file_location,
    **channel_options
)

# Channel name -> input configuration
data_channels = {
    'train': training_input_config,
    'validation': validation_input_config,
}

In [7]:
def write_to_s3(filename, bucket, key):
    """Upload a local file to S3 using boto3.

    filename - path of the local file to upload
    bucket   - name of the destination bucket
    key      - object key in the bucket (folder path plus object name)
    """
    # Open in binary mode: upload_fileobj is handed the open file object
    with open(filename, 'rb') as local_file:
        s3 = boto3.Session().resource('s3')
        target = s3.Bucket(bucket).Object(key)
        target.upload_fileobj(local_file)

In [8]:
# Upload each split under its own prefix, reusing the local file name as the
# object name so the two can never drift apart. (The training file used to be
# stored under the key 'bike_test_rev3.csv' inside the training prefix.)
# NOTE(review): if an earlier run already created that mis-named object under
# the training prefix, delete it - presumably a prefix-based training channel
# would otherwise pick up both copies; confirm against the bucket contents.
for folder, filename in (
    (training_folder, 'bike_train_rev3.csv'),
    (validation_folder, 'bike_validation_rev3.csv'),
    (test_folder, 'bike_test_rev3.csv'),
):
    write_to_s3(filename, bucket_name, folder + filename)

### Training Algorithm Docker Image

#### SageMaker manages a separate image for each algorithm