In [1]:
## Convert the images stored in the S3 bucket into RecordIO files for training

In [1]:

# Name of the S3 bucket that holds the source images
data_bucket_name = "hotel50k-data"

# Prefix inside that bucket; it contains one sub-folder of images per label class
dataset_name = "hotel_classes"

In [2]:
# Sanity check: list the contents of the notebook's current working directory
!ls

Image-classification-fulltraining-elastic-inference.ipynb
Image-classification-fulltraining-highlevel.ipynb
Image-classification-fulltraining.ipynb
Image-classification-incremental-training-highlevel.ipynb
Image-classification-lst-format-highlevel.ipynb
Image-classification-lst-format.ipynb
Image-classification-transfer-learning-highlevel.ipynb
Image-classification-transfer-learning.ipynb
LoadImages.ipynb
Test1.ipynb
tmp
Train_CNN_TransferLearning.ipynb
Train_CNN_TransferLearning_LargeBatch.ipynb


In [4]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
import boto3
# SageMaker session used later for the default bucket lookup and by the estimator
sess = sagemaker.Session()

# IAM role of this notebook instance; passed to the estimator when it is created
role = get_execution_role()



In [5]:
base_dir='./tmp'
%env BASE_DIR=$base_dir
%env S3_DATA_BUCKET_NAME = $data_bucket_name
%env DATASET_NAME = $dataset_name

import sys,os

suffix='/mxnet/tools/im2rec.py'
im2rec = list(filter( (lambda x: os.path.isfile(x + suffix )), sys.path))[0] + suffix
%env IM2REC=$im2rec


env: BASE_DIR=./tmp
env: S3_DATA_BUCKET_NAME=hotel50k-data
env: DATASET_NAME=hotel_classes
env: IM2REC=/home/ec2-user/anaconda3/envs/mxnet_latest_p37/cpu/lib/python3.7/site-packages/mxnet/tools/im2rec.py


In [6]:
!pwd
!mkdir $BASE_DIR
!ls

/home/ec2-user/SageMaker/cse281_model
ConvertImages.ipynb
Image-classification-fulltraining-elastic-inference.ipynb
Image-classification-fulltraining-highlevel.ipynb
Image-classification-fulltraining.ipynb
Image-classification-incremental-training-highlevel.ipynb
Image-classification-lst-format-highlevel.ipynb
Image-classification-lst-format.ipynb
Image-classification-transfer-learning-highlevel.ipynb
Image-classification-transfer-learning.ipynb
LoadImages.ipynb
Test1.ipynb
tmp


In [7]:
# Mirror the dataset prefix from the S3 data bucket into $BASE_DIR/$DATASET_NAME.
# --quiet suppresses the per-file transfer output of `aws s3 sync`.
!aws s3 sync s3://$S3_DATA_BUCKET_NAME/$DATASET_NAME $BASE_DIR/$DATASET_NAME --quiet

In [None]:
%%bash
# Use the IM2REC script to convert our images into RecordIO files.
# Reads BASE_DIR, DATASET_NAME and IM2REC from the environment (exported with %env).

# Stop at the first failing command or unset variable so that a broken step
# does not silently produce incomplete LST/REC files.
set -euo pipefail

# Work inside the base directory. Because of `set -e`, a failed cd aborts the
# script before the clean-up below could delete files in some other directory.
cd "$BASE_DIR"

# Clean up our working dir of existing LST and REC files
# (-f: no error when there is nothing to delete, e.g. on the first run)
rm -f -- *.rec *.lst

# First we need to create two LST files (training and test lists), noting the correct label class for each image
# We'll also save the output of the LST files command, since it includes a list of all of our label classes
# NOTE(review): --resize/--pass-through/--center-crop look like image-processing options that
# im2rec applies when writing REC files, not when listing - confirm whether they have any effect here.
# NOTE(review): --resize=244 may be a typo for 224 (the training image_shape is 3,224,224) - confirm.
echo "Creating LST files"
python "$IM2REC" --list --recursive --resize=244 --pass-through --center-crop --test-ratio=0.3 --train-ratio=0.7 "$DATASET_NAME" "$DATASET_NAME" > "${DATASET_NAME}_classes"

echo "Label classes:"
cat "${DATASET_NAME}_classes"

# Then we create RecordIO files from the LST files
echo "Creating RecordIO files"
python "$IM2REC" --num-thread=4 "${DATASET_NAME}_train.lst" "$DATASET_NAME"
python "$IM2REC" --num-thread=4 "${DATASET_NAME}_test.lst" "$DATASET_NAME"
ls -lh *.rec

In [18]:
import pathlib

# Display the absolute path of the notebook's current working directory
pathlib.Path.cwd()

PosixPath('/home/ec2-user/SageMaker/cse281_model')

In [16]:
bucket = sess.default_bucket()

s3train_path = 's3://{}/{}/train/'.format(bucket, dataset_name)
s3validation_path = 's3://{}/{}/validation/'.format(bucket, dataset_name)

# Clean up any existing data
!aws s3 rm s3://{bucket}/{dataset_name}/train --recursive
!aws s3 rm s3://{bucket}/{dataset_name}/validation --recursive

# Upload the rec files to the train and validation channels
!aws s3 cp /tmp/{dataset_name}_train.rec $s3train_path
!aws s3 cp /tmp/{dataset_name}_test.rec $s3validation_path

upload: ../../../../tmp/hotel_classes_train.rec to s3://sagemaker-us-east-2-338648799047/hotel_classes/train/hotel_classes_train.rec
upload: ../../../../tmp/hotel_classes_test.rec to s3://sagemaker-us-east-2-338648799047/hotel_classes/validation/hotel_classes_test.rec


In [None]:
# Sanity check: list the contents of the notebook's current working directory
!ls

In [3]:
# Both channels read RecordIO files from an S3 prefix with identical settings,
# so the shared options are spelled out once.
recordio_channel_options = dict(
    distribution='FullyReplicated',
    content_type='application/x-recordio',
    s3_data_type='S3Prefix',
)

train_data = sagemaker.inputs.TrainingInput(s3train_path, **recordio_channel_options)
validation_data = sagemaker.inputs.TrainingInput(s3validation_path, **recordio_channel_options)

# Channel name -> input definition, in the form passed to Estimator.fit(inputs=...)
data_channels = dict(train=train_data, validation=validation_data)

NameError: name 'sagemaker' is not defined

In [30]:
# Training artifacts are written under <default bucket>/<dataset_name>/output
s3_output_location = 's3://{}/{}/output'.format(bucket, dataset_name)

# Container image of the built-in image-classification algorithm for the current region
training_image = sagemaker.image_uris.retrieve(
    framework='image-classification',
    region=boto3.Session().region_name,
)

# Estimator for a single GPU instance with a 100 GB training volume
image_classifier = sagemaker.estimator.Estimator(
    image_uri=training_image,
    role=role,
    instance_count=1,
    instance_type='ml.p3.2xlarge',
    volume_size=100,
    output_path=s3_output_location,
    sagemaker_session=sess,
)

In [31]:

# Derive the dataset-dependent hyperparameters from the files on local disk.
# Counting in Python (instead of parsing `ls | wc -l` output) means a missing
# directory or LST file raises immediately rather than silently producing
# values such as num_classes=-1 or num_training_samples=0.
dataset_dir = os.path.join(base_dir, dataset_name)
train_lst_path = os.path.join(base_dir, '{}_train.lst'.format(dataset_name))

# One visible sub-folder per label class; stray files and hidden folders are not classes.
with os.scandir(dataset_dir) as dir_entries:
    num_classes = sum(
        1 for entry in dir_entries
        if entry.is_dir() and not entry.name.startswith('.')
    )

# One non-empty line per training image in the LST file.
with open(train_lst_path) as train_lst:
    num_training_samples = sum(1 for line in train_lst if line.strip())

if num_classes < 1 or num_training_samples < 1:
    raise ValueError(
        'Expected at least one class folder in {!r} and one entry in {!r}, '
        'found {} classes and {} training samples'.format(
            dataset_dir, train_lst_path, num_classes, num_training_samples
        )
    )

# Learn more about the Sagemaker built-in Image Classifier hyperparameters here: https://docs.aws.amazon.com/sagemaker/latest/dg/IC-Hyperparameter.html

# These hyperparameters we won't want to change, as they define things like
# the size of the images we'll be sending for input, the number of training classes we have, etc.
base_hyperparameters = dict(
    use_pretrained_model=1,
    image_shape='3,224,224',
    num_classes=num_classes,
    num_training_samples=num_training_samples,
)

# These are hyperparameters we may want to tune, as they can affect the model training success:
hyperparameters = {
    **base_hyperparameters,
    **dict(
        learning_rate=0.001,
        mini_batch_size=5,
    )
}

image_classifier.set_hyperparameters(**hyperparameters)

# Display the final hyperparameter set as the cell output
hyperparameters

{'use_pretrained_model': 1,
 'image_shape': '3,224,224',
 'num_classes': 18190,
 'num_training_samples': 158499,
 'learning_rate': 0.001,
 'mini_batch_size': 5}

In [None]:
%%time

import time
now = str(int(time.time()))
training_job_name = 'IC-' + dataset_name.replace('_', '-') + '-' + now

image_classifier.fit(inputs=data_channels, job_name=training_job_name, logs=True)

job = image_classifier.latest_training_job
model_path = f"{base_dir}/{job.name}"

print(f"\n\n Finished training! The model is available for download at: {image_classifier.output_path}/{job.name}/output/model.tar.gz")

2020-12-03 21:20:25 Starting - Starting the training job...
2020-12-03 21:20:27 Starting - Launching requested ML instances......
2020-12-03 21:21:34 Starting - Preparing the instances for training......
2020-12-03 21:22:43 Downloading - Downloading input data.................................
2020-12-03 21:28:26 Training - Downloading the training image...
2020-12-03 21:28:40 Training - Training image download completed. Training in progress.[34mDocker entrypoint called with argument(s): train[0m
[34m[12/03/2020 21:28:43 INFO 139737112188736] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/image_classification/default-input.json: {u'beta_1': 0.9, u'gamma': 0.9, u'beta_2': 0.999, u'optimizer': u'sgd', u'use_pretrained_model': 0, u'eps': 1e-08, u'epochs': 30, u'lr_scheduler_factor': 0.1, u'num_layers': 152, u'image_shape': u'3,224,224', u'precision_dtype': u'float32', u'mini_batch_size': 32, u'weight_decay': 0.0001, u'learning_rate': 0.1, u'momentum': 0}[0m

In [None]:
# TODO: consider increasing mini_batch_size (e.g. to 64); it is currently 5.
# TODO: consider training on multiple instances (instance_count > 1) to shorten training time.