In [None]:
import json
import numpy as np
import boto3
import sagemaker

# --- Experiment configuration ---------------------------------------------
# S3 bucket used for the manifests and the training output. 'yourbucket' is a
# placeholder: replace it with a bucket you own before running the notebook.
BUCKET = 'yourbucket'
# Optional guard against running with the placeholder name (left disabled):
#assert BUCKET != 'yourbucket', 'Please provide a custom S3 bucket name.'
EXP_NAME = 'catanddog-smalldataset' # Any valid S3 prefix.

# Execution role of this notebook, the region of the current boto3 session,
# and a low-level S3 client reused by the cells below.
role = sagemaker.get_execution_role()
region = boto3.session.Session().region_name
s3 = boto3.client('s3')

# head_bucket returns the bucket's region in the 'x-amz-bucket-region' HTTP
# header; compare it with the session region and stop early on a mismatch.
bucket_region = s3.head_bucket(Bucket=BUCKET)['ResponseMetadata']['HTTPHeaders']['x-amz-bucket-region']
assert bucket_region == region, "Your S3 bucket {} and this notebook need to be in the same region.".format(BUCKET)


# please replace yourbucket to your s3 bucket where catdog manifest file is stored
OUTPUT_MANIFEST = 's3://yourbucket/output/catanddog-malldataset/manifests/output/output.manifest'

!aws s3 cp {OUTPUT_MANIFEST} 'output.manifest'

with open('output.manifest', 'r') as f:
    output = [json.loads(line) for line in f.readlines()]

# Fixed seed so that the train/validation split is the same after every
# "Restart Kernel -> Run All"; change it to draw a different split.
SPLIT_SEED = 42
# Fraction of the records that goes into the training manifest.
TRAIN_FRACTION = 0.8


def _write_manifest(path, records):
    """Write ``records`` to ``path`` as JSON Lines (one JSON object per line).

    Args:
        path: Local file name to create or overwrite.
        records: Iterable of JSON-serialisable objects.
    """
    with open(path, 'w') as f:
        for record in records:
            f.write(json.dumps(record))
            f.write('\n')


# Shuffle output in place. A private, seeded RandomState is used so the split
# is reproducible and the global NumPy random state is left untouched.
np.random.RandomState(SPLIT_SEED).shuffle(output)

dataset_size = len(output)
train_test_split_index = round(dataset_size * TRAIN_FRACTION)

train_data = output[:train_test_split_index]
validation_data = output[train_test_split_index:]

_write_manifest('train.manifest', train_data)
_write_manifest('validation.manifest', validation_data)

# Number of records written to train.manifest; passed to the training job as
# the "num_training_samples" hyperparameter.
num_training_samples = len(train_data)

In [None]:

# Upload both local manifests to s3://BUCKET/EXP_NAME/<file name>.
for manifest_name in ('train.manifest', 'validation.manifest'):
    s3.upload_file(manifest_name, BUCKET, EXP_NAME + '/' + manifest_name)



In [None]:
# Create unique job name 
import os
import json
import random
import time
import imageio
import numpy as np
import boto3
import sagemaker
from urllib.parse import urlparse


# Job name = fixed prefix + current UTC timestamp, so each run gets its own name.
nn_job_name_prefix = 'groundtruth-augmented-manifest-demo'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
nn_job_name = ''.join([nn_job_name_prefix, timestamp])

# Container image URI of the built-in 'image-classification' algorithm for the
# region of the current boto3 session.
# NOTE(review): get_image_uri is reported as deprecated in SageMaker Python SDK
# v2 in favour of sagemaker.image_uris.retrieve -- confirm the SDK version in use.
training_image = sagemaker.amazon.amazon_estimator.get_image_uri(
    boto3.Session().region_name,
    'image-classification',
    repo_version='latest',
)

def _augmented_manifest_channel(channel_name, manifest_file):
    """Build one ``InputDataConfig`` entry backed by an augmented manifest.

    Args:
        channel_name: Value for "ChannelName" (e.g. "train" or "validation").
        manifest_file: File name of the manifest under s3://BUCKET/EXP_NAME/.

    Returns:
        dict: Channel description reading the "source-ref" and "category"
        attributes of each manifest line, wrapped as RecordIO.
    """
    manifest_uri = 's3://{}/{}/{}'.format(BUCKET, EXP_NAME, manifest_file)
    return {
        "ChannelName": channel_name,
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "AugmentedManifestFile",
                "S3Uri": manifest_uri,
                "S3DataDistributionType": "FullyReplicated",
                "AttributeNames": ["source-ref", "category"]
            }
        },
        "ContentType": "application/x-recordio",
        "RecordWrapperType": "RecordIO",
        "CompressionType": "None"
    }


# Request body for sagemaker_client.create_training_job(**training_params).
training_params = {
    "AlgorithmSpecification": {
        "TrainingImage": training_image,
        "TrainingInputMode": "Pipe"
    },
    "RoleArn": role,
    "OutputDataConfig": {
        # Training output is written under the experiment prefix.
        "S3OutputPath": 's3://{}/{}/output/'.format(BUCKET, EXP_NAME)
    },
    "ResourceConfig": {
        "InstanceCount": 1,
        "InstanceType": "ml.p3.16xlarge",
        "VolumeSizeInGB": 50
    },
    "TrainingJobName": nn_job_name,
    # All hyperparameter values are passed as strings.
    "HyperParameters": {
        "epochs": "30",
        "image_shape": "3,224,224",
        "learning_rate": "0.01",
        "lr_scheduler_step": "10,20",
        "mini_batch_size": "32",
        "num_classes": "2",
        "num_layers": "18",
        # Count of records written to train.manifest.
        "num_training_samples": str(num_training_samples),
        "resize": "224",
        "use_pretrained_model": "1"
    },
    "StoppingCondition": {
        "MaxRuntimeInSeconds": 86400
    },
    "InputDataConfig": [
        _augmented_manifest_channel("train", 'train.manifest'),
        _augmented_manifest_channel("validation", 'validation.manifest'),
    ]
}

Now we create the SageMaker training job.

In [None]:
# Submit the training job described by training_params.
sagemaker_client = boto3.client('sagemaker')
sagemaker_client.create_training_job(**training_params)

# Poll the job every 30 seconds until it reaches a terminal state.
print('Training job started')
while True:
    # Keep the whole describe response: the failure branch needs more than
    # the status field.
    response = sagemaker_client.describe_training_job(TrainingJobName=nn_job_name)
    status = response['TrainingJobStatus']
    if status == 'Completed':
        print("Training job ended with status: " + status)
        break
    # 'Stopped' is terminal as well; without this check the loop never ends
    # for a job that was stopped manually or hit its stopping condition.
    if status in ('Failed', 'Stopped'):
        message = response.get('FailureReason', 'no failure reason reported')
        print('Training job ended with status {}: {}'.format(status, message))
        raise Exception('Training job {}'.format(status.lower()))
    time.sleep(30)