__PURPOSE__

This notebook augments a labelled image dataset from *AWS Rekognition Custom Labels* in order to enlarge the training dataset.

__TIME__

For every 100 images, the horizontal flip operation should take less than 1 minute.

__INSTRUCTIONS__
1. Make sure the original labelled dataset in AWS S3 has public access.
2. Make sure the original manifest file downloaded from AWS is under the same directory as this notebook.
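3. Update the *TESTSET_NAME* variable to be the dataset name, i.e. the name that appears in the S3 paths (asset/TESTSET_NAME) and as the prefix of the manifest's *TESTSET_NAME_BB* annotation key.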
4. Uncomment the desired action(s) to be performed in the *SEQ* augmenters definition.
5. Update the *EXTENSION* and *DOWNLOAD_DIR* variables to be the desired extension and output folder of the augmented images.
6. After running the script, upload the images in the output folder to the S3 bucket, under asset/TESTSET_NAME.
7. Upload the updated *output.manifest* file to the S3 bucket, under datasets/TESTSET_NAME/manifests/output, replacing the existing file. Do not rename the manifest file. Keep it as *output.manifest*.

In [6]:
import imageio
from imgaug import augmenters as iaa 
import imgaug as ia
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
ia.seed(1)

import os
from PIL import Image
import ast
%matplotlib inline

# Configuration: dataset name and the augmentation pipeline to apply.
# TESTSET_NAME is used to look up each record's bounding boxes (manifest key
# TESTSET_NAME + '_BB') and as a path component of the new 'source-ref'.
TESTSET_NAME = 'handsTestSet3'
# Augmenters applied to every image together with its bounding boxes.
# Uncomment the operation(s) that should be performed.
SEQ = iaa.Sequential([
    # iaa.Fliplr(), # leftright flip
    iaa.Flipud(), # updown flip
    # iaa.GaussianBlur(sigma=(0.0, 2.0)), # Blur
    # iaa.WithBrightnessChannels(iaa.Add((-50, 50))), # Change brightness
    # iaa.Affine(scale=(1.1, 1.5)), # To scale/zoom imgs
])
# Suffix (including the file extension) appended to each original image's
# base name to form the augmented image's file name.
EXTENSION = '_augmted.jpg'
# Local folder the augmented images are saved to (created if it is missing).
DOWNLOAD_DIR = 'Augmented Images'

# Read the manifest file: one labelled-image record per line.
# The context manager closes the file handle as soon as the read completes.
with open('output.manifest', 'rb') as manifestFile:
    manifestText = manifestFile.read().decode('UTF-8')
# Drop blank lines (such as the empty string left after the final newline, or
# a stray empty line in the middle of the file) so that every remaining entry
# is a record that can be parsed.
maniPhotos = [line for line in manifestText.split('\n') if line.strip()]

# Augment every image listed in the manifest, save the augmented image to
# DOWNLOAD_DIR, and append one matching record (new image path + transformed
# bounding boxes) per image to output.manifest.
import json  # manifest records are JSON Lines
import posixpath  # S3 keys always use '/', whatever the local OS separator is


def _parseManifestLine(line):
    """Decode one manifest line into a dict.

    The manifest format is JSON Lines, so JSON is tried first. Lines appended
    by earlier versions of this notebook are Python dict reprs (single
    quotes); those are still accepted through ast.literal_eval.
    """
    try:
        return json.loads(line)
    except ValueError:
        return ast.literal_eval(line)


def _s3UriToHttpsUrl(s3Uri):
    """Turn 's3://bucket/key' into 'https://bucket.s3.amazonaws.com/key'.

    Raises ValueError when the value is not an s3:// URI.
    """
    prefix = 's3://'
    if not s3Uri.startswith(prefix):
        raise ValueError('not an S3 URI: ' + repr(s3Uri))
    bucket, _, key = s3Uri[len(prefix):].partition('/')
    return 'https://' + bucket + '.s3.amazonaws.com/' + key


# The output folder is needed for every image, so create it once up front.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

labelKey = TESTSET_NAME + '_BB'
newRecords = []

for mani in maniPhotos:
    if not mani.strip():
        continue  # tolerate blank lines in the manifest

    # get photo
    maniDecoded = _parseManifestLine(mani)
    sourceRef = maniDecoded['source-ref']
    url = _s3UriToHttpsUrl(sourceRef)

    # read photo and bbox
    img = imageio.imread(url)
    annotations = maniDecoded[labelKey]['annotations']
    bbs = BoundingBoxesOnImage(
        [
            BoundingBox(
                x1=bbox['left'],
                x2=bbox['left'] + bbox['width'],
                y1=bbox['top'],
                y2=bbox['top'] + bbox['height'],
            )
            for bbox in annotations
        ],
        shape=img.shape,
    )
    # ia.imshow(bbs.draw_on_image(img, size=2))

    # augmentation
    image_aug, bbs_aug = SEQ(image=img, bounding_boxes=bbs)
    # ia.imshow(bbs_aug.draw_on_image(image_aug, size=2))

    # update bboxes: augmented boxes come back in the same order as the
    # annotations they were built from, so they can be paired positionally.
    # int() also converts numpy scalars into plain ints that JSON can encode.
    for annotation, bbAug in zip(annotations, bbs_aug.bounding_boxes):
        annotation['left'] = int(bbAug.x1)
        annotation['width'] = int(bbAug.x2 - bbAug.x1)
        annotation['top'] = int(bbAug.y1)
        annotation['height'] = int(bbAug.y2 - bbAug.y1)

    # update photo name: splitext strips only the final extension, so a name
    # such as 'img.v2.jpg' keeps its 'img.v2' stem instead of collapsing to 'img'.
    photoName = posixpath.splitext(posixpath.basename(sourceRef))[0] + EXTENSION
    maniDecoded['source-ref'] = posixpath.join(
        sourceRef.split(TESTSET_NAME)[0], TESTSET_NAME, photoName)

    # Save Photo. convert('RGB') lets Pillow infer the real mode of the array
    # first, so grayscale or RGBA inputs are converted rather than misread.
    Image.fromarray(image_aug).convert('RGB').save(
        os.path.join(DOWNLOAD_DIR, photoName))

    newRecords.append(json.dumps(maniDecoded))

# Save the new records. They are written in one go after every image has been
# processed, so a failure part-way through leaves the manifest untouched.
if newRecords:
    with open('output.manifest', 'rb') as manifestFile:
        existing = manifestFile.read()
    with open('output.manifest', 'ab') as manifestFile:
        # Start on a fresh line if the existing file lacks a final newline.
        if existing and not existing.endswith(b'\n'):
            manifestFile.write(b'\n')
        manifestFile.write(('\n'.join(newRecords) + '\n').encode('utf-8'))
