In [None]:
"""
Augments the images found in the image directories, together with their corresponding
XML labels from the XML directories, and writes the results back into the same input directories.
"""

In [None]:
import os
from os.path import join
import imgaug as ia
import numpy as np
import lxml.etree as ET
#import xml.etree.ElementTree as ET
from imgaug import augmenters as iaa
from glob import glob
from PIL import Image

# Without Bounding Boxes

In [None]:
# Configuration for the run without bounding boxes.
# Paths of the image directories to augment; augmented files are written back into them.
image_directories = [] #array of paths to image directories to augment
# Only files ending with this extension are loaded; it is also used for the saved files.
image_extension = '.jpg'
# Number of augmentation passes: each pass writes one augmented copy of every loaded image.
augmentation_factor = 1 #factor of number of original images to generate

In [None]:
def augment_images(np_img_array, img_dir):
    """Augment a batch of images and save the results into ``img_dir``.

    Arguments:
    np_img_array: list of images encoded as numpy arrays; passed unchanged to
        ``seq.augment_images``. The hue-shift branch converts from RGB, so the
        arrays are treated as RGB images.
    img_dir: directory the augmented images are written to.

    Output files are named ``aug_img_<n><image_extension>`` where ``n`` is the
    module-level ``image_num`` counter, incremented once per saved image. As
    long as the caller does not reset the counter, successive calls do not
    overwrite each other's files.
    """
    # Running output counter owned by the calling cell; declared once up front.
    global image_num

    sometimes = lambda aug: iaa.Sometimes(0.8, aug)
    seq = iaa.Sequential([
        #iaa.Sometimes(0.2, iaa.PiecewiseAffine(scale=(0.01, 0.03))),
        # Pick between zero and three of the photometric augmenters below.
        iaa.SomeOf((0, 3),[
            # Blur
            sometimes(iaa.OneOf([iaa.GaussianBlur(sigma=(0.25, 1.25)),
                       #iaa.AverageBlur(k=(3, 7)),
                       #iaa.MedianBlur(k=(3, 7)),
                       iaa.blur.MotionBlur(k=(3, 7)),
                       iaa.blur.BilateralBlur(d=(3, 7), sigma_color=250, sigma_space=250)
                       ])),

            # Noise
            sometimes(iaa.OneOf([iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.025*255), per_channel=0.15),
                       iaa.SaltAndPepper((0.05, 0.15))])),

            iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),

            # Contrast
            iaa.OneOf([iaa.ContrastNormalization((0.5, 1.5), per_channel=0.2),
                       iaa.GammaContrast(gamma=(0.5, 1.5), per_channel=0.2),
                       iaa.SigmoidContrast(gain=(5, 15), cutoff=(0.25,0.75)),
                       iaa.LogContrast(gain=(0.5, 1.0), per_channel=0.2),
                       iaa.LinearContrast(alpha=(0.5, 1.75), per_channel=0.2),
                       iaa.AllChannelsCLAHE(clip_limit=(1,20), per_channel=0.5)
                       ]),
            # Colour: hue shift, partial grayscale or per-channel brightness
            iaa.OneOf([
                 iaa.Sequential([
                     iaa.ChangeColorspace(from_colorspace="RGB", to_colorspace="HSV"),
                     iaa.WithChannels(0, iaa.Add((50, 100))),
                     iaa.ChangeColorspace(from_colorspace="HSV", to_colorspace="RGB")
                 ]),
                 iaa.Grayscale(alpha=(0.3, 1.0)),
                 iaa.Multiply((0.5, 1.5), per_channel=True)
                 ])
            ], random_order=True),
        # Occlusion-style corruption
        sometimes(iaa.OneOf([iaa.CoarseDropout((0.2, 0.25), size_percent=(0.001, 0.02), per_channel=0.1),
                             iaa.CoarseSaltAndPepper((0.2, 0.25), size_percent=(0.001, 0.02),),
                             iaa.Superpixels(p_replace=(0.2, 0.25), n_segments=(128,256))]))
    ], random_order=True) # apply augmenters in random order

    images_aug = seq.augment_images(np_img_array)
    for image in images_aug:
        image_num += 1
        im = Image.fromarray(image)
        # Save into the directory passed by the caller; the original used the
        # global ``image_dir`` here and silently ignored ``img_dir``.
        im.save(join(img_dir, 'aug_img_{}{}'.format(image_num, image_extension)))

In [None]:
for image_dir in image_directories:
    # Load every matching image of this directory into memory, in sorted filename order.
    image_list = sorted(glob(join(image_dir, '*' + image_extension)))
    np_img_array = [np.array(Image.open(image)) for image in image_list]

    # Restart the output numbering for each directory; augment_images advances
    # this module-level counter once per saved file.
    image_num = 0

    # One pass per augmentation cycle over the same in-memory originals.
    for cycle in range(augmentation_factor):
        print('Image Directory: {} Cycle: {}'.format(image_dir, cycle))
        augment_images(np_img_array, image_dir)

# With Bounding Boxes

In [None]:
# Configuration for the run with bounding boxes.
# Directory whose entries are folders holding both the images and their VOC-format XMLs.
root_dir = '' #directory containing folders of images and xmls in VOC format
# Both lists come from the same glob, so the driver loop's zip pairs every
# folder with itself (images and XMLs are read from, and written to, the same folder).
image_directories = sorted(glob(join(root_dir, '*')))
xml_directories = sorted(glob(join(root_dir, '*')))
# Extension used to match an XML file to its image and for the saved augmented images.
image_extension = '.jpg'
# Number of augmentation passes: each pass writes one augmented copy of every loaded image.
augmentation_factor = 5 #factor of number of original images to generate

In [None]:
def edit_xml(bbs, image_filename, xml_output_filename, image_output_filename, xml_source_dir=None):
    """Write a VOC annotation for one augmented image, based on the original image's XML.

    Arguments:
    bbs: instance of BoundingBoxesOnImage holding the augmented boxes; only
        ``bbs.bounding_boxes`` and each box's ``label``/``x1``/``y1``/``x2``/``y2``
        are read.
    image_filename: path of the ORIGINAL image; its base name (without extension)
        selects the source XML ``<name>.xml``.
    xml_output_filename: path the updated XML is written to.
    image_output_filename: path of the augmented image; stored in the ``<path>``
        tag, and its base name in the ``<filename>`` tag.
    xml_source_dir: directory containing the source XML. Defaults to the
        module-level ``xml_dir`` set by the driving loop, which is what this
        function always used before the parameter existed.
    """
    if xml_source_dir is None:
        xml_source_dir = xml_dir  # module-level directory of the current loop iteration

    # splitext instead of [:-4] so extensions such as '.jpeg' or '.tiff' also work.
    xml_stem = os.path.splitext(os.path.basename(image_filename))[0]
    xml_filepath = join(xml_source_dir, xml_stem + '.xml')

    tree = ET.parse(xml_filepath)
    root = tree.getroot()

    # <filename> and <path> are not present in every VOC file; update them only
    # when they exist instead of failing on a missing tag.
    filename = root.find('filename')
    if filename is not None:
        filename.text = os.path.basename(image_output_filename)
    path = root.find('path')
    if path is not None:
        path.text = image_output_filename

    #remove all current objects to prevent bugs associated with overwriting
    for obj in root.findall('object'):
        root.remove(obj)

    #create new object tag for each bounding box in augmented image
    for bb in bbs.bounding_boxes:
        obj = ET.SubElement(root, 'object')
        ET.SubElement(obj, 'name').text = bb.label
        ET.SubElement(obj, 'pose').text = 'Unspecified'
        ET.SubElement(obj, 'truncated').text = '0'
        # VOC tag names are lower-case; XML is case-sensitive, so 'Difficult'
        # would not be found by readers looking up 'difficult'.
        ET.SubElement(obj, 'difficult').text = '0'
        bndbox = ET.SubElement(obj, 'bndbox')
        # Coordinates are rounded to whole pixels.
        ET.SubElement(bndbox, 'xmin').text = str(int(round(bb.x1)))
        ET.SubElement(bndbox, 'ymin').text = str(int(round(bb.y1)))
        ET.SubElement(bndbox, 'xmax').text = str(int(round(bb.x2)))
        ET.SubElement(bndbox, 'ymax').text = str(int(round(bb.y2)))

    tree.write(xml_output_filename)

In [None]:
def postprocessor(images, augmenter, parents):
    """Image hook: log the name of the augmenter that just ran and pass the images through.

    ``parents`` is accepted to satisfy the hook signature but is not used.
    """
    augmenter_name = augmenter.name
    print(augmenter_name)
    return images

In [None]:
def read_images_and_xmls(image_dir, img_list, xml_list):
    """Load every image that has a matching VOC XML with at least one object.

    Arguments:
    image_dir: directory the images are expected in.
    img_list: paths of the images that exist; an XML is used only when
        ``join(image_dir, <xml base name> + image_extension)`` is in this list.
    xml_list: paths of the XML annotation files to read.

    Returns a tuple of three lists aligned by index:
    np_img_array: the images as numpy arrays,
    bbs_on_img_array: one BoundingBoxesOnImage per image,
    aug_img_list: the corresponding image paths.

    XMLs without a matching image are reported with a printed message and
    skipped; XMLs that contain no ``object`` entries are skipped silently.
    """
    np_img_array = []
    bbs_on_img_array = [] #array of BoundingBoxesOnImage instances
    aug_img_list = [] #list of images with corresponding xml file that can be augmented

    # Set lookup keeps the existence check O(1) per XML instead of scanning the list.
    available_images = set(img_list)

    for xml in xml_list:
        # Image path that matches the xml base name (splitext instead of [:-4]).
        xml_stem = os.path.splitext(os.path.basename(xml))[0]
        img_path = join(image_dir, xml_stem + image_extension)
        if img_path not in available_images:
            print('Image not found for xml: ', xml)
            continue

        root = ET.parse(xml).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        boxes = []
        for obj in root.findall('object'):
            xmlbox = obj.find('bndbox')
            boxes.append(ia.BoundingBox(
                x1=int(xmlbox.find('xmin').text), y1=int(xmlbox.find('ymin').text),
                x2=int(xmlbox.find('xmax').text), y2=int(xmlbox.find('ymax').text),
                label=obj.find('name').text))
        if not boxes:
            continue

        # np.array copies the pixels (as in the no-bbox loader), so the image
        # file can be closed right away and the array does not depend on it.
        with Image.open(img_path) as im:
            np_image = np.array(im)

        # Append to all three lists together so they always stay index-aligned.
        bbs_on_img_array.append(ia.BoundingBoxesOnImage(boxes, shape=(h, w)))
        aug_img_list.append(img_path)
        np_img_array.append(np_image)

    return np_img_array, bbs_on_img_array, aug_img_list

In [None]:
def augment_images_with_bbs(image_dir, xml_dir, np_img_array, bbs_on_img_array, aug_img_list):
    """Apply one geometric augmentation pass to images and their bounding boxes, then save both.

    Arguments:
    image_dir: directory the augmented images are written to.
    xml_dir: directory the augmented XML files are written to.
    np_img_array: images as numpy arrays.
    bbs_on_img_array: BoundingBoxesOnImage instances, index-aligned with np_img_array.
    aug_img_list: original image paths, index-aligned with np_img_array; used by
        edit_xml to locate each source XML.

    Files are named ``aug_img_<n>`` using the module-level ``image_num`` counter,
    which is incremented once per saved image.
    """
    global image_num

    # Horizontal flip and rotation only; scale, translate and shear are left disabled.
    geometric_aug = iaa.Sequential(
        [
            iaa.Fliplr(0.5),
            iaa.Affine(rotate=(-30, 30), mode='edge'),
        ],
        random_order=True,  # apply the augmenters above in random order
    )
    image_hooks = ia.HooksImages(postprocessor=postprocessor)
    assert len(bbs_on_img_array) == len(np_img_array)

    # Freeze the random state so images and boxes receive the same transformation.
    frozen_aug = geometric_aug.to_deterministic()
    image_aug = frozen_aug.augment_images(np_img_array, hooks=image_hooks)
    bbs_aug = frozen_aug.augment_bounding_boxes(bbs_on_img_array)

    # Drop boxes that left the image entirely and trim the ones that stick out.
    cleaned_bbs_aug = [
        boxes_on_image.remove_out_of_image().cut_out_of_image()
        for boxes_on_image in bbs_aug
    ]
    assert len(image_aug) == len(cleaned_bbs_aug)

    #convert to PIL and save img and xml in respective directories
    for count, (image, bbs) in enumerate(zip(image_aug, cleaned_bbs_aug)):
        image_num += 1
        image_out_path = join(image_dir, 'aug_img_{}{}'.format(image_num, image_extension))
        xml_out_path = join(xml_dir, 'aug_img_{}{}'.format(image_num, '.xml'))
        Image.fromarray(image).save(image_out_path)
        edit_xml(bbs, aug_img_list[count], xml_out_path, image_out_path)
    

In [None]:
# Augment every (image directory, xml directory) pair.
# NOTE: the loop variable must stay named xml_dir -- edit_xml resolves the
# source XML directory from the module-level name xml_dir.
for image_dir, xml_dir in zip(image_directories, xml_directories):
    # Restart output numbering for each directory pair; augment_images_with_bbs
    # increments this module-level counter once per saved image.
    image_num = 0
    img_list = sorted(glob(join(image_dir, '*' + image_extension)))
    xml_list = sorted(glob(join(xml_dir, '*.xml')))
    # Originals are read once and reused for every augmentation cycle below.
    np_img_array, bbs_on_img_array, aug_img_list = read_images_and_xmls(image_dir, img_list, xml_list)
    for cycle in range(augmentation_factor):
        print('Image Directory: {} Cycle: {}'.format(image_dir, cycle))
        augment_images_with_bbs(image_dir, xml_dir, np_img_array, bbs_on_img_array, aug_img_list)

# Remove top quarter of image

In [None]:
from PIL import Image
from os.path import join
import keras
from glob import glob
from keras_preprocessing.image import array_to_img, img_to_array

In [None]:
# Directory to read '*.jpg' files from, and directory the cropped copies are written to.
img_dir_path = ''
output_path = '' 
# sorted() makes the image_<i>.jpg numbering reproducible between runs.
for i, img in enumerate(sorted(glob(join(img_dir_path, '*.jpg')))):
    with Image.open(img) as im:
        # Number of pixel rows to drop from the top (a quarter of the height).
        idx = int(0.25 * im.height)
        # Crop with PIL directly: array_to_img defaults to scale=True, which
        # min/max-rescales pixel values and so changed the image contrast.
        new_im = im.crop((0, idx, im.width, im.height))
        new_im.save(join(output_path, 'image_{}.jpg'.format(i)))

# Remove augmented images

In [None]:
# Directory tree to search for previously generated augmentation files.
root_dir = ''
# recursive=True belongs to glob(), not join(): it is what makes '**' descend
# into sub-directories. The result contains files as well as directories.
files = glob(join(root_dir, '**', '*'), recursive=True)

In [None]:
# Delete every file whose name starts with 'aug' (the generated files are named 'aug_img_<n>').
# NOTE(review): the 'aug' prefix also matches unrelated files such as 'august.jpg';
# consider narrowing it to 'aug_img_' if such files may exist.
for file in files:
    filename = os.path.basename(file)
    # The recursive glob also returns directories; os.remove would raise on
    # those, so only regular files are deleted.
    if filename.startswith('aug') and os.path.isfile(file):
        os.remove(file)

In [None]:
for image_dir in image_directories:
    # join() inserts the path separator; plain concatenation (image_dir + '*')
    # matched siblings of the directory rather than the files inside it.
    image_list = sorted(glob(join(image_dir, '*' + image_extension)))
    np_img_array = []
    
    for image in image_list: 
        np_image = np.array(Image.open(image))
        np_img_array.append(np_image)
    # Restart the output numbering for each directory.
    image_num = 0
    
    for cycle in range(augmentation_factor):
        print('Image Directory: {} Cycle: {}'.format(image_dir, cycle))
        # NOTE(review): test_augment_images is not defined in the visible part of
        # this notebook; presumably augment_images was intended -- confirm,
        # otherwise this cell raises NameError on a fresh kernel.
        test_augment_images(np_img_array, image_dir)