In [1]:
import os
import json
import argparse
import numpy as np
import random
import math
import csv
from PIL import Image, ImageEnhance

In [2]:
# parser = argparse.ArgumentParser(description='Create synthetic training data for object detection algorithms.')
# parser.add_argument("-bkg", "--backgrounds", type=str, default="Backgrounds/",
#                     help="Path to background images folder.")
# parser.add_argument("-obj", "--objects", type=str, default="Objects/",
#                     help="Path to object images folder.")
# parser.add_argument("-o", "--output", type=str, default="TrainingImages/",
#                     help="Path to output images folder.")
# parser.add_argument("-ann", "--annotate", type=bool, default=True,
#                     help="Include annotations in the data augmentation steps?")
# parser.add_argument("-s", "--sframe", type=bool, default=False,
#                     help="Convert dataset to an sframe?")
# parser.add_argument("-g", "--groups", type=bool, default=True,
#                     help="Include groups of objects in training set?")
# parser.add_argument("-mut", "--mutate", type=bool, default=False,
#                     help="Perform mutatuons to objects (rotation, brightness, shapness, contrast)")

In [3]:
# base_bkgs_path = args.backgrounds
# annotate = args.annotate
# mutate = args.mutate
# obj_groups = args.groups
# sframe = args.sframe
# objs_path = args.objects
# output_images = args.output_images

base_bkgs_path = './bkg_images/'
annotate = True
mutate = False
obj_groups = True
sframe = False
objs_path = './obj_images/'
output_images = './syn_images/'
sizes = [0.8, 1.0] # different obj sizes to use TODO make configurable
count_per_size = 3 # number of locations for each obj size TODO make configurable
annotations = [] # store annots here
n = 1

In [4]:
count = 3
list(np.random.randint(0, 1, count))

[0, 0, 0]

In [5]:
# Helper functions
def get_obj_positions(obj, bkg, count=1, top = -1):
    obj_w, obj_h = [], []
    x_positions, y_positions = [], []
    bkg_w, bkg_h = bkg.size
    # Rescale our obj to have a couple different sizes
    obj_sizes = [tuple([int(s*x) for x in obj.size]) for s in sizes]
    for w, h in obj_sizes:
        obj_w.extend([w]*count)
        obj_h.extend([h]*count)
        max_x, max_y = bkg_w-w, bkg_h-h
        if max_x == 0 : max_x = 1
        if max_y == 0 : max_y = 1
        x_positions.extend(list(np.random.randint(0, max_x, count)))
        
        if (top == 0):
            y_positions.extend(list(np.random.randint(0, 1, count)))
        else:
            y_positions.extend(list(np.random.randint(max_y/2, max_y, count)))
            
    return obj_h, obj_w, x_positions, y_positions

In [6]:
def get_box(obj_w, obj_h, max_x, max_y, top = -1):
    if max_x == 0 : max_x = 1
    if max_y == 0 : max_y = 1
    x1 = np.random.randint(0, max_x, 1)
    if top == 0:
        y1 = np.random.randint(0, 1, 1)
    else:
        y1 = np.random.randint(max_y/2, max_y, 1)
    x2, y2 = x1 + obj_w, y1 + obj_h
    return [x1[0], y1[0], x2[0], y2[0]]

In [7]:
# check if two boxes intersect
def intersects(box, new_box):
    box_x1, box_y1, box_x2, box_y2 = box
    x1, y1, x2, y2 = new_box
    return not (box_x2 < x1 or box_x1 > x2 or box_y1 > y2 or box_y2 < y1)

In [8]:
def iou(box, new_box):
    box_x1, box_y1, box_x2, box_y2 = box
    x1, y1, x2, y2 = new_box
    
    xi1 = np.maximum(box_x1, x1)
    yi1 = np.maximum(box_y1, y1)
    xi2 = np.minimum(box_x2, x2)
    yi2 = np.minimum(box_y2, y2)
    inter_width = xi2-xi1
    inter_height = yi2-yi1
    inter_area = max(inter_width, 0) * max(inter_height, 0)

    box1_area = (box_x2-box_x1)*(box_y2-box_y1)
    box2_area = (x2-x1)*(y2-y1)
    union_area = box1_area + box2_area - inter_area
    iou = float(inter_area)/float(union_area)
    return iou

In [9]:
def get_group_obj_positions(obj_group, bkg, objs_path):
    bkg_w, bkg_h = bkg.size
    boxes = []
    obj_images = [f for f in os.listdir(objs_path) if not f.startswith(".")]
    obj_paths = [objs_path + obj_images[i] for i in obj_group]
    objs = [Image.open(p) for p in obj_paths]
    obj_sizes = [tuple([int(0.7*x) for x in i.size]) for i in objs]
    ctr = 0
    for w, h in obj_sizes:
        if 'furniture_' in obj_paths[ctr]:
            top = 0
        else:
            top = -1
        ctr += 1
        # set background image boundaries
        max_x, max_y = bkg_w-w, bkg_h-h
        # get new box coordinates for the obj on the bkg
        count = 0
        while True:
            count += 1
            new_box = get_box(w, h, max_x, max_y, top)
            for box in boxes:
                if intersects(box, new_box) and iou(box, new_box) > 0.4:
                    break

            else:
                break  # only executed if the inner loop did NOT break
            #print("retrying a new obj box")
            continue  # only executed if the inner loop DID break
        # append our new box
        boxes.append(new_box)
    return obj_sizes, boxes

In [10]:
def mutate_image(img):
    # resize image for random value
    resize_rate = random.choice(sizes)
    img = img.resize([int(img.width*resize_rate), int(img.height*resize_rate)], Image.BILINEAR)

    # rotate image for random andle and generate exclusion mask 
    rotate_angle = random.randint(0,360)
    mask = Image.new('L', img.size, 255)
    img = img.rotate(rotate_angle, expand=True)
    mask = mask.rotate(rotate_angle, expand=True)


    # perform some enhancements on image
    enhancers = [ImageEnhance.Brightness, ImageEnhance.Color, ImageEnhance.Contrast, ImageEnhance.Sharpness]
    enhancers_count = random.randint(0,3)
    for i in range(0,enhancers_count):
        enhancer = random.choice(enhancers)
        enhancers.remove(enhancer)
        img = enhancer(img).enhance(random.uniform(0.5,1.5))

    return img, mask

In [11]:
def write_to_csv(data):
    csv_columns = ['index','image','xmin','ymin', 'xmax', 'ymax', 'label']
    with open(output_images + "train_data.csv","w") as training_data:
        writer = csv.DictWriter(training_data, fieldnames=csv_columns)
        writer.writeheader()
        for key,value in data.items():
            training_data.write("%s,%s,%s,%s,%s,%s,%s\n"%(key,str(value[0]), str(value[1]), str(value[2]), str(value[3]), str(value[4]), value[5]))

In [12]:
n = 1

In [44]:
def synthesize_images(base_bkgs_path, objs_path, output_images, obj_groups=True, annotate=True, mutate=False):
    global n
    csv_idx = 1
    print("Making synthetic images.", flush=True)
    bkg_images = [f for f in os.listdir(base_bkgs_path) if not f.startswith(".")]
    obj_images = [f for f in os.listdir(objs_path) if not f.startswith(".")]
    labelled_data = {}
    for bkg in bkg_images:
        # Load the background image
        bkg_path = base_bkgs_path + bkg
        bkg_img = Image.open(bkg_path)
        bkg_x, bkg_y = bkg_img.size

        # Do single objs first
        for i in obj_images:
            # Load the single obj
            i_path = objs_path + i
            obj_img = Image.open(i_path)
            obj_img.getdata
            top = -1
            if 'furniture_' in i:
                top = 0
            # Get an array of random obj positions (from top-left corner)
            obj_h, obj_w, x_pos, y_pos = get_obj_positions(obj=obj_img, bkg=bkg_img, count=count_per_size, top=top)            
            
            # Create synthetic images based on positions
            for h, w, x, y in zip(obj_h, obj_w, x_pos, y_pos):
                # Copy background
                bkg_w_obj = bkg_img.copy()
                
                if mutate:
                    new_obj, mask = mutate_image(obj_img)
                    # Paste on the obj
                    bkg_w_obj.paste(new_obj, (x, y), new_obj)
                else:
                    # Adjust obj size
                    new_obj = obj_img.resize(size=(w, h))
                    # Paste on the obj
                    bkg_w_obj.paste(new_obj, (x, y), new_obj)
                    img_name = 'syn_img_' + str(n) + ".jpg"
                output_fp = output_images + img_name
                # Save the image
                bkg_w_obj.save(fp=output_fp)

                if annotate:
                    labelled_data[csv_idx] = (img_name, int(x), int(y), int(x+w), int(y+h), i.split(".jpg")[0].split('_')[0])
                    csv_idx += 1
                #print(n)
                n += 1

        if obj_groups:
            # 24 Groupings of 2-4 objs together on a single background
            groups = [np.random.randint(0, len(obj_images) -1, np.random.randint(2, 5, 1)) for r in range(2*len(obj_images))]
            # For each group of objs
            for group in groups:
                # Get sizes and positions
                obj_sizes, boxes = get_group_obj_positions(group, bkg_img, objs_path)
                bkg_w_obj = bkg_img.copy()
                img_name = 'syn_img_' + str(n) + ".jpg"
                output_fp = output_images + img_name
                # For each obj in the group
                for i, size, box in zip(group, obj_sizes, boxes):
                    # Get the obj
                    obj = Image.open(objs_path + obj_images[i])
                    obj_w, obj_h = size
                    # Resize it as needed
                    new_obj = obj.resize((obj_w, obj_h))
                    x_pos, y_pos = box[:2]
                    
                    # Paste the obj to the background
                    bkg_w_obj.paste(new_obj, (x_pos, y_pos), new_obj)
                    
                    if annotate:
                    # ('x','y', 'height', 'width', 'label')
                        labelled_data[csv_idx] = (img_name, int(x_pos), int(y_pos), int(x_pos+obj_w), int(y_pos+obj_h), obj_images[i].split(".jpg")[0].split('_')[0])
                        csv_idx += 1
                        
                # Save image
                bkg_w_obj.save(fp=output_fp)
                
                #print(n)
                n += 1

    if annotate:
        print("Saving out Annotations", flush=True)
        # Save annotations
        write_to_csv(labelled_data)

    total_images = len([f for f in os.listdir(output_images) if not f.startswith(".")])
    print("Done! Created {} synthetic training images.".format(total_images), flush=True)


In [45]:
synthesize_images(base_bkgs_path, objs_path, output_images, obj_groups=True)

Making synthetic images.
Saving out Annotations
Done! Created 4801 synthetic training images.


### Converting Video to Images

In [17]:
import cv2
def convert_video_to_images(videoPath):
    vcap = cv2.VideoCapture(videoPath)
    filename = os.path.basename(vidPath).split('.')[0]
    dirname = os.path.dirname(vid_path)
    # read every nth frame
    count=0
    frame_num = []
    while vcap.isOpened():
        ret, frame = vcap.read()
        if ret:
            frame_count = filename + "_img_" + str(count) + ".jpg"
            cv2.imwrite(os.path.join(dirname, frame_count), frame)
            frame_num.append(frame_count)
            count += 10
            vcap.set(1, count)
        else:
            vcap.release()
            break

In [13]:
vid_path = r'D:\MachineLearning\BoschHackathon\train_data\video_imgs\door_5.mp4'
#convert_video_to_images(vid_path)

In [16]:
os.path.

'.'

### Modifying CSV

In [None]:
import os
train_data = './train_data/train/'
print(len([name for name in os.listdir(train_data) if ('.jpg' in name)]))

In [None]:
import pandas as pd

list_dt = []
for name in os.listdir(train_data):
  if ('.csv' in name):
    dt = pd.read_csv(os.path.join(train_data, name))
    list_dt.append(dt)
ds  = pd.concat(list_dt, axis=0, ignore_index=True)

In [None]:
ds.head()

In [None]:
ds['width'] = round(ds.xmax - ds.xmin)
ds['height'] = round(ds.ymax - ds.ymin)
ds['x'] = round((ds.xmax + ds.xmin)/2)
ds['y'] = round((ds.ymax + ds.ymin)/2)

In [None]:
ds.head()

In [None]:
new_ds = ds[['image', 'x', 'y', 'height', 'width', 'label']]
new_ds.head()

In [None]:
new_ds.to_csv('./train_data/train/consolidated_list.csv')