In [1]:
import os
import numpy as np
import cv2
from matplotlib import pyplot as plt
from tqdm import tqdm
import random
import shutil
from utils import pad_int_zeros
%matplotlib inline
from imgaug import augmenters as iaa
import imgaug as ia

In [2]:
# Which dataset layout the path-preparation cell should expect: 'AICD' or 'TSUNAMI'.
DATASET = "AICD"

# Root folder of the raw dataset; the input image / ground-truth paths are built from it.
DATASET_DIR = "../data/DATASETS/AICDDataset/"

# Output root for the generated train/ val/ test/ patch folders.
# This folder is deleted and recreated by DataPreparator.remove_and_create_dirs().
# The folder name is a plain label; it is not derived from the split parameters,
# so keep it in sync with them by hand.
TRAIN_VAL_TEST_DIR = "../data/aicd_patches/fixed_test_newAug_testR=0.1_valT=0.2_numSamples=30/"

In [12]:
class DataPreparator:
    """Cut augmented patch triplets (image 1, image 2, change mask) out of a dataset.

    The three path lists are parallel: entry ``k`` of ``images_1``, ``images_2`` and
    ``cdmaps`` belong to the same scene. Patches are written as PNG files below the
    module-level ``TRAIN_VAL_TEST_DIR`` into ``train/``, ``val/`` and ``test/``, each
    with the sub-folders ``1/``, ``2/`` and ``gt/``.
    """

    def __init__(self, images_1, images_2, cdmaps, dimx, dimy, invert_gt):
        """Store the dataset description.

        Args:
            images_1: list of file paths of the first image of every pair.
            images_2: list of file paths of the second image of every pair.
            cdmaps: list of file paths of the change masks (read as grayscale).
            dimx: patch width in pixels.
            dimy: patch height in pixels.
            invert_gt: if true, every mask is bitwise inverted right after loading.
        """
        self.images_1 = images_1
        self.images_2 = images_2
        self.cdmaps = cdmaps
        self.dimx = dimx
        self.dimy = dimy
        self.train_and_val = []
        self.test = []
        self.invert_gt = invert_gt

    def remove_and_create_dirs(self):
        """Delete ``TRAIN_VAL_TEST_DIR`` (if present) and recreate the empty folder tree."""
        try:
            shutil.rmtree(TRAIN_VAL_TEST_DIR)
        except FileNotFoundError:
            # First run: there is nothing to clean up.
            pass

        # Creation is deliberately not placed in a ``finally`` clause: if rmtree fails
        # for any other reason, that error should surface instead of a follow-up
        # "directory exists" error. makedirs also creates missing parent folders.
        for split_name in ('train', 'val', 'test'):
            for sub_name in ('1', '2', 'gt'):
                os.makedirs(os.path.join(TRAIN_VAL_TEST_DIR, split_name, sub_name))

    def augment(self, img_1, img_2, img_gt):
        """Apply one random geometric transform to all three patches, then add noise.

        The flip/affine pipeline is frozen with ``to_deterministic()`` before it is
        applied to the two images and to the mask. Salt-and-pepper noise is then
        applied to the two images only, independently for each of them.

        Args:
            img_1: first image patch.
            img_2: second image patch, same shape as ``img_1``.
            img_gt: mask patch with the same height and width as the images.

        Returns:
            Tuple ``(new_img_1, new_img_2, new_img_gt)``.
        """
        assert img_1.shape == img_2.shape
        assert img_1.shape[:2] == img_gt.shape[0:2]

        sometimes = lambda aug: iaa.Sometimes(0.5, aug)

        # sequence for images and gt
        # NOTE(review): ``order=[0, 1]`` allows bilinear interpolation and this pipeline
        # is also applied to the mask, so the saved mask is presumably not strictly
        # binary after an affine warp - confirm whether the consumer re-thresholds it.
        seq_all = iaa.Sequential([
            iaa.Fliplr(0.5),  # horizontally flip 50% of all images
            iaa.Flipud(0.5),  # vertically flip 50% of all images

            sometimes(iaa.Affine(
                scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},  # scale images to 80-120% of their size, individually per axis
                translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},  # translate by -20 to +20 percent (per axis)
                rotate=(-45, 45),  # rotate by -45 to +45 degrees
                shear=(-16, 16),  # shear by -16 to +16 degrees
                order=[0, 1],  # use nearest neighbour or bilinear interpolation (fast)
                mode='reflect'  # use reflections
            ))],
            random_order=True)
        seq_all_det = seq_all.to_deterministic()

        # augment images and gt with the frozen pipeline
        new_img_1 = seq_all_det.augment_images([img_1])[0]
        new_img_2 = seq_all_det.augment_images([img_2])[0]
        new_img_gt = seq_all_det.augment_images([img_gt])[0]

        # add some non-deterministic salt-and-pepper noise to the images (not to the mask)
        seq_images = iaa.Sequential([iaa.SaltAndPepper(0.03)])
        new_img_1 = seq_images.augment_images([new_img_1])[0]
        new_img_2 = seq_images.augment_images([new_img_2])[0]

        return new_img_1, new_img_2, new_img_gt

    def _load_sample(self, index):
        """Read one image triplet and find the contours of its thresholded mask.

        Returns:
            Tuple ``(img_1, img_2, img_gt, contours)``.

        Raises:
            IOError: if one of the three files could not be read.
        """
        paths = (self.images_1[index], self.images_2[index], self.cdmaps[index])
        img_1 = cv2.imread(paths[0])
        img_2 = cv2.imread(paths[1])
        img_gt = cv2.imread(paths[2], cv2.IMREAD_GRAYSCALE)

        # cv2.imread signals failure by returning None; report the offending path
        # here instead of failing later inside an unrelated OpenCV call.
        for path, image in zip(paths, (img_1, img_2, img_gt)):
            if image is None:
                raise IOError('Could not read image: ' + str(path))

        if self.invert_gt:
            img_gt = cv2.bitwise_not(img_gt)

        _, thresh = cv2.threshold(img_gt, 127, 255, cv2.THRESH_BINARY)
        # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
        # (contours, hierarchy) on 2.x / 4.x; the contour list is second-to-last in both.
        contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
        return img_1, img_2, img_gt, contours

    def _write_augmented_patch(self, dir_name, file_name, img_1, img_2, img_gt, cx, cy):
        """Crop a ``dimx`` x ``dimy`` window centred on (cx, cy), augment it and save it."""
        half_x = self.dimx // 2
        half_y = self.dimy // 2
        rows = slice(cy - half_y, cy + half_y)
        cols = slice(cx - half_x, cx + half_x)

        # Copy the crops: the source images are reused for many patches, so the
        # augmenters must never be handed a view into them.
        patch_1, patch_2, patch_gt = self.augment(
            img_1[rows, cols].copy(),
            img_2[rows, cols].copy(),
            img_gt[rows, cols].copy())

        cv2.imwrite(os.path.join(dir_name, 'gt', file_name), patch_gt)
        cv2.imwrite(os.path.join(dir_name, '1', file_name), patch_1)
        cv2.imwrite(os.path.join(dir_name, '2', file_name), patch_2)

    def create_patches(self, dir_name, dataset_indicies, translate_rate=0.10, num_samples=30):
        """Write augmented patches for the given dataset entries into ``dir_name``.

        For every entry the mask contours are extracted once. Then, ``num_samples``
        times, one "positive" patch (file prefix ``P``) is cut around every contour
        with a non-zero area and one randomly placed patch (file prefix ``N``) is cut
        per contour.

        Args:
            dir_name: output folder that already contains ``1/``, ``2/`` and ``gt/``.
            dataset_indicies: indices into the path lists given to the constructor.
            translate_rate: maximum random shift of a positive patch centre, as a
                fraction of the image width / height.
            num_samples: number of sampling passes per dataset entry.

        Raises:
            IOError: if an image of a listed entry could not be read.
        """
        img_pos_index = 0
        img_neg_index = 0
        half_x = self.dimx // 2
        half_y = self.dimy // 2

        for dataset_index in tqdm(dataset_indicies):
            # Loading and contour detection do not depend on the sampling pass,
            # so they are done once per entry rather than num_samples times.
            img_1, img_2, img_gt, contours = self._load_sample(dataset_index)
            height, width = img_gt.shape[:2]

            for _ in range(num_samples):
                # POSITIVE CLASS ~ CHANGES
                for contour in contours:
                    M = cv2.moments(contour)
                    if M['m00'] == 0:
                        # zero-area contour: the centroid is undefined
                        continue

                    # find center of change
                    cx = int(M['m10'] / M['m00'])
                    cy = int(M['m01'] / M['m00'])

                    # shift the centre randomly by up to translate_rate of the image size
                    cx = cx + (random.random() * 2 - 1) * translate_rate * width
                    cy = cy + (random.random() * 2 - 1) * translate_rate * height

                    # keep the whole window inside the image
                    cx = int(np.clip(cx, half_x, width - half_x))
                    cy = int(np.clip(cy, half_y, height - half_y))

                    self._write_augmented_patch(
                        dir_name, 'P' + pad_int_zeros(img_pos_index, 6) + '.png',
                        img_1, img_2, img_gt, cx, cy)
                    img_pos_index += 1

                # NEGATIVE CLASS ~ NO CHANGES
                # NOTE(review): the random location is not checked against the mask,
                # so an 'N' patch can overlap a changed region - confirm this is intended.
                for _contour in contours:
                    # take random patch (one per contour)
                    cx = random.randint(half_x, width - half_x)
                    cy = random.randint(half_y, height - half_y)

                    self._write_augmented_patch(
                        dir_name, 'N' + pad_int_zeros(img_neg_index, 6) + '.png',
                        img_1, img_2, img_gt, cx, cy)
                    img_neg_index += 1

    def generate_train_val_test_datasets(self, test_rate=0.1, val_rate=0.2, randomize_test=False):
        """Regenerate the complete train / val / test patch folders.

        The test set is chosen at image level (the last ``test_rate`` share of the
        dataset, or a random sample when ``randomize_test`` is true). The validation
        set is chosen at patch level: a random ``val_rate`` share of the patches
        written to ``train/`` is moved to ``val/``.

        Args:
            test_rate: fraction of dataset entries used for the test set.
            val_rate: fraction of generated training patches moved to validation.
            randomize_test: pick the test entries randomly instead of taking the last ones.
        """
        assert len(self.images_1) == len(self.images_2)
        assert len(self.images_1) == len(self.cdmaps)

        len_dataset = len(self.images_1)
        len_test_set = int(test_rate * len_dataset)

        if randomize_test:
            test_indicies = random.sample(range(len_dataset), len_test_set)
        else:
            # An explicit start index is required here: ``range(n)[-k:]`` with k == 0
            # is ``[-0:]``, i.e. the WHOLE dataset, which would leave no training data.
            first_test_index = max(0, len_dataset - len_test_set)
            test_indicies = list(range(first_test_index, len_dataset))

        test_lookup = set(test_indicies)
        train_val_indicies = [i for i in range(len_dataset) if i not in test_lookup]

        self.remove_and_create_dirs()

        self.create_patches(TRAIN_VAL_TEST_DIR + 'test/', test_indicies)
        self.create_patches(TRAIN_VAL_TEST_DIR + 'train/', train_val_indicies)

        train_dir = TRAIN_VAL_TEST_DIR + 'train/'
        val_dir = TRAIN_VAL_TEST_DIR + 'val/'

        # os.listdir order is arbitrary; sort so the draw only depends on the RNG state.
        train_val_patches = sorted(os.listdir(train_dir + '1/'))
        val_patches = random.sample(train_val_patches, int(val_rate * len(train_val_patches)))

        # The same file name exists in 1/, 2/ and gt/; move all three together.
        for patch in val_patches:
            for sub_name in ('1/', '2/', 'gt/'):
                os.rename(train_dir + sub_name + patch, val_dir + sub_name + patch)
    

In [13]:
# Parallel path lists, one entry per scene: first image, second image, change mask.
images_1, images_2, gt_array = [], [], []

In [14]:
# Dataset path preparation
# Collect the (image 1, image 2, ground truth) path triplets of the selected dataset
# and set INVERT_GT, which is later passed to DataPreparator as invert_gt.
# Triplets with a missing file are reported and skipped.

# Rebind the lists here so that this cell is idempotent: re-running it must not
# append every path a second time.
images_1 = []
images_2 = []
gt_array = []

if DATASET == 'AICD':
    INVERT_GT = False
    # These two folders do not depend on the scene / view, so build them once.
    img_dir = DATASET_DIR+'Images_NoShadow/'
    gt_dir = DATASET_DIR+'GroundTruth/'
    for i_scene in range(100):
        for i_view in range(5):
            base_name = 'Scene'+pad_int_zeros(i_scene, 4)+'_View'+pad_int_zeros(i_view, 2)

            img_1 = img_dir+base_name+'_moving.png'
            img_2 = img_dir+base_name+'_target.png'
            gt = gt_dir+base_name+'_gtmask.png'
            if all(os.path.isfile(path) for path in (img_1, img_2, gt)):
                images_1.append(img_1)
                images_2.append(img_2)
                gt_array.append(gt)
            else:
                print(base_name+' not exists')
elif DATASET == 'TSUNAMI':
    INVERT_GT = True
    for i_image in range(100):
        base_name = pad_int_zeros(i_image, 8)

        img_1 = DATASET_DIR+'t0/'+base_name+'.jpg'
        img_2 = DATASET_DIR+'t1/'+base_name+'.jpg'
        gt = DATASET_DIR+'ground_truth/'+base_name+'.bmp'
        if all(os.path.isfile(path) for path in (img_1, img_2, gt)):
            images_1.append(img_1)
            images_2.append(img_2)
            gt_array.append(gt)
        else:
            print(img_1, img_2, gt, ' not exists')
else:
    raise ValueError('Dataset must be one of [\'AICD\', \'TSUNAMI\']')
        

In [15]:
# Patch size in pixels (passed to DataPreparator as dimx, dimy).
PATCH_WIDTH = 240
PATCH_HEIGHT = 192

dp = DataPreparator(images_1, images_2, gt_array, PATCH_WIDTH, PATCH_HEIGHT, invert_gt=INVERT_GT)

# generate_train_val_test_datasets() wipes and recreates the output folder tree itself
# (it calls remove_and_create_dirs() before writing any patch), so no separate call
# is needed here.
dp.generate_train_val_test_datasets()

100%|██████████| 50/50 [02:23<00:00,  2.86s/it]
100%|██████████| 450/450 [21:59<00:00,  2.93s/it]
