In [1]:
import os
import numpy as np
import cv2
from matplotlib import pyplot as plt
from tqdm import tqdm
import random
import shutil
%matplotlib inline

In [2]:
def pad_int_zeros(i, num_digits):
    """Return ``str(i)`` left-padded with '0' characters to ``num_digits`` width.

    Values whose string form is already ``num_digits`` long or longer are
    returned unchanged (never truncated). The padding is purely textual, so a
    leading minus sign ends up after the zeros (e.g. ``-5`` -> ``'00-5'``).
    """
    return str(i).rjust(num_digits, '0')

In [3]:
# Root output directory for the generated patches. DataPreparator creates
# train/, val/ and test/ beneath it, each with 1/, 2/ and gt/ sub-folders.
# WARNING: DataPreparator.remove_and_create_dirs() deletes this whole tree
# before recreating it.
TRAIN_VAL_TEST_DIR='data/train_test_val/'

In [4]:
class DataPreparator:
    """Cut paired images and their change masks into fixed-size patches on disk.

    For every source triplet (image 1, image 2, change mask) the class writes
    "positive" patches centred near each contour found in the thresholded mask
    and the same number of patches at uniformly random positions ("negatives";
    these are not checked for actually being change-free). Patches are written
    as PNG files under ``TRAIN_VAL_TEST_DIR`` in ``<split>/1/``, ``<split>/2/``
    and ``<split>/gt/`` with identical file names.

    Parameters
    ----------
    images_1, images_2 : sequence of str
        Paths of the two images of every pair; read with ``cv2.imread``.
    cdmaps : sequence of str
        Paths of the change masks; read as single-channel grayscale.
    dimx, dimy : int
        Requested patch width and height in pixels. Crops span
        ``2 * (dim // 2)`` pixels, so odd values yield one pixel less.
    invert_gt : bool
        If true, the mask is bitwise-inverted before it is thresholded.
    """

    # Largest random shift applied to a positive patch centre, expressed as a
    # fraction of the image width (x) or height (y).
    _CENTER_JITTER = 0.15

    def __init__(self, images_1, images_2, cdmaps, dimx, dimy, invert_gt):
        self.images_1 = images_1
        self.images_2 = images_2
        self.cdmaps = cdmaps
        self.dimx = dimx
        self.dimy = dimy
        # Kept for backward compatibility; no method of this class fills them.
        self.train_and_val = []
        self.test = []
        self.invert_gt = invert_gt

    def remove_and_create_dirs(self):
        """Delete ``TRAIN_VAL_TEST_DIR`` (if present) and recreate the empty tree.

        Creates ``train/``, ``val/`` and ``test/``, each containing ``1/``,
        ``2/`` and ``gt/``. Missing parent directories are created as well.
        """
        # Only remove what exists: the previous try/finally re-raised the
        # FileNotFoundError from rmtree on a first run, after building the tree.
        if os.path.isdir(TRAIN_VAL_TEST_DIR):
            shutil.rmtree(TRAIN_VAL_TEST_DIR)

        for split in ('train/', 'val/', 'test/'):
            for dir_name in ('1/', '2/', 'gt/'):
                # makedirs also creates the split folder and any missing parents.
                os.makedirs(TRAIN_VAL_TEST_DIR + split + dir_name)

    def augment(self, aug, new_img_1, new_img_2, new_img_gt):
        """Randomly flip/rotate the three patches with the same transform.

        ``aug`` is a sequence of three flags enabling, in order: vertical flip,
        horizontal flip, 180 degree rotation. Each enabled transform is applied
        with probability 0.5. Returns ``(img_1, img_2, img_gt)`` as C-contiguous
        arrays.
        """
        patches = [new_img_1, new_img_2, new_img_gt]

        # A random number is drawn only for enabled transforms (short-circuit).
        if aug[0] and random.random() > 0.5:
            patches = [patch[::-1, :] for patch in patches]
        if aug[1] and random.random() > 0.5:
            patches = [patch[:, ::-1] for patch in patches]
        if aug[2] and random.random() > 0.5:
            # Exact 180 degree rotation. The former warpAffine about
            # (cols/2, rows/2) shifted the content by one pixel and left the
            # first row and column black in the images and in the mask.
            patches = [patch[::-1, ::-1] for patch in patches]

        # Flipped views have negative strides; hand plain contiguous arrays on.
        new_img_1, new_img_2, new_img_gt = [np.ascontiguousarray(patch) for patch in patches]
        return new_img_1, new_img_2, new_img_gt

    def _save_patch(self, dir_name, file_stem, cx, cy, img_1, img_2, img_gt, aug):
        """Crop the patch centred at (cx, cy) from all three images, augment, write PNGs."""
        half_x = self.dimx // 2
        half_y = self.dimy // 2
        rows = slice(cy - half_y, cy + half_y)
        cols = slice(cx - half_x, cx + half_x)

        new_img_1, new_img_2, new_img_gt = self.augment(
            aug, img_1[rows, cols], img_2[rows, cols], img_gt[rows, cols])

        file_name = file_stem + '.png'
        cv2.imwrite(dir_name + 'gt/' + file_name, new_img_gt)
        cv2.imwrite(dir_name + '1/' + file_name, new_img_1)
        cv2.imwrite(dir_name + '2/' + file_name, new_img_2)

    def create_patches(self, dir_name, dataset_indicies, num_samples=10, aug=(True, True, True)):
        """Write positive ('P......png') and random ('N......png') patches.

        Parameters
        ----------
        dir_name : str
            Split directory (with trailing slash) that already contains the
            ``1/``, ``2/`` and ``gt/`` sub-folders.
        dataset_indicies : sequence of int
            Indices into ``images_1`` / ``images_2`` / ``cdmaps`` to process.
        num_samples : int
            Number of passes over each image; every pass writes one positive
            and one random patch per mask contour.
        aug : sequence of three bools
            Forwarded to :meth:`augment`.

        Raises
        ------
        IOError
            If one of the three files of a triplet cannot be read.
        ValueError
            If an image is smaller than the patch size.
        """
        half_x = self.dimx // 2
        half_y = self.dimy // 2
        img_pos_index = 0
        img_neg_index = 0

        for index in tqdm(dataset_indicies):
            # Reading and contour extraction do not depend on the sample pass,
            # so they are done once per image instead of num_samples times.
            img_1 = cv2.imread(self.images_1[index])
            img_2 = cv2.imread(self.images_2[index])
            img_gt = cv2.imread(self.cdmaps[index], 0)
            if img_1 is None or img_2 is None or img_gt is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError('Could not read image triplet: {}, {}, {}'.format(
                    self.images_1[index], self.images_2[index], self.cdmaps[index]))

            if self.invert_gt:
                img_gt = cv2.bitwise_not(img_gt)

            height, width = img_gt.shape[:2]
            if width < 2 * half_x or height < 2 * half_y:
                raise ValueError('Image {} ({}x{}) is smaller than the {}x{} patch'.format(
                    self.cdmaps[index], width, height, self.dimx, self.dimy))

            _, thresh = cv2.threshold(img_gt, 127, 255, cv2.THRESH_BINARY)
            # findContours returns 3 values on OpenCV 3.x but 2 on 2.x/4.x;
            # the contour list is the second-to-last element in every version.
            contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

            for _ in range(num_samples):
                # POSITIVES: one patch near the centroid of every contour.
                for contour in contours:
                    moments = cv2.moments(contour)
                    if moments['m00'] == 0:
                        # Zero-area contour: centroid is undefined.
                        continue
                    cx = int(moments['m10'] / moments['m00'])
                    cy = int(moments['m01'] / moments['m00'])

                    # Shift the centre randomly so the change is not always centred.
                    cx = cx + (random.random() * 2 - 1) * self._CENTER_JITTER * width
                    cy = cy + (random.random() * 2 - 1) * self._CENTER_JITTER * height

                    # Keep the whole patch inside the image.
                    cx = int(np.clip(cx, half_x, width - half_x))
                    cy = int(np.clip(cy, half_y, height - half_y))

                    self._save_patch(dir_name, 'P' + pad_int_zeros(img_pos_index, 6),
                                     cx, cy, img_1, img_2, img_gt, aug)
                    img_pos_index += 1

                # NEGATIVES: as many uniformly random patches as there are contours.
                for _ in contours:
                    cx = random.randint(half_x, width - half_x)
                    cy = random.randint(half_y, height - half_y)

                    self._save_patch(dir_name, 'N' + pad_int_zeros(img_neg_index, 6),
                                     cx, cy, img_1, img_2, img_gt, aug)
                    img_neg_index += 1

    def generate_train_val_test_datasets(self, test_rate=0.01, val_rate=0.2):
        """Rebuild the output tree and fill the train, val and test splits.

        ``int(test_rate * n_images)`` source images are drawn at random for the
        test split; patches of all remaining images are written to ``train/``.
        Afterwards ``int(val_rate * n_train_patches)`` randomly chosen patches
        are moved from ``train/`` to ``val/``.

        NOTE(review): validation patches therefore come from the same source
        images as training patches; only the test split is separated by image.
        """
        assert len(self.images_1) == len(self.images_2), 'images_1/images_2 length mismatch'
        assert len(self.images_1) == len(self.cdmaps), 'images_1/cdmaps length mismatch'

        len_dataset = len(self.images_1)
        len_test_set = int(test_rate * len_dataset)
        test_indicies = random.sample(range(len_dataset), len_test_set)

        # Set lookup avoids a linear scan of the test list for every index.
        test_lookup = set(test_indicies)
        train_val_indicies = [i for i in range(len_dataset) if i not in test_lookup]

        self.remove_and_create_dirs()

        self.create_patches(TRAIN_VAL_TEST_DIR + 'test/', test_indicies)
        self.create_patches(TRAIN_VAL_TEST_DIR + 'train/', train_val_indicies)

        # os.listdir order is arbitrary; sorting makes a seeded run reproducible.
        train_val_patches = sorted(os.listdir(TRAIN_VAL_TEST_DIR + 'train/1/'))
        val_patches = random.sample(train_val_patches, int(val_rate * len(train_val_patches)))

        for patch in val_patches:
            for sub_dir in ('1/', '2/', 'gt/'):
                os.rename(TRAIN_VAL_TEST_DIR + 'train/' + sub_dir + patch,
                          TRAIN_VAL_TEST_DIR + 'val/' + sub_dir + patch)
    

In [8]:
# AICD dataset: collect the file paths of every complete
# (moving image, target image, ground-truth mask) triplet.

DATASET_DIR = 'data/AICDDataset/'
images_1 = []
images_2 = []
gt_array = []

for i_scene in range(100):
    for i_view in range(5):
        img_dir = DATASET_DIR+'Images_NoShadow/'
        gt_dir = DATASET_DIR+'GroundTruth/'
        base_name = 'Scene{}_View{}'.format(pad_int_zeros(i_scene, 4), pad_int_zeros(i_view, 2))

        img_1 = '{}{}_moving.png'.format(img_dir, base_name)
        img_2 = '{}{}_target.png'.format(img_dir, base_name)
        gt = '{}{}_gtmask.png'.format(gt_dir, base_name)

        # Keep a triplet only when all three files are present on disk.
        if all(os.path.isfile(path) for path in (img_1, img_2, gt)):
            images_1.append(img_1)
            images_2.append(img_2)
            gt_array.append(gt)
        else:
            print(base_name+' not exists')
        

In [6]:
# TSUNAMI dataset: collect the file paths of every complete
# (t0 image, t1 image, ground-truth mask) triplet.
# Note: this cell assigns the same variables as the AICD cell, so whichever
# of the two cells ran last determines what the later cells process.

DATASET_DIR = 'data/TSUNAMI/'
images_1 = []
images_2 = []
gt_array = []

for i_image in range(100):
    base_name = pad_int_zeros(i_image, 8)

    img_1 = '{}t0/{}.jpg'.format(DATASET_DIR, base_name)
    img_2 = '{}t1/{}.jpg'.format(DATASET_DIR, base_name)
    gt = '{}ground_truth/{}.bmp'.format(DATASET_DIR, base_name)

    # Keep a triplet only when all three files are present on disk.
    if all(os.path.isfile(path) for path in (img_1, img_2, gt)):
        images_1.append(img_1)
        images_2.append(img_2)
        gt_array.append(gt)
    else:
        print(img_1, img_2, gt, ' not exists')

In [9]:
# Seed Python's RNG so the test selection, patch positions, augmentations and
# the train/val split are repeatable between runs.
random.seed(42)

# Patches are requested as 240 px wide and 192 px high; the ground-truth masks
# are inverted before thresholding (invert_gt=True).
dp = DataPreparator(images_1, images_2, gt_array, dimx=240, dimy=192, invert_gt=True)

# generate_train_val_test_datasets() wipes and recreates the output tree itself,
# so a separate remove_and_create_dirs() call beforehand is not needed.
dp.generate_train_val_test_datasets()


  0%|          | 0/1 [00:00<?, ?it/s][A
Exception in thread Thread-5:
Traceback (most recent call last):
  File "/home/user/anaconda3/envs/tensorflow/lib/python3.5/threading.py", line 914, in _bootstrap_inner
    self.run()
  File "/home/user/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tqdm/_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "/home/user/anaconda3/envs/tensorflow/lib/python3.5/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration

100%|██████████| 1/1 [00:05<00:00,  5.42s/it]
100%|██████████| 99/99 [08:38<00:00,  5.24s/it]


In [13]:
# Number of files in the val/gt folder of an output tree, integer-divided by 5.
# NOTE(review): this path differs from TRAIN_VAL_TEST_DIR ('data/train_test_val/')
# and no cell in this notebook creates it - presumably a renamed copy of an
# earlier run; confirm. The reason for the division by 5 is not shown here.
len(os.listdir('data/train_test_val_tsunami_reduced/'+'val/gt/'))//5

3250

In [6]:
# Number of second-image paths collected by whichever dataset cell (AICD or
# TSUNAMI) was executed last; both cells assign images_2.
len(images_2)

500