## Data Preparation for U-net

Crop each image/mask pair into 256x256 chunks and randomly split the chunks into train (70%), validation (20%) and holdout (10%) sets.

In [77]:
from __future__ import print_function
from scipy import misc

import cv2
import os
import numpy as np

In [121]:
def reshape_and_normalize_mask(mask, tile=256):
    """Crop a mask to whole tiles and min-max scale every channel to [0, 1].

    Args:
        mask: 3-D array indexed as (row, column, channel).
        tile: Edge length of the square tiles the result will later be cut
            into. Rows and columns beyond the last whole tile are dropped.

    Returns:
        A new float32 array whose first two dimensions are multiples of
        ``tile``. Each channel is rescaled independently so that its minimum
        maps to 0 and its maximum to 1. A channel that is constant over the
        cropped area has no range to scale by and is returned as all zeros
        rather than NaN. If the input is smaller than one tile in either
        direction, the (empty) cropped array is returned as is.
    """
    # Largest multiples of the tile size that fit in each spatial dimension.
    rows = mask.shape[0] // tile * tile
    cols = mask.shape[1] // tile * tile

    # astype() copies, so the caller's array is never modified.
    mask = mask[:rows, :cols, :].astype('float32')

    # min()/max() raise on zero-size arrays; there is nothing to scale anyway.
    if mask.size == 0:
        return mask

    # Per-channel statistics, kept broadcastable against the mask itself.
    lo = mask.min(axis=(0, 1), keepdims=True)
    span = mask.max(axis=(0, 1), keepdims=True) - lo

    # A constant channel would otherwise divide 0 by 0; dividing by 1 instead
    # leaves its (already zero) numerator untouched.
    span[span == 0] = 1

    return (mask - lo) / span

def crop256_and_train_valid_split(stacked, mask, filename,
                                  out_root='/home/RGD/cleannpy/normalized'):
    """Tile one image/mask pair into 256x256 crops and save a 70/20/10 split.

    Args:
        stacked: Path of the ``.npy`` file holding the image array.
        mask: Path of the ``.npy`` file holding the matching mask array.
        filename: Prefix for every saved tile; tiles are written as
            ``<filename>_<n>`` (``np.save`` appends ``.npy``).
        out_root: Directory containing the ``train``, ``validation`` and
            ``holdout`` folders, each with ``masks`` and ``imgs`` inside.
            These folders are not created here and must already exist.

    Returns:
        None. Tiles are written to disk and a progress line is printed after
        each split.

    Side effects:
        Re-seeds NumPy's global random generator with 10 on every call, so
        the shuffle is reproducible for a given number of tiles.
    """
    # reshape_and_normalize_face is defined outside this section.
    # NOTE(review): presumably it crops the image the same way the mask is
    # cropped; if the two results differ in height/width the tiles below no
    # longer line up - confirm.
    stacked = reshape_and_normalize_face(np.load(stacked))
    mask = reshape_and_normalize_mask(np.load(mask))

    size_crop = 256

    masks = []
    imgs = []

    # Walk the image in row-major tile order, cutting the same window out of
    # the mask and the image so that masks[k] always belongs to imgs[k].
    for row in range(0, stacked.shape[0], size_crop):
        for col in range(0, stacked.shape[1], size_crop):
            masks.append(mask[row:row + size_crop, col:col + size_crop, :])
            imgs.append(stacked[row:row + size_crop, col:col + size_crop, :])

    # Fixed seed: the same tile count always yields the same assignment.
    np.random.seed(10)
    indices = np.arange(len(masks))
    np.random.shuffle(indices)

    p70 = int(.7 * len(indices))
    p90 = int(.9 * len(indices))

    # (folder, shuffled tile indices, progress message) for each split:
    # first 70% -> train, next 20% -> validation, final 10% -> holdout.
    splits = (
        ('train', indices[:p70], 'train complete'),
        ('validation', indices[p70:p90], 'valid complete'),
        ('holdout', indices[p90:], 'holdout complete'),
    )

    # The running number is shared across the three splits, so a given
    # <filename>_<n> exists in exactly one of them.
    img_num = 0
    for folder, chosen, message in splits:
        for num in chosen:
            stem = filename + '_' + str(img_num)
            np.save(os.path.join(out_root, folder, 'masks', stem), masks[num])
            np.save(os.path.join(out_root, folder, 'imgs', stem), imgs[num])
            img_num += 1
        print(message)

In [88]:
# Folder that holds the full-size source arrays (Face_*.npy / Mask_*.npy).
new_path='/home/RGD/cleannpy/fullsize_unnormalized'

In [89]:
# Bare entry names found in new_path (no directory part). os.listdir returns
# them in arbitrary order, so anything order-sensitive has to sort them.
pathlist = os.listdir(new_path)

In [91]:
# Full paths of the Face and Mask arrays. Both lists are sorted so they can be
# walked pairwise; this relies on every Face file having a same-numbered Mask.
fullpathface = sorted(
    os.path.join(new_path, name) for name in pathlist if 'Face' in name
)
fullpathmask = sorted(
    os.path.join(new_path, name) for name in pathlist if 'Mask' in name
)

In [95]:
fullpathface

['/home/RGD/cleannpy/fullsize_unnormalized/Face_0000086.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Face_0000130.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Face_0000182.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Face_0000229.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Face_0000274.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Face_0000323.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Face_0000368.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Face_0000418.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Face_0000463.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Face_0000509.npy']

In [105]:
fullpathmask

['/home/RGD/cleannpy/fullsize_unnormalized/Mask_0000086.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Mask_0000130.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Mask_0000182.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Mask_0000229.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Mask_0000274.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Mask_0000323.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Mask_0000368.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Mask_0000418.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Mask_0000463.npy',
 '/home/RGD/cleannpy/fullsize_unnormalized/Mask_0000509.npy']

In [111]:
# Short id per Face file: the three characters right before the 4-character
# extension (e.g. '086' for 'Face_0000086.npy').
facenamelist = [face_path[-7:-4] for face_path in fullpathface]

In [112]:
# Same three-character id, taken from the Mask paths.
masknamelist = [mask_path[-7:-4] for mask_path in fullpathmask]

In [92]:
len(fullpathface)

10

In [93]:
len(fullpathmask)

10

In [101]:
# Sanity check: there should be one name prefix per Face file.
# (The original cell referenced `namelist`, which is never defined here.)
len(facenamelist)

10

In [122]:
# Tile and split every Face/Mask pair; k is the prefix of the saved tiles.
for i, j, k in zip(fullpathface, fullpathmask, facenamelist):
    crop256_and_train_valid_split(i, j, k)
    # Characters [-16:-4] are the file stem, e.g. 'Face_0000086'.
    print('{}\n{}done'.format(i[-16:-4], j[-16:-4]))

train complete
valid complete
holdout complete
Face_0000086
Mask_0000086done
train complete
valid complete
holdout complete
Face_0000130
Mask_0000130done
train complete
valid complete
holdout complete
Face_0000182
Mask_0000182done
train complete
valid complete
holdout complete
Face_0000229
Mask_0000229done
train complete
valid complete
holdout complete
Face_0000274
Mask_0000274done
train complete
valid complete
holdout complete
Face_0000323
Mask_0000323done
train complete
valid complete
holdout complete
Face_0000368
Mask_0000368done
train complete
valid complete
holdout complete
Face_0000418
Mask_0000418done
train complete
valid complete
holdout complete
Face_0000463
Mask_0000463done
train complete
valid complete
holdout complete
Face_0000509
Mask_0000509done
