# Test Image Preprocessing

In order to run all of the neural networks with the same set of images, we will do all of our image preprocessing and augmentation beforehand.

In [1]:
import os
import glob
from PIL import Image
from PIL.ImageOps import mirror
import matplotlib.pyplot as plt
import seaborn as sn
import numpy as np
import math
%matplotlib inline

In [2]:
# --- Target image geometry -------------------------------------------------
# Every preprocessed image is written out at this size.
img_width = 80
img_height = 80
input_shape = (img_width, img_height, 3)

# --- Dataset layout --------------------------------------------------------
# One sub-folder per class; the same names are used under every root below.
folders = ['bart', 'hommer', 'lisa', 'marge']

# Raw inputs and preprocessed outputs, resolved relative to the working dir.
train_data_pth = os.getcwd() + '/../data_raw/train/'
validation_data_pth = os.getcwd() + '/../data_raw/validation/'
train_data_out = os.getcwd() + '/../data/train/'
validation_data_out = os.getcwd() + '/../data/validation/'

# --- Sampling --------------------------------------------------------------
# Number of raw images sampled per class (the trailing comments keep the
# alternative values noted by the original author).
n_input = 30 #300
n_valid = 6  #50
# Number of augmented variants written for each sampled image.
random_permutations_per_image = 10

# --- Augmentation strength -------------------------------------------------
# Skew slope is drawn uniformly from [-skew_fraction, skew_fraction].
skew_fraction = 0.1
# Zoom factor is drawn uniformly from [-zoom_fraction, zoom_fraction].
zoom_fraction = 0.2

## Prepare the training images

In [3]:
# Build the augmented training set.
#
# For every class folder, sample up to `n_input` raw images and write
# `random_permutations_per_image` variants of each one, resized to
# (img_width, img_height), into the matching folder under `train_data_out`.
# Variant 0 is the untouched image, variant 1 may only be mirrored, and
# variants 2+ may additionally be skewed and zoomed.
np.random.seed(42)  # fixed seed so the sampling and augmentations are repeatable
for folder in folders:
    print("Working on directory: " + folder)

    # glob returns files in arbitrary order; sort so that the seeded sampling
    # below picks the same files on every run and every machine.
    files = sorted(glob.glob(train_data_pth + folder + '/*.jpg'))
    if not files:
        print("  no .jpg files found, skipping")
        continue

    # Take a random sample of the files, never asking for more than exist
    # (np.random.choice raises when replace=False and the sample is too large).
    n_take = min(n_input, len(files))
    if n_take < n_input:
        print("  only {} of {} requested images available".format(n_take, n_input))
    files = np.random.choice(files, n_take, replace=False)

    # Make sure the destination exists before the first save.
    out_dir = train_data_out + folder + "/"
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for f in files:
        head, file = os.path.split(f)
        filename, file_extension = os.path.splitext(file)

        # Read the pixels eagerly and release the file handle right away.
        with Image.open(f) as src:
            base = src.copy()

        for k in range(random_permutations_per_image):
            # Start every variant from the pristine image. All operations
            # below return new images, so `base` itself is never modified and
            # skews/crops do not accumulate from one variant to the next.
            img = base

            # One draw drives all three decisions, so the thresholds nest:
            # a skewed variant (> 0.7) is always also mirrored and zoomed.
            picknum = np.random.rand()

            # Flip half of the images
            if k > 0 and picknum > 0.5:
                img = mirror(img)

            # Skew 30% of the images
            if k > 1 and picknum > 0.7:
                m = np.random.uniform(-skew_fraction, skew_fraction)
                width, height = img.size
                xshift = int(round(abs(m) * width))
                # Horizontal shear onto a canvas widened by xshift.
                img = img.transform((width + xshift, height), Image.AFFINE,
                        (1, m, -xshift if m > 0 else 0, 0, 1, 0), Image.BICUBIC)
                # Crop the skew
                # NOTE(review): for m > 0 the window starts at x = 0, which
                # looks like it can include the empty wedge left by the shear
                # -- confirm this is the intended crop.
                if m < 0:
                    crop_dims = (xshift, 0, width - xshift, height)
                else:
                    crop_dims = (0, 0, width - xshift, height)
                img = img.crop(crop_dims)

            # Zoom 80% of the images
            if k > 1 and picknum > 0.2:
                z = np.random.uniform(-zoom_fraction, zoom_fraction)
                width, height = img.size  # size after the optional skew crop
                basewidth = int(width * (1 + z))
                wpercent = basewidth / float(width)
                hsize = int(float(height) * wpercent)
                if basewidth > width:
                    # Zoom out by centring the image on a larger canvas
                    # (Image.new fills with black by default).
                    new_im = Image.new("RGB", (basewidth, hsize))
                    new_im.paste(img, (int((basewidth - width) / 2),
                                       int((hsize - height) / 2)))
                    img = new_im
                else:
                    # Zoom in by cropping a centred window.
                    left = (width - basewidth) / 2
                    top = (height - hsize) / 2
                    img = img.crop((left, top, left + basewidth, top + hsize))

            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            img2 = img.resize((img_width, img_height), Image.LANCZOS)

            img2.save(train_data_out + folder + "/" + filename + "_{}_prep".format(k) + file_extension)

    
            

Working on directory: bart
Working on directory: hommer
Working on directory: lisa
Working on directory: marge


## Prepare the validation images

In [4]:
# Build the augmented validation set.
#
# For every class folder, sample up to `n_valid` raw images and write
# `random_permutations_per_image` variants of each one into the matching
# folder under `validation_data_out`. Each variant is optionally mirrored,
# skewed and zoomed (independent draws), then an
# (img_width x img_height) window is cropped out of it.
# NOTE(review): the training cell resizes the whole image to the target size,
# whereas this cell crops a patch -- confirm the difference is intended.
np.random.seed(17)  # fixed seed so the sampling and augmentations are repeatable
for folder in folders:
    print("Working on directory: " + folder)

    # glob returns files in arbitrary order; sort so that the seeded sampling
    # below picks the same files on every run and every machine.
    files = sorted(glob.glob(validation_data_pth + folder + '/*.jpg'))
    if not files:
        print("  no .jpg files found, skipping")
        continue

    # Take a random sample of the files, never asking for more than exist
    # (np.random.choice raises when replace=False and the sample is too large).
    n_take = min(n_valid, len(files))
    if n_take < n_valid:
        print("  only {} of {} requested images available".format(n_take, n_valid))
    files = np.random.choice(files, n_take, replace=False)

    # Make sure the destination exists before the first save.
    out_dir = validation_data_out + folder + "/"
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for f in files:
        head, file = os.path.split(f)
        filename, file_extension = os.path.splitext(file)

        # Read the pixels eagerly and release the file handle right away.
        with Image.open(f) as src:
            base = src.copy()
        org_width, org_height = base.size

        for k in range(random_permutations_per_image):
            # Start every variant from the pristine image. All operations
            # below return new images, so `base` itself is never modified and
            # the transforms do not accumulate from one variant to the next.
            img = base

            # Flip half of the images
            if np.random.rand() > 0.5:
                img = mirror(img)

            # Skew 30% of the images
            if np.random.rand() > 0.7:
                m = np.random.uniform(-skew_fraction, skew_fraction)
                xshift = int(round(abs(m) * org_width))
                new_width = org_width + xshift
                # Horizontal shear onto a canvas widened by xshift.
                img = img.transform((new_width, org_height), Image.AFFINE,
                        (1, m, -xshift if m > 0 else 0, 0, 1, 0), Image.BICUBIC)

            # Zoom 80% of the images
            if np.random.rand() > 0.2:
                z = np.random.uniform(-zoom_fraction, zoom_fraction)
                # NOTE(review): the target size is derived from the pre-skew
                # dimensions, so a skewed image is also squeezed horizontally
                # -- confirm this is intended.
                basewidth = int(org_width * (1 + z))
                wpercent = basewidth / float(org_width)
                hsize = int(float(org_height) * wpercent)
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
                img = img.resize((basewidth, hsize), Image.LANCZOS)

            width, height = img.size

            # Pick a random location to crop. The upper bound is clamped to 1
            # so images with less than 2 px of slack fall back to offset 0
            # instead of making randint raise on an empty range.
            max_start_w = max(int((width - img_width) / 2), 1)
            max_start_h = max(int((height - img_height) / 2), 1)
            start_width = np.random.randint(0, max_start_w)
            start_height = np.random.randint(0, max_start_h)

            crop_dims = (start_width, start_height,
                         img_width + start_width, img_height + start_height)
            img2 = img.crop(crop_dims)

            img2.save(validation_data_out + folder + "/" + filename + "_{}_prep".format(k) + file_extension)

Working on directory: bart
Working on directory: hommer
Working on directory: lisa
Working on directory: marge
