## Image Preprocessor
This script preprocesses all images:
1. File names:  
in each subfolder (combination of size and speed), retrieve name of species/vehicle and use as prefix to rename image files
2. Aspect ratio: crop images from centre so that aspect ratio is 1.5
3. Size: resize to 300*200 pixels
4. Final rename: create a folder called 'stimuli_final' and copy at most 10 images per category into it, using the (domain)_(size)_(speed)_(num).jpg naming convention

In [1]:
import numpy as np
from PIL import Image
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import os
import shutil
from glob import glob
import re

In [2]:
# root folder of the raw animal images; per the notes above it holds one subfolder per size/speed combination
data_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/animals/'

### playground

In [3]:

# subfolders = os.listdir(data_dir)
# for ii,sf in enumerate(subfolders):
#     # retrieve name of animal/vehicle 
#     if len(os.listdir(data_dir + sf)):
#         fname = os.listdir(data_dir + sf)[0]         
#         # retrieve names of image files 
#         imfiles = os.listdir(data_dir + sf + '/' + fname)
#         impath = data_dir + sf + '/' + fname + '/'
#         # rename image files 
#         for jj, img in enumerate(imfiles):
#             os.rename(impath +img, impath + '../' + fname + str(jj+1) + '.jpg')
# #             shutil.rmtree(impath)
        


In [40]:

# data_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/animals/'
# subfolders = os.listdir(data_dir)
# for sf in subfolders:
#     rsfiles = glob(data_dir + sf + '/' + 'rs_*')
#     for f in rsfiles:
#         os.remove(f)

In [50]:

# with a capturing group the matched digits are kept in the result list,
# without one they are dropped (raw strings keep '\d' from being read as a string escape)
print(re.split(r'(\d+)','rs_Sportscar12.jpg'))
print(re.split(r'\d+','rs_Sportscar12.jpg'))

['rs_Sportscar', '12', '.jpg']
['rs_Sportscar', '.jpg']


In [49]:
# everything before the first run of digits, i.e. the name without its running number
re.split(r'\d+','rs_Sportscar12.jpg')[0]

'rs_Sportscar'

## example stimulus grids

In [3]:
%matplotlib qt

In [4]:
# animals: show the first image of every size/speed subfolder in a 5x5 grid
f,axs = plt.subplots(5,5,figsize=(15,15))
axs = axs.ravel()
for ax in axs:
    ax.set_xticks([])
    ax.set_yticks([])
plt.suptitle('Animals',fontsize=15,fontweight='bold')
data_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/animals/'
# os.listdir returns entries in arbitrary order; sort so the panel layout is reproducible
subfolders = sorted(os.listdir(data_dir))
for sf,ax in zip(subfolders,axs):
    imfiles = sorted(os.listdir(data_dir + sf))
    if not imfiles:
        # empty category: label the panel and leave it blank instead of raising IndexError
        ax.set_title(sf)
        continue
    # first image
    imfile = imfiles[0]
    # name of species = file name up to its running number
    typ = re.split(r'\d+',imfile)[0]
    # load image
    with Image.open(data_dir+sf+'/'+imfile) as img:
        ax.imshow(img)
        ax.set_title(sf +'\n' + typ)

f.tight_layout()

In [5]:
# vehicles: show the first image of every size/speed subfolder in a 5x5 grid
f,axs = plt.subplots(5,5,figsize=(15,15))
axs = axs.ravel()
for ax in axs:
    ax.set_xticks([])
    ax.set_yticks([])
plt.suptitle('Vehicles',fontsize=15,fontweight='bold')
data_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/vehicles/'
# os.listdir returns entries in arbitrary order; sort so the panel layout is reproducible
subfolders = sorted(os.listdir(data_dir))
for sf,ax in zip(subfolders,axs):
    imfiles = sorted(os.listdir(data_dir + sf))
    if not imfiles:
        # empty category: label the panel and leave it blank instead of raising IndexError
        ax.set_title(sf)
        continue
    # first image
    imfile = imfiles[0]
    # name of vehicle type = file name up to its running number
    typ = re.split(r'\d+',imfile)[0]
    # load image
    with Image.open(data_dir+sf+'/'+imfile) as img:
        ax.imshow(img)
        ax.set_title(sf +'\n' + typ)

f.tight_layout()

In [80]:
# im_path = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/'
# domains = ['animals','objects']
# in_suffix = '_rs'
# out_suffix = '_expt'
# prefixes = ['an','ve']
# out_dir = im_path + domains[0] + out_suffix + '/'
# if not os.path.exists(out_dir):
#     os.mkdir(out_dir)

In [81]:
# sanity check that the output folder exists
# NOTE(review): out_dir is only assigned in the commented-out cell above and in later cells,
# so this line relies on state left in the notebook kernel -- confirm before re-running top to bottom
os.path.exists(out_dir)

True

## crop all images to same aspect ratio
Crop all images so that they have an aspect ratio of 1.5

In [22]:
def set_widthheight_to_ar(img,w_ref=300,h_ref=200):
    '''
    Returns the (width, height) of the crop window that gives img the
    aspect ratio w_ref:h_ref.

    One side of img is always kept at full length; the other side is
    shortened (rounded down to whole pixels) until the ratio matches.
    An image that already has the reference ratio keeps its size.
    '''
    dims = np.asarray(img).shape
    src_w = dims[1]
    src_h = dims[0]

    target_ratio = w_ref/h_ref
    source_ratio = src_w/src_h

    if target_ratio < source_ratio:
        # image is wider than the reference: keep the height, trim the width
        return w_ref*src_h//h_ref, src_h
    if target_ratio > source_ratio:
        # image is taller than the reference: keep the width, trim the height
        return src_w, src_w*h_ref//w_ref
    # ratios already agree
    return src_w, src_h

def crop_center(img,cropw,croph):
    '''
    Returns the central croph x cropw window of an image array.

    img:   array whose first two axes are (height, width); any further axes
           (e.g. colour channels) are passed through, so 2-D grayscale
           arrays work as well as 3-D colour arrays
    cropw: width of the window in pixels
    croph: height of the window in pixels

    If the requested window is larger than the image along an axis, the
    full extent of that axis is returned.
    '''
    h,w = img.shape[:2]
    # clamp at 0: numpy would read a negative start as "count from the end"
    startw = max(w//2-(cropw//2), 0)
    starth = max(h//2-(croph//2), 0)
    return img[starth:starth+croph,startw:startw+cropw]


def crop_same_aspect(img,w_ref=300,h_ref=300):
    '''
    Centre-crops img to the aspect ratio w_ref:h_ref and returns the result
    as a PIL image.

    Note that the default reference here is square (300:300), whereas
    set_widthheight_to_ar defaults to 300:200; pass both values explicitly
    to get any other ratio.
    '''
    # size of the crop window that has the requested aspect ratio
    target_w,target_h = set_widthheight_to_ar(img,w_ref=w_ref,h_ref=h_ref)
    # cut that window out of the middle of the pixel array
    pixels = np.asarray(img)
    cropped = crop_center(pixels,target_w,target_h)
    return Image.fromarray(cropped)
    

def wrapper_cropimages(data_dir,out_dir):
    '''
    Crops every image below data_dir to a 3:2 aspect ratio and saves it as JPEG.

    data_dir is expected to contain one subfolder of images per category.
    The same subfolder structure is recreated below out_dir, and each output
    file is named 'ar_' + <source name without extension> + '.jpg'.

    data_dir: folder holding the category subfolders
    out_dir:  folder that receives the cropped copies (created if missing)

    Files that PIL cannot open still raise, as before.
    '''
    newprefix = 'ar_'
    os.makedirs(out_dir, exist_ok=True)

    # sorted only to make the processing order reproducible
    for sf in sorted(os.listdir(data_dir)):
        src_dir = os.path.join(data_dir, sf)
        if not os.path.isdir(src_dir):
            # stray files next to the category folders are not image folders
            continue
        print('working through subfolder ' + sf)
        # create output directory if doesn't exist
        dst_dir = os.path.join(out_dir, sf)
        os.makedirs(dst_dir, exist_ok=True)
        # loop over images and crop
        for imname in sorted(os.listdir(src_dir)):
            with Image.open(os.path.join(src_dir, imname)) as img_src:
                # Decide by image mode rather than by channel count: a
                # four-channel array is not necessarily RGBA (CMYK has four
                # bands too), and 'LA' images carry alpha in only two bands.
                if img_src.mode in ('RGBA', 'LA'):
                    # flatten transparency onto a white background
                    rgba = img_src.convert('RGBA')
                    img = Image.new("RGB", rgba.size, (255, 255, 255))
                    img.paste(rgba, mask=rgba.split()[3]) # 3 is the alpha channel
                else:
                    img = img_src.convert('RGB')

            # change aspect ratio
            img = crop_same_aspect(img,w_ref=300,h_ref=200)
            # save with prefix; splitext keeps dots inside the name, so
            # 'a.1.png' and 'a.2.png' no longer collapse onto the same output
            stem = os.path.splitext(imname)[0]
            img.save(os.path.join(dst_dir, newprefix+stem+'.jpg'),format='JPEG', subsampling=0, quality=100)



In [21]:
# crop the animal images: raw folder -> 'animals_sameaspect'
out_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/animals_sameaspect/'
data_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/animals/'
wrapper_cropimages(data_dir=data_dir, out_dir=out_dir)


working through subfolder size1_speed1
working through subfolder size1_speed2
working through subfolder size1_speed3
working through subfolder size1_speed4
working through subfolder size1_speed5
working through subfolder size2_speed1
working through subfolder size2_speed2
working through subfolder size2_speed3
working through subfolder size2_speed4
working through subfolder size2_speed5
working through subfolder size3_speed1
working through subfolder size3_speed2
working through subfolder size3_speed3
working through subfolder size3_speed4
working through subfolder size3_speed5
working through subfolder size4_speed1
working through subfolder size4_speed2
working through subfolder size4_speed3
working through subfolder size4_speed4
working through subfolder size4_speed5
working through subfolder size5_speed1
working through subfolder size5_speed2
working through subfolder size5_speed3
working through subfolder size5_speed4
working through subfolder size5_speed5


In [23]:
# crop the vehicle images: raw folder -> 'vehicles_sameaspect'
out_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/vehicles_sameaspect/'
data_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/vehicles/'
wrapper_cropimages(data_dir=data_dir, out_dir=out_dir)

working through subfolder size1_speed1
working through subfolder size1_speed2
working through subfolder size1_speed3
working through subfolder size1_speed4
working through subfolder size1_speed5
working through subfolder size2_speed1
working through subfolder size2_speed2
working through subfolder size2_speed3
working through subfolder size2_speed4
working through subfolder size2_speed5
working through subfolder size3_speed1
working through subfolder size3_speed2
working through subfolder size3_speed3
working through subfolder size3_speed4
working through subfolder size3_speed5
working through subfolder size4_speed1
working through subfolder size4_speed2
working through subfolder size4_speed3
working through subfolder size4_speed4
working through subfolder size4_speed5
working through subfolder size5_speed1
working through subfolder size5_speed2
working through subfolder size5_speed3
working through subfolder size5_speed4
working through subfolder size5_speed5


## resize images

In [26]:
def wrapper_resize_images(data_dir,out_dir,newWidth,newprefix):
    '''
    Resizes every image below data_dir to a fixed width and saves it as JPEG.

    data_dir is expected to contain one subfolder of images per category.
    The same subfolder structure is recreated below out_dir, and each output
    file is named newprefix + <source name without extension> + '.jpg'.

    data_dir:  folder holding the category subfolders
    out_dir:   folder that receives the resized copies (created if missing)
    newWidth:  width of the output images in pixels; the height is scaled by
               the same factor, so the aspect ratio of each image is kept
    newprefix: string put in front of every output file name

    Files that PIL cannot open still raise, as before.
    '''
    os.makedirs(out_dir, exist_ok=True)
    # sorted only to make the processing order reproducible
    for sf in sorted(os.listdir(data_dir)):
        src_dir = os.path.join(data_dir, sf)
        if not os.path.isdir(src_dir):
            # stray files next to the category folders are not image folders
            continue
        print('working through subfolder ' + sf)
        # create output directory if doesn't exist
        dst_dir = os.path.join(out_dir, sf)
        os.makedirs(dst_dir, exist_ok=True)
        # loop over images and resize
        for imname in sorted(os.listdir(src_dir)):
            with Image.open(os.path.join(src_dir, imname)) as img_src:
                # Decide by image mode rather than by channel count: a
                # four-channel array is not necessarily RGBA (CMYK has four
                # bands too), and 'LA' images carry alpha in only two bands.
                if img_src.mode in ('RGBA', 'LA'):
                    # flatten transparency onto a white background
                    rgba = img_src.convert('RGBA')
                    img = Image.new("RGB", rgba.size, (255, 255, 255))
                    img.paste(rgba, mask=rgba.split()[3]) # 3 is the alpha channel
                else:
                    img = img_src.convert('RGB')

            # get current size
            src_w, src_h = img.size
            # Use the requested width directly and round the height: truncating
            # width*factor could come out one pixel short through float error.
            new_w = int(newWidth)
            new_h = max(1, round(src_h * newWidth / src_w))
            # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in Pillow >= 10
            img = img.resize((new_w, new_h), Image.LANCZOS)
            # save with prefix; splitext keeps dots inside the name, so
            # 'a.1.png' and 'a.2.png' no longer collapse onto the same output
            stem = os.path.splitext(imname)[0]
            img.save(os.path.join(dst_dir, newprefix+stem+'.jpg'),format='JPEG', subsampling=0, quality=100)



In [27]:
# resize animals: 'animals_sameaspect' -> 'animals_resized'
newprefix = 'rs_'
newWidth = 300 # 300 pixels
out_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/animals_resized/'
data_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/animals_sameaspect/'
wrapper_resize_images(data_dir=data_dir, out_dir=out_dir, newWidth=newWidth, newprefix=newprefix)

working through subfolder size1_speed1
working through subfolder size1_speed2
working through subfolder size1_speed3
working through subfolder size1_speed4
working through subfolder size1_speed5
working through subfolder size2_speed1
working through subfolder size2_speed2
working through subfolder size2_speed3
working through subfolder size2_speed4
working through subfolder size2_speed5
working through subfolder size3_speed1
working through subfolder size3_speed2
working through subfolder size3_speed3
working through subfolder size3_speed4
working through subfolder size3_speed5
working through subfolder size4_speed1
working through subfolder size4_speed2
working through subfolder size4_speed3
working through subfolder size4_speed4
working through subfolder size4_speed5
working through subfolder size5_speed1
working through subfolder size5_speed2
working through subfolder size5_speed3
working through subfolder size5_speed4
working through subfolder size5_speed5


In [28]:
# resize vehicles: 'vehicles_sameaspect' -> 'vehicles_resized'
newprefix = 'rs_'
newWidth = 300 # 300 pixels
out_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/vehicles_resized/'
data_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/vehicles_sameaspect/'
wrapper_resize_images(data_dir=data_dir, out_dir=out_dir, newWidth=newWidth, newprefix=newprefix)

working through subfolder size1_speed1
working through subfolder size1_speed2
working through subfolder size1_speed3
working through subfolder size1_speed4
working through subfolder size1_speed5
working through subfolder size2_speed1
working through subfolder size2_speed2
working through subfolder size2_speed3
working through subfolder size2_speed4
working through subfolder size2_speed5
working through subfolder size3_speed1
working through subfolder size3_speed2
working through subfolder size3_speed3
working through subfolder size3_speed4
working through subfolder size3_speed5
working through subfolder size4_speed1
working through subfolder size4_speed2
working through subfolder size4_speed3
working through subfolder size4_speed4
working through subfolder size4_speed5
working through subfolder size5_speed1
working through subfolder size5_speed2
working through subfolder size5_speed3
working through subfolder size5_speed4
working through subfolder size5_speed5


In [38]:
# same for vehicles: resize the raw (uncropped) vehicle images into 'vehicles_rs'
data_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/vehicles/'
out_dir = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/vehicles_rs/'
newWidth = 300 # 300 pixels
newprefix = 'rs_'

# this cell used to carry its own copy of the resize loop; call the shared
# helper instead (it also creates out_dir itself when it does not exist yet)
wrapper_resize_images(data_dir,out_dir,newWidth,newprefix)
            
            

working through subfolder size1_speed1
working through subfolder size1_speed2
working through subfolder size1_speed3
working through subfolder size1_speed4
working through subfolder size1_speed5
working through subfolder size2_speed1
working through subfolder size2_speed2
working through subfolder size2_speed3
working through subfolder size2_speed4
working through subfolder size2_speed5
working through subfolder size3_speed1
working through subfolder size3_speed2
working through subfolder size3_speed3
working through subfolder size3_speed4
working through subfolder size3_speed5
working through subfolder size4_speed1
working through subfolder size4_speed2
working through subfolder size4_speed3
working through subfolder size4_speed4
working through subfolder size4_speed5
working through subfolder size5_speed1
working through subfolder size5_speed2
working through subfolder size5_speed3
working through subfolder size5_speed4
working through subfolder size5_speed5


## rename files for js expt
Replace the species/vehicle name in each file name with a domain prefix ('an' or 've') and the size/speed values of its subfolder.

In [31]:
# Collect the resized images of both domains in one flat folder, named
# <prefix>_<subfolder>_<num>.jpg, keeping at most n_img_per_cat images per subfolder.
im_path = '/media/timo/data/DPHIL_03_TRANSFERLEARN/datasets/grid/'
domains = ['animals','vehicles']
in_suffix = '_resized'
stim_dir = 'stimuli_final'
prefixes = ['an','ve']
n_img_per_cat = 10 # how many images max per category
out_path = im_path + stim_dir + '/'
os.makedirs(out_path, exist_ok=True)

# The running number is the run of digits directly in front of the extension.
# Anchoring at the end keeps digits inside a species/vehicle name from being
# mistaken for the image number.
imgnum_pattern = re.compile(r'(\d+)\.[^.]+$')

for prefix,dom in zip(prefixes,domains):
    proj_path = im_path + dom + in_suffix + '/'
    # obtain names of all subfolders (size x speed)
    subfolders = sorted(os.listdir(proj_path))
    # loop over subfolders
    for sf in subfolders:
        # loop over images
        images = sorted(os.listdir(proj_path + sf))
        for img in images:
            found = imgnum_pattern.search(img)
            if found is None:
                # no running number in the name, so no stimulus name can be built
                continue
            imgnum = found.group(1)
            if int(imgnum) > n_img_per_cat:
                continue
            stim_in = proj_path+sf+'/'+img
            stim_out = out_path+prefix+'_'+sf+'_'+imgnum+'.jpg'
            shutil.copy(stim_in,stim_out)
            
    

