### Resize2

This notebook resizes every picture of the selected subset to a square whose side is given by the `size` variable. Each image is first scaled, preserving its aspect ratio, so that its shorter side equals `size`; the longer side is then cropped around the center so that both dimensions end up equal to `size`.

Folders 'data/resized2/train', 'data/resized2/validation', 'data/resized2/test' need to be created prior to the execution of this notebook.

In [1]:
# Base imports
import os
import pandas as pd

# scikit-image
from skimage import io
from skimage.transform import resize
from skimage.util import img_as_ubyte
from skimage.util import crop

# Parallelisation
from joblib import Parallel, delayed

In [2]:
# Edge length, in pixels, of the square images produced by this notebook.
size = 280

# Number of parallel workers, and which subset of the data to process.
threads = 6
subset = 'validation'

# Root data directory; the empty last component keeps the trailing separator.
folder = os.path.join(".", "data", "")
# Location of the subset's CSV file, relative to `folder`.
file = os.path.join(subset, subset + ".csv")

# Load the subset's metadata table and preview its first rows.
df = pd.read_csv(folder + file, sep=",")
df.head()

Unnamed: 0,observationid,mediaid,vote,content,classid,family,genus,species,author,date,location,latitude,longitude,yearinclef,observationid2014,imageid2014,learntag
0,31094,37775,3.0,Flower,691,Asteraceae,Cirsium,Cirsium vulgare (Savi) Ten.,mathieu menand,2006-6-15,L'Île-d'Olonne,,,PlantCLEF2014,13734.0,40168.0,Train
1,25955,54120,3.0,Flower,30269,Papaveraceae,Papaver,Papaver rhoeas L.,thierry pernot,1800-1-1,,,,PlantCLEF2014,4094.0,18147.0,Train
2,13483,11552,5.0,Flower,30269,Papaveraceae,Papaver,Papaver rhoeas L.,liliane roubaudi,2014-6-21,Meyrié,,,PlantCLEF2015,,,Train
3,35491,109516,4.0,Flower,6509,Orchidaceae,Ophrys,Ophrys passionis Sennen,genevieve botti,2014-3-15,Tarascon,,,PlantCLEF2015,,,Train
4,11537,43814,3.5,Flower,493,Asteraceae,Bellis,Bellis perennis L.,martin serre,2012-3-22,Indre,47.19371,-1.6805,PlantCLEF2014,690.0,28957.0,Train


In [3]:
def resize2(loadpath, savepath, fname):
    """Resize one image to a ``size`` x ``size`` square and save it.

    The image is scaled, keeping its aspect ratio, so that its shorter side
    equals the notebook-level ``size`` variable. The longer side is then
    cropped around its center so both sides equal ``size``. When the surplus
    on the longer side is odd, the extra pixel is removed from the
    top/left edge.

    Parameters
    ----------
    loadpath : str
        Directory the source image is read from.
    savepath : str
        Directory the result is written to. This function does not create
        it, so it is expected to exist already.
    fname : str
        File name of the image inside ``loadpath``; the same name is used
        for the output file inside ``savepath``.

    Returns
    -------
    ndarray
        The resized and cropped image as produced by
        ``skimage.transform.resize`` (not the 8-bit version written to disk).
    """
    # Read from the directory the caller passed in, not from notebook globals.
    image = io.imread(os.path.join(loadpath, str(fname)))
    height, width = image.shape[:2]

    # The shorter side becomes exactly `size`; the longer side is scaled by
    # the same factor. max() guards against float rounding pushing the
    # scaled side just below `size` (possible for square inputs).
    if height >= width:
        scaled = int(float(height) * (size / float(width)))
        new_shape = (max(size, scaled), size)
    else:
        scaled = int(float(width) * (size / float(height)))
        new_shape = (size, max(size, scaled))
    image = resize(image, new_shape, anti_aliasing=True)

    # Center crop by slicing the first two axes only, so both 2-D (grayscale)
    # and 3-D (color) arrays are handled. The offset is based on the parity
    # of the surplus itself, which stays correct for odd values of `size`.
    surplus_rows = image.shape[0] - size
    surplus_cols = image.shape[1] - size
    top = surplus_rows - surplus_rows // 2
    left = surplus_cols - surplus_cols // 2
    image = image[top:top + size, left:left + size]

    # resize() returns floating-point data; convert explicitly to 8-bit
    # before writing instead of relying on an implicit conversion in imsave.
    io.imsave(fname=os.path.join(savepath, str(fname)), arr=img_as_ubyte(image))
    return image

In [4]:
# Input and output directories for the selected subset. The empty last
# component makes os.path.join end each path with a separator.
loadpath = os.path.join(folder, subset, "")
savepath = os.path.join(folder, 'resized2', subset, "")

In [5]:
def _resize_and_discard(loadpath, savepath, fname):
    """Run ``resize2`` for one file and return ``None``.

    ``resize2`` returns the processed image. When it is called directly
    through ``Parallel``, every worker has to send that array back to the
    parent, which keeps all of them in a list until the run finishes. Only
    the files written to disk are needed here, so the array is dropped
    inside the worker.
    """
    resize2(loadpath, savepath, fname)


# One task per entry of df['mediaid'], spread over `threads` workers.
# The trailing semicolon suppresses the cell's output.
Parallel(n_jobs=threads)(
    delayed(_resize_and_discard)(loadpath, savepath, str(media_id) + '.jpg')
    for media_id in df['mediaid']
);