In [None]:
from time import time
# Wall-clock start of the run; the final cell prints the seconds elapsed since t0.
t0 = time()

In [None]:
from PIL import Image
from PIL import ImageEnhance
from PIL import ImageFilter
import pandas as pd
import random
import numpy as np

# Root folder of the data set. The cells below read train.csv,
# SubmissionFormat.csv, train/ and test/ from it, and create trainDir/,
# valDir/, testDir/ and newtestset.csv inside it.
datadir = "/home/ubuntu/kaggle/NaiveBees/"
# Alternative location (kept for reference, disabled):
#datadir = "F:/TheMetisChallenge-NaiveBeesClassifier/"


In [11]:
def presize(image, nsize=(256,256)):
    """Resize ``image`` to exactly ``nsize`` using the Lanczos filter.

    image -- object exposing ``resize(size, resample=...)`` (a PIL image).
    nsize -- (width, height) passed as-is to ``resize``; defaults to 256x256.

    Returns whatever ``image.resize`` returns (a new image).
    """
    # ANTIALIAS is the legacy alias of LANCZOS and no longer exists in
    # Pillow >= 10; prefer LANCZOS and only fall back on old PIL builds
    # that do not define it yet.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS
    return image.resize(nsize, resample=resample)

def rollh(image, deltah):
    """Roll an image horizontally (cyclic shift of its columns).

    Column ``x`` of the result is column ``(x + deltah) % width`` of the
    input: a positive ``deltah`` moves the content to the left, a negative
    one to the right, and columns pushed off one edge re-enter on the other.

    image  -- PIL image (anything ``np.asarray`` can convert).
    deltah -- integer shift in pixels; any sign or magnitude.

    Returns ``image`` itself when ``deltah`` is 0, otherwise a new image
    built with ``Image.fromarray``; the input is not modified.
    """
    if deltah == 0 : return image
    # np.roll wraps the shift modulo the width itself, so negative shifts and
    # multiples of the width need no special casing (the previous crop/paste
    # version went through zero-width crops for the latter).
    pixels = np.asarray(image)
    return Image.fromarray(np.roll(pixels, -deltah, axis=1))

def rollv(image, deltav):
    """Roll an image vertically (cyclic shift of its rows).

    Row ``y`` of the result is row ``(y + deltav) % height`` of the input:
    a positive ``deltav`` moves the content up, a negative one down, and rows
    pushed off one edge re-enter on the other.

    image  -- PIL image (anything ``np.asarray`` can convert).
    deltav -- integer shift in pixels; any sign or magnitude.

    Returns ``image`` itself when ``deltav`` is 0, otherwise a new image
    built with ``Image.fromarray``; the input is not modified.
    """
    if deltav == 0 : return image
    # The shift must wrap on the image HEIGHT. The previous version reduced it
    # modulo the width, which gave wrong results for non-square images.
    pixels = np.asarray(image)
    return Image.fromarray(np.roll(pixels, -deltav, axis=0))

def zoom(image, zoom_range, out_size=(200,200)):
    """Apply a random zoom to ``image`` and resize the result to ``out_size``.

    A factor is drawn log-uniformly between the two bounds of ``zoom_range``
    (with numpy's global random generator):

    * factor < 1 -- keep the central region covering about ``factor`` of each
      dimension;
    * factor > 1 -- centre the image on a canvas about ``factor`` times larger,
      filled with the image's average colour;
    * factor == 1 -- use the image unchanged.

    image      -- PIL image.
    zoom_range -- (min, max) zoom factors, both > 0.
    out_size   -- (width, height) of the returned image; defaults to the
                  200x200 used so far.

    Returns a new image of size ``out_size``; ``image`` is not modified.
    """
    log_zoom_range = [np.log(z) for z in zoom_range]
    factor = np.exp(np.random.uniform(*log_zoom_range))
    size_x, size_y = image.size
    if factor < 1:
        diffx = int(size_x * (1-factor) / 2)
        diffy = int(size_y * (1-factor) / 2)
        framed = image.crop((diffx, diffy, size_x-diffx, size_y-diffy))
    elif factor > 1:
        max_x, max_y = (int(size_x*factor), int(size_y*factor))
        # keep the canvas dimensions even, as before
        max_x -= max_x % 2
        max_y -= max_y % 2
        # Image.new needs integer colour components
        fill = tuple(int(c) for c in colorAverage(image))
        framed = Image.new(mode="RGB", size=(max_x, max_y), color=fill)
        # Paste by upper-left corner only: a 4-tuple box has to match the
        # pasted image's size exactly, which failed for odd-sized images.
        framed.paste(image, ((max_x-size_x)//2, (max_y-size_y)//2))
    else:
        # A factor of exactly 1 used to skip the resize and return the image
        # at its original size, unlike the two other branches.
        framed = image
    return presize(framed, out_size)

def colorAverage( img ):
    """Return the average colour of ``img`` as an ``(r, g, b)`` tuple of ints.

    img -- object exposing ``size`` (width, height) and ``load()`` returning a
           pixel accessor indexable by ``[x, y]`` (a PIL image).

    Pixels with at least four components are treated as having alpha in the
    fourth one, and only those with alpha > 200 enter the average. Scalar
    pixels (single-band images) are used as grey levels. Components are
    averaged with floor division so the result can be passed directly as a
    PIL colour. Returns ``(0, 0, 0)`` when no pixel qualifies (empty or fully
    transparent image).
    """
    width, height = img.size
    pixels = img.load()
    r = g = b = 0
    counter = 0
    for x in range(width):
        for y in range(height):
            cpixel = pixels[x, y]
            if not hasattr(cpixel, "__len__"):
                # single-band image: one value per pixel
                cpixel = (cpixel, cpixel, cpixel)
            elif len(cpixel) > 3 and cpixel[3] <= 200:
                # Too transparent: leave it out of BOTH the sums and the
                # count. Counting it used to drag the average towards black.
                continue
            r += cpixel[0]
            g += cpixel[1]
            b += cpixel[2]
            counter += 1
    if counter == 0:
        return (0, 0, 0)
    # floor division keeps integer components regardless of the Python
    # version's division semantics
    return (int(r // counter), int(g // counter), int(b // counter))

def smallRotation(image, angle):
    """Rotate ``image`` by ``angle`` degrees and return a 200x200 crop.

    The image is centred on a 256x256 RGB canvas filled with its average
    colour, the canvas is rotated about its centre (bicubic resampling, canvas
    size kept), and the box (28, 28, 228, 228) is cropped out.

    image -- PIL image; the crop box is centred on it when it is 200x200.
    angle -- rotation angle in degrees, passed to ``Image.rotate``.

    Returns a new image; ``image`` is not modified.
    """
    # Image.new needs integer colour components
    fill = tuple(int(c) for c in colorAverage(image))
    canvas = Image.new(mode="RGB", size=(256,256), color=fill)
    width, height = image.size
    # Paste by upper-left corner only: a 4-tuple box has to match the pasted
    # image's size exactly, which failed for odd-sized images.
    canvas.paste(image, ((256-width)//2, (256-height)//2))
    rotated = canvas.rotate(angle, resample=Image.BICUBIC, expand=False)
    return rotated.crop((28,28,228,228))

def randomEnhancement(image):
    """Return a copy of ``image`` with one randomly chosen enhancement.

    One of five outcomes is picked uniformly: no change (plain copy), colour
    x1.75, sharpness x1.75, contrast x1.25 or brightness x1.25.
    """
    choice = random.randint(0,4)
    if choice == 0:
        # no enhancement this time
        return image.copy()
    # draw -> (enhancer class, strength)
    recipes = {
        1: (ImageEnhance.Color, 1.75),
        2: (ImageEnhance.Sharpness, 1.75),
        3: (ImageEnhance.Contrast, 1.25),
        4: (ImageEnhance.Brightness, 1.25),
    }
    enhancer_cls, strength = recipes[choice]
    return enhancer_cls(image).enhance(strength)

def randomFilter(image):
    """Return a copy of ``image`` with one randomly chosen filter applied.

    The choice is uniform among BLUR, DETAIL, EDGE_ENHANCE, SMOOTH, SHARPEN
    and "no filter" (plain copy).
    """
    candidates = [
        ImageFilter.BLUR,
        ImageFilter.DETAIL,
        ImageFilter.EDGE_ENHANCE,
        ImageFilter.SMOOTH,
        ImageFilter.SHARPEN,
        None,  # leave the image unfiltered
    ]
    picked = candidates[random.randint(0,5)]
    duplicate = image.copy()
    if not picked:
        return duplicate
    return duplicate.filter(picked)

def perturb(imag, roll_range = (-10, 10), zoom_range = (1/1.5 , 1.5), rotation_range = (-180, 180), 
            flip=True, enhance=True, rfilter=True):
    """Return a randomly augmented version of ``imag``.

    Steps, in order: random zoom, random rotation by an integer angle drawn
    from ``rotation_range``, optional flip, optional random enhancement,
    optional random filter, and a final ``presize`` with its default size.

    roll_range     -- accepted but currently unused (the roll step is disabled).
    zoom_range     -- (min, max) factors forwarded to ``zoom``.
    rotation_range -- (min, max) angle in degrees, both bounds inclusive.
    flip           -- when true, flip 60% of the time (30% left-right,
                      30% top-bottom).
    enhance        -- when true, apply ``randomEnhancement``.
    rfilter        -- when true, apply ``randomFilter``.
    """
    augmented = zoom(imag.copy(), zoom_range)

    angle = random.randint(rotation_range[0], rotation_range[1])
    augmented = smallRotation(augmented, angle)

    # The draw happens even when flipping is off.
    flip_draw = random.random()
    if flip:
        if flip_draw > 0.7:
            augmented = augmented.transpose(Image.FLIP_LEFT_RIGHT)
        elif flip_draw > 0.4:
            augmented = augmented.transpose(Image.FLIP_TOP_BOTTOM)

    if enhance:
        augmented = randomEnhancement(augmented)
    if rfilter:
        augmented = randomFilter(augmented)
    return presize(augmented)

In [None]:
##Creation of Training and Validation Sets and augmentation of train set
# Every labelled image is assigned at random to trainDir/ or valDir/ (one
# sub-folder per genus), saved there after presize(), and, unless it is a
# validation image with notAugmentOnValidation set, accompanied by
# number_of_perturbed_images augmented copies.
print("Training and Validation sets")
labelsD = pd.read_csv(datadir + '/train.csv')

validationRatio = 0.2             # probability that an image goes to valDir
notAugmentOnValidation = True     # True: validation images get no perturbed copies
number_of_perturbed_images = 10   # perturbed copies written per image

import shutil, os

# Create <datadir>/{trainDir,valDir}/{Bombus,Apis} where missing.
for d in ["trainDir", "valDir"]:
    split_dir = "%s/%s" % (datadir, d)
    if not os.path.exists(split_dir):
        os.mkdir(split_dir)
    for cl in ["Bombus", "Apis"]:
        class_dir = "%s/%s" % (split_dir, cl)
        if not os.path.exists(class_dir):
            os.mkdir(class_dir)

for _, row in labelsD.iterrows():
    in_validation = random.random() < validationRatio
    genus_name = "Bombus" if row['genus'] > 0.49 else "Apis"
    out_dir = "%s/%s/%s" % (datadir, "valDir" if in_validation else "trainDir", genus_name)

    origin = "%s/train/%d.jpg" % (datadir, row['id'])
    dest = "%s/%d.jpg" % (out_dir, row['id'])

    im = Image.open(origin)
    presize(im).save(dest)

    if in_validation and notAugmentOnValidation:
        continue

    for i in range(number_of_perturbed_images):
        # File name is "90" + copy index + image id.
        # NOTE(review): such a name can coincide with the file name of another
        # image (e.g. copy 1 of id 5 and id 9015 both give 9015.jpg), and it
        # is only unambiguous while the copy index has one digit. Confirm
        # against the id range of train.csv.
        dest1 = "%s/90%d%d.jpg" % (out_dir, i, row['id'])
        perturb(im).save(dest1)
    

In [None]:
# Build testDir/: for every id in SubmissionFormat.csv, save the presize()d
# test image plus number_of_perturbed_images perturbed copies.
# Relies on os and number_of_perturbed_images from the training cell.
print("Testing set")

for d in ["trainDir", "testDir"]:
    target = "%s/%s" % (datadir, d)
    if not os.path.exists(target):
        os.mkdir(target)

labelsT = pd.read_csv(datadir + '/SubmissionFormat.csv')

test_dir = "%s/testDir/"  % (datadir)
for _, row in labelsT.iterrows():
    origin = "%s/test/%d.jpg" % (datadir, row['id'])
    dest = "%s/%d.jpg" % (test_dir, row['id'])

    # The raw file used to be copied to dest first, only to be overwritten by
    # the resized image right away; that redundant copy has been dropped.
    im = Image.open(origin)
    presize(im).save(dest)

    for i in range(number_of_perturbed_images):
        # same "90" + copy index + image id naming as for the training set
        dest1 = "%s/90%d%d.jpg" % (test_dir, i, row['id'])
        perturb(im).save(dest1)

In [None]:
import os
# Write the path of every entry found in testDir/ to newtestset.csv
# (a single unnamed column, no index).
test_dir = "%s/testDir/"  % (datadir)
# os.listdir returns entries in arbitrary order; sort them so the CSV is
# identical from one run to the next.
testset = sorted(os.listdir(test_dir))
newtestset = ["%s%s" % (test_dir, x) for x in testset]
dfnts = pd.DataFrame(newtestset)
dfnts.to_csv(datadir + "/newtestset.csv", index=False)

In [None]:
# Report completion and the wall-clock time elapsed since t0 was set in the
# first cell. Function-call form of print, matching the line below.
print('Done.')
print("... in %0.3fs" % (time() - t0))