In [7]:
# Read files from zip to csv and get labels
import pandas as pd
import zipfile
from sklearn.preprocessing import LabelEncoder

train = pd.read_csv('./leaf/train.csv')

test = pd.read_csv('./leaf/test.csv')


def labels_ids(train):
    label_map = dict(zip(train['id'], train['species']))
    classes = set(train['species'])
    return label_map, classes

label_map, classes = labels_ids(train)

In [8]:
## preserve aspect ratio whilst resizing
#http://stackoverflow.com/questions/9103257/resize-image-maintaining-aspect-ratio-and-making-portrait-and-landscape-images-e
from PIL import Image, ImageChops, ImageOps
from os import path, makedirs, scandir, system


# resize image and maintain asdpect ratio
def makeThumb(f_in, size=(96,96), pad=True):
    
    image = Image.open(f_in)
    image.thumbnail(size, Image.ANTIALIAS)
    
    
    #TODO: change this to add blank row
    if image.size[1]%2 != 0:
        image = image.crop((0,0, image.size[0], image.size[1]-1))
    image_size = image.size
    if pad:
        thumb = image.crop( (0, 0, size[0], size[1]) )

        offset_x = max( (size[0] - image_size[0]) // 2, 0 )
        offset_y = max( (size[1] - image_size[1]) // 2, 0 )

        thumb = ImageChops.offset(thumb, offset_x, offset_y)

    else:
        thumb = ImageOps.fit(image, size, Image.ANTIALIAS, (0.5, 0.5))
    return thumb

from matplotlib import pyplot as plt
from IPython import display

leaf_imgs = dict()

# Need to also save them depending on the right class
    

name_list = [entry.name for entry in scandir('./leaf/images') if entry.is_file()]

# system('rm -rf train')
# system('rm -rf valid')

from collections import Counter
count = Counter()
for i, f_in in enumerate(name_list[1:]):
    leaf_imgs[i] = makeThumb('./leaf/images/'+f_in)
    iden = int(f_in.split('.')[0])
    if iden in label_map.keys():

        label = label_map[iden]
        if count[label]<2: 
            directory = "./leaf/valid/"+label
            count[label] += 1
        else:
            directory = "./leaf/train/"+label

    else:
        directory = './leaf/test_imgs'
    if not path.exists(directory):
        makedirs(directory)
    leaf_imgs[i].save(directory+'/'+f_in,'JPEG')

In [9]:
## Augment data_set Using Keras
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

# the .flow() command below generates batches of randomly transformed images
# and saves the results to the `preview/` directory

if not path.exists("./leaf/aug_imgs"):
            makedirs("./leaf/aug_imgs")

train_generator = datagen.flow_from_directory(
        directory = './leaf/train',  # this is the target directory
        batch_size= 32,
        target_size = (96,96),
        class_mode='categorical',
        save_to_dir = './leaf/aug_imgs',
        color_mode ='grayscale'
        )


valid_generator = datagen.flow_from_directory(
        directory = './leaf/valid',  # this is the target directory
        batch_size= 32,
        target_size = (96, 96),
        class_mode='categorical',
        save_to_dir = './leaf/aug_imgs',
        color_mode ='grayscale'
        )

Found 791 images belonging to 99 classes.
Found 198 images belonging to 99 classes.


In [10]:
# Create the Keras model

from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

model = Sequential()
model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, 
                        input_shape=(96, 96, 1), dim_ordering="tf"))

model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# layter 1

model.add(Convolution2D(32, 6, 6))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))


# layter 2

model.add(Convolution2D(32, 6, 6))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))


# layter 3

model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(4, 4)))


model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(Dense(99))
model.add(Activation('sigmoid'))


model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [11]:
model.fit_generator(
        train_generator,
        samples_per_epoch=10000,
        nb_epoch=10,
        validation_data=valid_generator,
        nb_val_samples=32*10)

model.save_weights('first_try.h5')

Epoch 1/10
  983/10000 [=>............................] - ETA: 710s - loss: 10.6135 - acc: 0.0092

KeyboardInterrupt: 