In [1]:
import os
from PIL import Image
import numpy as np
import pandas as pd

In [2]:
#Load the data

labels = pd.read_csv("../Data/train_labels.csv")
submissionFormat = pd.read_csv("../Data/SubmissionFormat.csv")

In [3]:
labels.head()

Unnamed: 0,id,genus
0,520,1
1,3800,1
2,3289,1
3,2695,1
4,4922,1


In [4]:
submissionFormat.head()

Unnamed: 0,id,genus
0,2783,0.5
1,2175,0.5
2,4517,0.5
3,2831,0.5
4,3556,0.5


In [5]:
print submissionFormat.shape, labels.shape

(992, 2) (3969, 2)


In [6]:
def get_image(filename, folder, size):
    filepath = os.path.join("../Data/images", folder, filename + ".jpg")
    image = Image.open(filepath)
    image.thumbnail((size, size), Image.ANTIALIAS)
    return (np.array(image, dtype=np.int32)[:,:,:3]).swapaxes(0,2)

In [7]:
def load_images(filenames, folder, size):
    features = np.empty([len(filenames), 3, size, size])
    for counter, file in enumerate(filenames):
#        print file, counter
        features[counter] = get_image(str(file), folder, size)
    
    return features

In [8]:
train_X = load_images(labels.id, "train", 32)
test_X = load_images(submissionFormat.id, "test", 32)

In [9]:
train_X.shape

(3969, 3, 32, 32)

In [10]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD

Using gpu device 0: GeForce GT 650M


In [19]:
model = Sequential()
model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 
model.add(Activation('relu'))
model.add(Convolution2D(32, 32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(64, 32, 3, 3, border_mode='full')) 
model.add(Activation('relu'))
model.add(Convolution2D(64, 64, 3, 3)) 
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(64*8*8, 256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256, 1))
model.add(Activation('sigmoid'))

In [20]:
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)

In [21]:
model.compile(loss='categorical_crossentropy', optimizer=sgd)

In [33]:
model.fit(train_X, np.array(labels.genus), batch_size=32, nb_epoch=10)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9


<keras.callbacks.History at 0x129794490>

In [34]:
model_prediction = model.predict(test_X, batch_size=32)

In [35]:
min(model_prediction)

array([  1.37805717e-12])

In [36]:
submissionFormat.genus = model_prediction

In [37]:
submissionFormat.head()

Unnamed: 0,id,genus
0,2783,0.9999986
1,2175,0.9993863
2,4517,6.410659e-05
3,2831,8.829314e-05
4,3556,6.482812e-10


In [38]:
submissionFormat.to_csv("../Submission/model_2.csv")