In [135]:
import glob
import time
import os
import PIL
import numpy as np
import pandas as pd
from os import environ
from PIL import Image
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.utils import np_utils
from keras import backend as K
from sklearn.cross_validation import train_test_split

# attributes: dataset locations, class folders and model input size
train_path = 'train/'
test_path = 'test/'

# one entry per class folder; num_classes is derived from this list
folders = ["c0", "c1", "c2", "c3", "c4"]
num_classes = len(folders)

# square input size (in pixels) used when declaring the network input
img_width = img_height = 300

# user defined function to change keras backend
def set_keras_backend(backend):
    """Switch the active Keras backend if it differs from ``backend``.

    Sets the ``KERAS_BACKEND`` environment variable and reloads the
    ``keras.backend`` module (``K``) so the new value is picked up.

    Args:
        backend: name of the backend to activate, e.g. ``"tensorflow"``.

    Raises:
        AssertionError: if the backend is still different after the reload.
    """
    if K.backend() != backend:
        # `reload` is not a builtin on Python 3; it lives in importlib.
        from importlib import reload

        environ['KERAS_BACKEND'] = backend
        reload(K)
        assert K.backend() == backend

# call the function with "tensorflow"
set_keras_backend("tensorflow")
# 'channels_last' is the Keras 2 name for the old 'tf' dimension ordering
# (arrays shaped rows x cols x channels); set_image_dim_ordering is the
# deprecated Keras 1 spelling of the same setting.
K.set_image_data_format('channels_last')

In [136]:
# build model function
def build_model():
    """Build and compile the CNN image classifier.

    Architecture: two 3x3 convolutions (32 filters, ReLU), one 2x2 max-pool,
    then Flatten -> Dense(64, ReLU) -> Dropout(0.5) -> Dense(num_classes, softmax).

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    # channels-last image arrays are (rows, cols, channels) == (height, width, 3)
    model.add(Conv2D(32, (3, 3), input_shape=(img_height, img_width, 3)))
    model.add(Activation('relu'))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    # one output unit per class folder, so the head tracks `folders`
    # instead of a hard-coded 5
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

In [137]:
# loading train images
def load_train():
    """Load every training JPEG found under ``train_path/<class folder>/``.

    The integer label of an image is the position of its folder in ``folders``.
    Files inside each folder are read in sorted order so the returned lists
    are the same on every run (glob itself gives no ordering guarantee), which
    also matches how ``load_test`` orders its files.

    Returns:
        (X_train, y_train, X_train_id): image arrays from ``read_image``,
        integer class labels, and base filenames, all index-aligned.
    """
    X_train = []
    X_train_id = []
    y_train = []
    start_time = time.time()

    print('Loading training images...')
    for index, fld in enumerate(folders):
        print('Loading {} files (Index: {})'.format(fld, index))
        path = os.path.join(train_path, fld, '*.jpg')
        for fl in sorted(glob.glob(path)):
            X_train.append(read_image(fl))
            X_train_id.append(os.path.basename(fl))
            y_train.append(index)

    print('Training data load time: {} seconds'.format(round(time.time() - start_time, 2)))
    return X_train, y_train, X_train_id

In [138]:
# loading test images
def load_test():
    """Read every JPEG directly under ``test_path``, in sorted path order.

    Prints the glob pattern that is searched.

    Returns:
        (X_test, X_test_id): image arrays from ``read_image`` and the
        matching base filenames, index-aligned.
    """
    pattern = os.path.join(test_path, '', '*.jpg')
    print (pattern)
    image_files = sorted(glob.glob(pattern))

    X_test = [read_image(image_file) for image_file in image_files]
    X_test_id = [os.path.basename(image_file) for image_file in image_files]

    return X_test, X_test_id

In [168]:
# read image function

def read_image(f):
    """Load one image file as a fixed-size, 3-channel float array.

    Args:
        f: path of the image file to open.

    Returns:
        A float64 numpy array of shape (img_height, img_width, 3) holding
        the nearest-neighbour-resized RGB pixel values.
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been copied out, instead of leaving that to garbage collection.
    with Image.open(f) as source:
        # Force 3 channels so grayscale / CMYK / alpha files produce the same
        # array shape as ordinary RGB JPEGs.
        rgb = source.convert('RGB')
        # PIL's resize takes (width, height); use the module-level size
        # rather than a second hard-coded 300x300.
        resized = rgb.resize((img_width, img_height), PIL.Image.NEAREST)

    return np.asarray(resized, dtype='float64')

In [169]:
def normalize_train_data():
    """Load the training set and convert it to model-ready arrays.

    Pixels are stacked as uint8, cast to float32 and divided by 255; labels
    are one-hot encoded over ``num_classes``. Prints the image array shape.

    Returns:
        (train_data, train_target, train_id): scaled float32 image array,
        one-hot label array, and the list of base filenames.
    """
    images, labels, train_id = load_train()

    # stack as uint8 first, then rescale to the [0, 1] range as float32
    pixels = np.array(images, dtype=np.uint8).astype('float32')
    train_data = pixels / 255

    label_array = np.array(labels, dtype=np.uint8)
    train_target = np_utils.to_categorical(label_array, num_classes)

    print('Shape of training data:', train_data.shape)
    return train_data, train_target, train_id

In [170]:
def normalize_test_data():
    """Load the test set and convert it to a model-ready array.

    Pixels are stacked as uint8, cast to float32 and divided by 255.
    Prints the resulting array shape.

    Returns:
        (test_data, test_id): scaled float32 image array and the list of
        base filenames in the same order.
    """
    images, test_id = load_test()

    # stack as uint8 first, then rescale to the [0, 1] range as float32
    pixels = np.array(images, dtype=np.uint8).astype('float32')
    test_data = pixels / 255

    print('Shape of testing data:', test_data.shape)
    return test_data, test_id

In [171]:
# normalizing train and test data
# Each call reads its images from disk and returns float32 arrays scaled by
# 1/255; the training call also returns one-hot labels. The *_id lists hold
# the base filenames in the same order as the rows of the data arrays.
train_data, train_target, train_id = normalize_train_data()
test_data, test_id = normalize_test_data()

Loading training images...
Loading c0 files (Index: 0)
Loading c1 files (Index: 1)
Loading c2 files (Index: 2)
Loading c3 files (Index: 3)
Loading c4 files (Index: 4)
Training data load time: 1.44 seconds
Shape of training data: (120, 300, 300, 3)
test/*.jpg
Shape of testing data: (65, 300, 300, 3)


In [143]:
# hold out part of the training data as a validation set
val_split = .25
# fixed seed so the same images land in the validation set on every run
split_seed = 42
X_train, X_valid, Y_train, Y_valid = train_test_split(
    train_data, train_target, test_size=val_split, random_state=split_seed)

In [144]:
# build model
# build_model() returns the compiled network; summary() prints its layers,
# output shapes and parameter counts.
model = build_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_11 (Conv2D)           (None, 298, 298, 32)      896       
_________________________________________________________________
activation_21 (Activation)   (None, 298, 298, 32)      0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 296, 296, 32)      9248      
_________________________________________________________________
activation_22 (Activation)   (None, 296, 296, 32)      0         
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 148, 148, 32)      0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 700928)            0         
_________________________________________________________________
dense_11 (Dense)             (None, 64)                44859456  
__________

In [145]:
# train model
# Fits on the training split for 50 epochs in mini-batches of 15, reshuffling
# the training samples each epoch; the held-out validation split is evaluated
# after every epoch. fit() returns a History object, which is this cell's output.
model.fit(X_train,
          Y_train,
          batch_size=15,
          epochs=50,
          shuffle=True,
          verbose=1,
          validation_data=(X_valid, Y_valid))

Train on 90 samples, validate on 30 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1287ace48>

In [146]:
# Predict test data class
# A single forward pass: predict() returns the softmax probabilities per class,
# and the predicted class is the index of the largest one. This replaces the
# separate predict_classes / predict_proba calls, which each re-ran the network
# and are not available in newer Keras releases.
prob = model.predict(test_data)
preds = prob.argmax(axis=-1)
print(preds)

[4 0 0 1 0 0 4 3 2 4 4 1 2 4 0 3 3 3 0 3 4 4 4 4 3 4 3 4 4 4 3 0 4 2 4 0 2
 2 4 4 1 4 2 2 3 0 0 1 2 1 3 1 2 4 2 3 2 1 2 1 1 3 3 0 4]


In [162]:
# turn each predicted class index into its 'c<index>' label
predChars = ['c{}'.format(class_index) for class_index in preds]

# one row per test image: filename from test_id, label from the prediction
submission_columns = {'FileName': test_id,
                      'Label': predChars}
final_list = pd.DataFrame(submission_columns)

In [166]:
# saving the result to csv file
# Writes the FileName/Label table as comma-separated UTF-8 text;
# index=False leaves the DataFrame's row index out of the file.
final_list.to_csv('submission.csv', sep=',', encoding='utf-8', index = False)