In [1]:
from keras.models import Model
from keras.layers import Input, Conv2D, AveragePooling2D, GlobalAveragePooling2D, BatchNormalization, ReLU, Add, Dense, Flatten, Concatenate
from keras.optimizers import Adam, RMSprop
from keras.utils import load_img, img_to_array, to_categorical, plot_model

import numpy as np
import pandas as pd
import csv
import os
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


In [2]:
DIR_TRAIN = "./train/"
DIR_VAL = "./val/"
DIR_TEST = "./test/"

PATH_TRAIN = "./train.csv"
PATH_TEST = "./test.csv"

In [3]:
data = pd.read_csv(PATH_TRAIN, delimiter=',', header=0)
data.head()

num_classes = int(data['class'].max()) + 1

In [4]:
import shutil

def data_for_dataset():
    
    for i in range(num_classes):
        dir = os.path.join(DIR_TRAIN, str(i))
        if not os.path.exists(dir):
            os.mkdir(dir)
        for f in data[data['class']==i].values:
            fn = os.path.join(dir, f[0])
            if not os.path.exists(fn):
                shutil.move(os.path.join(DIR_TRAIN, f[0]), fn)
    
    if not os.path.exists(DIR_VAL):
        os.mkdir(DIR_VAL)
    for i in range(num_classes):
        dir = os.path.join(DIR_VAL, str(i))
        if not os.path.exists(dir):
            os.mkdir(dir)
        ls = sorted(os.listdir(os.path.join(DIR_TRAIN, str(i))))
        for j in range(len(ls)//10):
            ffrom = os.path.join(DIR_TRAIN, str(i), ls[j])
            fto = os.path.join(DIR_VAL, str(i), ls[j])
            if os.path.exists(ffrom) and not os.path.exists(fto):
                shutil.move(ffrom, fto)

#data_for_dataset()


In [6]:
shape = (256, 256)
xtrain = np.array([img_to_array(load_img(os.path.join(DIR_TRAIN, s), color_mode='rgb', target_size=shape)) for s in data.ID_img])
ytrain = np.array([to_categorical(float(s), num_classes=num_classes) for s in data['class']])

#xtrain, xtest, ytrain, ytest = train_test_split(xtrain, ytrain, test_size=.1)
#print(xtrain.shape, xtest.shape, ytrain.shape, ytest.shape)
print(xtrain.shape, ytrain.shape)

(4990, 256, 256, 3) (4990, 8)


In [7]:
from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split=0.1)
datagen.fit(xtrain)

In [9]:
def model1():
    inp = Input(shape=(256, 256, 3))

    x = Conv2D(8, 3, padding='same')(inp)

    for k in [16, 32, 64, 128, 256, 512]:
        y = BatchNormalization()(x)
        y = ReLU()(y)
        y = Conv2D(k, 3, padding='same')(y)
        y = BatchNormalization()(y)
        y = ReLU()(y)
        y = Conv2D(k, 3, padding='same')(y)
        y = AveragePooling2D(2)(y)
        
        x = Conv2D(k, 1, padding='same')(x)
        x = AveragePooling2D(2)(x)
        
        x = Add()([x,y])
        x = BatchNormalization()(x)
        x = ReLU()(x)
    x = Flatten()(x)

    x = Dense(512)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = Dense(num_classes, activation='softmax')(x)

    model = Model(inp, x)
    model.compile(optimizer=RMSprop(learning_rate=1e-4), loss='categorical_crossentropy', metrics='accuracy')

    return model, './ulite.h5'

#plot_model(model, show_layer_names=True, show_shapes=True, dpi=64)


In [10]:
def model2():
    inp = Input(shape=(256, 256, 3))
    x = BatchNormalization()(inp)

    for k in [64, 128]:
        #x = Conv2D(k, 3, padding='valid')(x)
        #x = BatchNormalization()(x)
        #x = ReLU()(x)
        x = Conv2D(k, 3, strides=2, padding='valid')(x)
        x = BatchNormalization()(x)
        x = ReLU()(x)

    a = []
    for i in range(16):
        y = Conv2D(16, 9, padding='valid')(x)
        y = BatchNormalization()(y)
        y = ReLU()(y)
        y = Conv2D(k, 9, strides=2, padding='valid')(y)
        a.append(y)
    
    x = Concatenate()(a)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = Conv2D(512, 3, padding='valid')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    
    #x = Flatten()(x)
    x = GlobalAveragePooling2D()(x)

    x = Dense(512)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = Dense(num_classes, activation='softmax')(x)

    model = Model(inp, x)
    model.compile(optimizer=RMSprop(learning_rate=1e-4), loss='categorical_crossentropy', metrics='accuracy')

    return model, './ulite2.h5'

#plot_model(model, show_layer_names=True, show_shapes=True, dpi=64)


In [11]:
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

model, path_weights = model1()

es = EarlyStopping(patience=10)
mcp = ModelCheckpoint(path_weights, mode='min', save_best_only=True, save_weights_only=True)
rop = ReduceLROnPlateau(patience=8, min_lr=1e-10)


In [12]:
history = model.fit(datagen.flow(xtrain, ytrain, batch_size=20, subset='training'), epochs=100, verbose=1, validation_data=datagen.flow(xtrain, ytrain, batch_size=20, subset='validation'), callbacks=[es, mcp, rop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100

In [None]:
model.load_weights(path_weights)
model.evaluate(xtest, ytest)



[0.00528649752959609, 1.0]

In [None]:
data = pd.read_csv(PATH_TEST, delimiter=',', header=0)
data.head()

Unnamed: 0,ID_img,class
0,0.jpg,0
1,1.jpg,0
2,2.jpg,0
3,3.jpg,0
4,4.jpg,0


In [None]:
xval = np.array([img_to_array(load_img(os.path.join(DIR_TEST, s), color_mode='rgb', target_size=shape)) for s in data.ID_img])

In [None]:
pred = model.predict(xval)
pred = [np.argmax(i) for i in pred]



In [None]:
del(data['class'])
data['class'] = pred
data.head()

Unnamed: 0,ID_img,class
0,0.jpg,5
1,1.jpg,2
2,2.jpg,1
3,3.jpg,1
4,4.jpg,6


In [None]:
data.to_csv('./submit.csv', index=False)