In [None]:
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *

In [None]:
# Project folders, resolved against the directory the notebook was started in.
_BASE_DIR  = os.getcwd()
DATA_DIR   = os.path.join(_BASE_DIR, "data")
OUTPUT_DIR = os.path.join(_BASE_DIR, "output")

# Utility functions
Path = os.path.join  # short alias for joining path components

def DataPath(path):
    """Return `path` joined onto DATA_DIR."""
    return os.path.join(DATA_DIR, path)

def mkdir_p(directory):
    """Create `directory` together with any missing parents (like ``mkdir -p``).

    Calling it on a directory that already exists is a no-op.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If `directory` exists but is not a directory.
    """
    # os.mkdir fails when a parent folder is missing, and a separate
    # exists() check can race with another process creating the same path;
    # makedirs(exist_ok=True) covers both cases in one call.
    os.makedirs(directory, exist_ok=True)

In [None]:
img_width, img_height = 128, 128
# NOTE: Keras documents target_size as (height, width). The (width, height)
# order used below is harmless only because the images are square.

# Fraction of the images in each class folder held out for validation.
validation_split = 0.2

# Augmentation configuration used for the training subset only.
train_datagen = ImageDataGenerator(
            rescale=1./255,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            validation_split=validation_split)

# Validation images are only rescaled, never augmented, so the validation
# metrics are measured on undistorted data. The subset split is taken from the
# sorted file list, so using the same validation_split on a second generator
# yields exactly the images that the training subset leaves out.
valid_datagen = ImageDataGenerator(
            rescale=1./255,
            validation_split=validation_split)

train_generator = train_datagen.flow_from_directory(
        Path(DATA_DIR, 'verified'),
        color_mode='rgba',
        target_size=(img_width, img_height),
        batch_size=128,
        class_mode='categorical',
        subset='training'
)

valid_generator = valid_datagen.flow_from_directory(
        Path(DATA_DIR, 'verified'),
        color_mode='rgba',
        target_size=(img_width, img_height),
        batch_size=512,
        class_mode='categorical',
        subset='validation'
)

# Split using https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator
print("Label codes : ", train_generator.class_indices)
print("Training data   : ", train_generator.n)
print("Validation data : ", valid_generator.n)

In [None]:
# Small CNN: a single 5x5 convolution feeding a dense classification head.
model = Sequential()
# Four input channels because the generators load images with color_mode='rgba'.
model.add(Conv2D(32, (5, 5), input_shape=(img_width, img_height, 4)))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
# 5 softmax outputs -- presumably one per class folder under data/verified; confirm.
model.add(Dense(5, activation='softmax'))

In [None]:
# Compile with categorical cross-entropy loss and an SGD (with momentum) optimizer.
# `learning_rate` is used instead of the deprecated `lr` keyword, which newer
# Keras releases no longer accept.
model.compile(loss='categorical_crossentropy', optimizer=SGD(learning_rate=1e-4, momentum=0.9), metrics=['accuracy'])

In [None]:
# Train for 5 epochs. Model.fit accepts generators directly; fit_generator is
# deprecated and has been removed from recent TensorFlow/Keras releases.
model.fit(train_generator, epochs=5, validation_data=valid_generator)

In [None]:
# Test-time preprocessing: rescale only, no augmentation.
test_datagen = ImageDataGenerator(
    rescale = 1./255
)

# shuffle=False is essential here: flow_from_directory shuffles by default,
# which would return predictions in a random order while
# test_generator.filepaths stays in sorted order, so file ids and prediction
# rows would no longer line up.
test_generator = test_datagen.flow_from_directory(
    Path(DATA_DIR, "test"),
    color_mode = 'rgba',
    target_size = (img_width, img_height),
    batch_size = 25,
    class_mode=None,
    shuffle=False
)

# With class_mode=None no labels are yielded; class_indices only reflects the
# sub-folder names found under the test directory.
print("Label codes : ", test_generator.class_indices)
print("Test data   : ", test_generator.n)

In [None]:
# Predict class probabilities for every test image. Model.predict accepts
# generators directly; predict_generator is deprecated and has been removed
# from recent TensorFlow/Keras releases.
# NOTE: the rows of `output` line up with test_generator.filepaths only when
# the generator does not shuffle.
output = model.predict(test_generator)

# File id = base file name up to its first '.'.
fids = [os.path.basename(path).split('.')[0] for path in test_generator.filepaths]

# One column per class, in class-index order (class_indices maps name -> index).
df = pd.DataFrame(output, index=fids, columns=list(train_generator.class_indices))
df.index.name = 'id'

# Reorder the rows to match the ids listed in the submission template.
# NOTE(review): `fids` are strings; if the template ids are parsed as integers
# the reindex will not match any row -- confirm the id dtype.
submission_format = pd.read_csv(Path(OUTPUT_DIR, 'submission_format.csv'), index_col='id')
df = df.reindex(submission_format.index)
df.to_csv(Path(OUTPUT_DIR, "submission.csv"))

In [None]:
# Run the external `dd-sub-valid` command on the format file and the generated
# submission (paths are relative to the notebook's working directory).
# NOTE(review): presumably this checks submission.csv against
# submission_format.csv; the tool must be installed and on PATH -- confirm.
!dd-sub-valid output/submission_format.csv output/submission.csv