## 1 Imports & global Vars

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py
import dask.array as da
from dask.array.image import imread
from tqdm import tqdm
import random
from glob import glob
import skimage.io as skio

from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.preprocessing.image import *

Using TensorFlow backend.


In [2]:
# Shape of one network input: height, width, channels.
in_dim = (192, 192, 1)
# Size of the softmax output layer (one unit per class).
out_dim = 3
# Number of samples per batch produced by the data generators.
batch_size = 64

In [3]:
def _crop_bounds(profile, size):
    """Return (start, stop) of a `size`-long window along one image axis.

    `profile` is the image summed over the other axis, so entries > 0 mark
    the rows/columns that contain content. The window is centred on the
    midpoint between the first and last such entry and then shifted, if
    necessary, so that it lies completely inside the axis.
    """
    extent = len(profile)
    if extent <= size:
        # Nothing to cut away along this axis: keep it whole.
        return 0, extent
    filled = np.where(profile > 0)[0]
    if len(filled):
        center = (filled[0] + filled[-1]) // 2
    else:
        # Completely empty image: fall back to the geometric centre.
        center = extent // 2
    # Clamp so the window never starts before 0 or ends past the axis;
    # a negative start would otherwise wrap around and yield a wrong crop.
    start = int(min(max(center - size // 2, 0), extent - size))
    return start, start + size


def smart_crop(path, size):
    """Crop every image matching `path` to (at most) `size` x `size`, in place.

    Each file is read as greyscale, cropped around the centre of its
    non-zero content and written back to the same file. Images that are
    already no larger than `size` in both dimensions are left untouched, so
    calling the function repeatedly on the same files is harmless.

    Parameters
    ----------
    path : str
        Glob pattern selecting the image files to process.
    size : int
        Edge length of the square crop in pixels.
    """
    # NOTE(review): newer scikit-image releases spell this keyword `as_gray`;
    # confirm against the installed version before upgrading.
    for file in tqdm(glob(path)):
        img = skio.imread(file, as_grey=True)
        height, width = img.shape[:2]
        if height <= size and width <= size:
            continue
        top, bottom = _crop_bounds(img.sum(axis=1), size)
        left, right = _crop_bounds(img.sum(axis=0), size)
        skio.imsave(file, img[top:bottom, left:right])

In [4]:
def create_labels(arr):
    """Build one-hot labels for samples that are stored grouped by class.

    Parameters
    ----------
    arr : sequence of int
        `arr[i]` is the number of consecutive samples belonging to class `i`.

    Returns
    -------
    dask.array.Array
        Float array of shape `(sum(arr), len(arr))` created with
        `chunks=1000`; the rows for class `i` have a 1 in column `i` and 0
        elsewhere.
    """
    counts = np.asarray(arr, dtype=int)
    # Row i of the identity matrix is the one-hot vector of class i; repeat
    # each row as many times as the class has samples.
    labels = np.repeat(np.eye(len(counts)), counts, axis=0)
    return da.from_array(labels, chunks=1000)

## 2 Preprocess Images

The original images are 500x500 pixels with a lot of unused space. The content, however, isn't perfectly centered, so I defined a smart crop function that analyses the content of each image and crops around the center of the actual content.

In [58]:
# Crop every data split in place to the network's input edge length.
for pattern in ('data/train/*/*.jpg',
                'data/validation/*/*.jpg',
                'data/test/*.jpg'):
    smart_crop(pattern, in_dim[0])

100%|██████████| 566/566 [00:00<00:00, 1743.94it/s]
100%|██████████| 110/110 [00:00<00:00, 1767.67it/s]
100%|██████████| 75/75 [00:00<00:00, 1786.18it/s]


In [59]:
# The label arrays are built from hard-coded per-class image counts.
# NOTE(review): this assumes imread returns the files class folder by class
# folder in the same order as the counts are listed — confirm.
# The assertions catch the case where the counts no longer match the number
# of images on disk, which would otherwise mislabel samples silently.
x_tr = imread('data/train/*/*.jpg')
y_tr = create_labels([222,174,170])
assert len(x_tr) == len(y_tr), "train: number of images differs from the sum of the class counts"
print("Train:", x_tr.shape, y_tr.shape)

x_va = imread('data/validation/*/*.jpg')
y_va = create_labels([39,38,33])
assert len(x_va) == len(y_va), "validation: number of images differs from the sum of the class counts"
print("Validation:", x_va.shape, y_va.shape)

x_te = imread('data/test/*.jpg')
print("Test:", x_te.shape)

Train: (566, 192, 192) (566, 3)
Validation: (110, 192, 192) (110, 3)
Test: (75, 192, 192)


In [60]:
def _scale_and_add_channel(images):
    """Divide pixel values by 255 and append a trailing channel axis."""
    return (images / 255.)[..., np.newaxis]


x_tr, x_va, x_te = (_scale_and_add_channel(stack) for stack in (x_tr, x_va, x_te))

In [61]:
# Shuffle images and labels with the same permutation so they stay aligned.
# A fixed seed keeps the shuffled order (and therefore the HDF5 files written
# from it) identical between runs.
perm = np.random.RandomState(0).permutation(len(y_tr))
x_tr = x_tr[perm]
y_tr = y_tr[perm]

In [62]:
# One entry per array to persist: (array, stored dtype, file, dataset name).
_exports = [
    (x_tr, 'float16', 'data/x_tr.h5', 'x_tr'),
    (y_tr, 'uint8', 'data/y_tr.h5', 'y_tr'),
    (x_va, 'float16', 'data/x_va.h5', 'x_va'),
    (y_va, 'uint8', 'data/y_va.h5', 'y_va'),
    (x_te, 'float16', 'data/x_te.h5', 'x_te'),
]
for _arr, _dtype, _file, _key in _exports:
    _arr.astype(_dtype).to_hdf5(_file, _key)

## 3 Load Data

In [5]:
# Open the datasets explicitly read-only: without a mode, older h5py versions
# open the file read/write and may create an empty file if the path is
# missing, which turns a simple "file not found" into a confusing KeyError.
# The file handles are intentionally left open because the dask arrays wrap
# the h5py datasets and read from them later.
x_tr = da.from_array(h5py.File('data/x_tr.h5', 'r')['x_tr'], chunks=1000)
y_tr = da.from_array(h5py.File('data/y_tr.h5', 'r')['y_tr'], chunks=10000)

x_va = da.from_array(h5py.File('data/x_va.h5', 'r')['x_va'], chunks=1000)
y_va = da.from_array(h5py.File('data/y_va.h5', 'r')['y_va'], chunks=10000)

x_te = da.from_array(h5py.File('data/x_te.h5', 'r')['x_te'], chunks=1000)

In [6]:
# Random augmentation applied to training batches only.
# NOTE(review): Keras documents rotation_range in degrees, so 0.1 would be
# almost no rotation — confirm this is the intended magnitude.
augmentation = dict(
    rotation_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_gen = ImageDataGenerator(**augmentation).flow(x_tr, y_tr, batch_size=batch_size)

# Validation batches come from a generator with default settings.
gen = ImageDataGenerator()
valid_gen = gen.flow(x_va, y_va, batch_size=batch_size)

## 4 Model

In [13]:
i = Input(shape=in_dim)
m = BatchNormalization()(i)

# Five stages, each a 3x3 convolution followed by a stride-2 3x3 convolution
# that halves the spatial resolution.
for n_filters in (16, 16, 32, 32, 64):
    m = Conv2D(n_filters, 3, padding='same', activation='elu')(m)
    m = Conv2D(n_filters, 3, strides=2, padding='same', activation='elu')(m)

# 1x1 convolution down to 16 channels, then global average pooling feeds the
# softmax classifier.
m = Conv2D(16, 1, padding='same', activation='elu')(m)
m = GlobalAveragePooling2D()(m)
o = Dense(out_dim, activation='softmax')(m)

model = Model(inputs=i, outputs=o)
model.compile(
    optimizer=Adam(lr=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 192, 192, 1)       0         
_________________________________________________________________
batch_normalization_4 (Batch (None, 192, 192, 1)       4         
_________________________________________________________________
conv2d_34 (Conv2D)           (None, 192, 192, 16)      160       
_________________________________________________________________
conv2d_35 (Conv2D)           (None, 96, 96, 16)        2320      
_________________________________________________________________
conv2d_36 (Conv2D)           (None, 96, 96, 16)        2320      
_________________________________________________________________
conv2d_37 (Conv2D)           (None, 48, 48, 16)        2320      
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 48, 48, 32)        4640      
__________

## 4 Training

In [14]:
# Train on augmented batches; validate on the validation arrays after each epoch.
model.fit_generator(
    train_gen,
    steps_per_epoch=120,
    epochs=10,
    validation_data=(x_va, y_va),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x136e94048>

In [33]:
# Persist the trained model to disk.
model.save(filepath='model_8818_3')

## 5 Verify

In [80]:
# Make sure the test images are cropped to the network's input edge length.
smart_crop(path='data/test/*.jpg', size=in_dim[0])

100%|██████████| 75/75 [00:00<00:00, 1807.92it/s]


In [91]:
# Read the test images, divide by 255 and append a channel axis.
test = (imread('data/test/*.jpg') / 255.)[..., np.newaxis]

In [92]:
# NOTE(review): this path differs from the 'model_8818_3' file written by the
# training section — confirm which saved model is meant to be evaluated.
model = load_model(filepath='model_9376')

In [34]:
# Predict on the images prepared in this section (`test`) rather than on
# `x_te` from the data-loading section, so that verification does not depend
# on state created elsewhere in the notebook.
# Each prediction is the index of the highest softmax output.
pred = model.predict(test).argmax(axis=1)
pred

array([2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
       1, 2, 0, 0, 2, 2])

In [94]:
# Alphabetically ordered list of the test image paths.
test_imgs = glob('data/test/*.jpg')
test_imgs.sort()

In [95]:
# Pair every test file with its predicted class index and write the table
# (including the row index) to CSV.
t1 = pd.Series(test_imgs)
t2 = pd.Series(pred)
df = pd.DataFrame({'File': t1, 'Class': t2}, columns=['File', 'Class'])
df.to_csv('prediction.csv')