In [1]:
from __future__ import print_function
import pandas as pd
import numpy as np

## load the libraries 
import keras.backend as K
from keras.models import Model
from keras.layers import Dense, Input, Conv2D, LSTM, MaxPool2D, UpSampling2D, Flatten, Dropout, concatenate,GlobalAveragePooling2D, AveragePooling2D, Activation, Add, LeakyReLU, ReLU
from keras.layers.normalization import BatchNormalization
from keras.activations import relu
from keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint, ReduceLROnPlateau
from keras.utils import to_categorical
from keras.regularizers import l2
from keras.models import Model
from keras.initializers import glorot_uniform, Constant
from keras import optimizers
from keras.preprocessing.image import img_to_array, array_to_img
from keras.optimizers import Optimizer
from keras.utils.generic_utils import get_custom_objects
from keras.layers import Layer
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
train = pd.read_csv("data/fashion/fashion-mnist_train.csv")
test = pd.read_csv("data/fashion/fashion-mnist_test.csv")

In [3]:
train_x = train[list(train.columns)[1:]].values
train_y = train['label'].values
test_x = test[list(test.columns)[1:]].values
test_y = test['label'].values

In [4]:
train_x=train_x.reshape(60000,28,28,1)
test_x=test_x.reshape(10000,28,28,1)

In [5]:
train_y = to_categorical(train_y, num_classes = 10)
test_y = to_categorical(test_y, num_classes = 10)

In [6]:
train_x = train_x / 255
test_x = test_x / 255

In [7]:
train_x, val_x, train_y, val_y = train_test_split(train_x, train_y, test_size=0.1)

In [8]:
class Swish(Layer):
    def __init__(self, beta=1, **kwargs):
        super(Swish, self).__init__(**kwargs)
        self.beta = K.cast_to_floatx(beta)

    def call(self, inputs):
        return K.sigmoid(self.beta * inputs) * inputs

    def get_config(self):
        config = {'beta': float(self.beta)}
        base_config = super(Swish, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        return input_shape

In [9]:
from keras.preprocessing.image import img_to_array, array_to_img

class MyImageDataGenerator(ImageDataGenerator):

    def __init__(self, featurewise_center=False, samplewise_center=False,
                 featurewise_std_normalization=False, samplewise_std_normalization=False,
                 zca_whitening=False, zca_epsilon=1e-06, rotation_range=0.0, width_shift_range=0.0,
                 height_shift_range=0.0, brightness_range=None, shear_range=0.0, zoom_range=0.0,
                 channel_shift_range=0.0, fill_mode='nearest', cval=0.0, horizontal_flip=False,
                 vertical_flip=False, rescale=None, preprocessing_function=None, data_format=None, validation_split=0.0,
                 random_crop=True, expand_rate=1.2):

        self.my_rescale = rescale
        if random_crop:
            rescale = None
        super().__init__(featurewise_center, samplewise_center, featurewise_std_normalization, samplewise_std_normalization, zca_whitening, zca_epsilon, rotation_range, width_shift_range,
                         height_shift_range, brightness_range, shear_range, zoom_range, channel_shift_range, fill_mode, cval, horizontal_flip, vertical_flip, rescale, preprocessing_function, data_format, validation_split)
        self.random_crop = random_crop
        self.expand_rate = expand_rate

    def scale_random_crop(self, original_img, seed):
        np.random.seed(seed)
        assert original_img.shape[2] == 1
        dy, dx = original_img.shape[0:2]
        expanded_img = img_to_array(array_to_img(original_img).resize((int(dy * self.expand_rate), int(dx * self.expand_rate))))
        height, width = expanded_img.shape[0:2]

        x = np.random.randint(0, width - dx + 1)
        y = np.random.randint(0, height - dy + 1)
        return expanded_img[y:(y + dy), x:(x + dx), :]

    def flow(self, x, y=None, batch_size=32, shuffle=True, sample_weight=None,
             seed=None, save_to_dir=None, save_prefix='', save_format='png', subset=None):
        batches = super().flow(x=x, y=y, batch_size=batch_size, shuffle=shuffle, sample_weight=sample_weight,
                               seed=seed, save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format, subset=subset)
        while True:
            batch = next(batches)
            batch_x = batch[0]
            batch_y = batch[1]
            if self.random_crop:
                x = np.zeros(batch_x.shape)
                y = np.zeros(batch_y.shape)
                for i in range(batch_x.shape[0]):
                    x[i] = self.scale_random_crop(batch_x[i], seed)
                    y[i] = batch_y[i]
                batch_x = x * self.rescale if self.rescale is not None else x
                batch_y = y
            yield (batch_x, batch_y)

In [10]:
datagen = MyImageDataGenerator(
        rotation_range=2,
        zoom_range = 0.01,
        width_shift_range=0.03,
        height_shift_range=0.03,
        horizontal_flip=True,
        vertical_flip=False,
        random_crop=True)


datagen.fit(train_x)

In [11]:
def main_block(x, filters, n, strides, activation, initializer, dropout):
    x_res = Conv2D(filters, (3,3), strides=strides, padding="same", kernel_initializer=initializer, kernel_regularizer=l2(5e-4))(x)
    x_res = BatchNormalization()(x_res)
    x_res = activation()(x_res)
    x_res = Conv2D(filters, (3,3), padding="same", kernel_initializer=initializer)(x_res)
    x = Conv2D(filters, (1,1), strides=strides)(x)
    x = Add()([x_res, x])

    for i in range(n-1):
        x_res = BatchNormalization()(x)
        x_res = activation()(x_res)
        x_res = Conv2D(filters, (3,3), padding="same", kernel_initializer=initializer)(x_res)
        if dropout: x_res = Dropout(rate=dropout)(x)
        x_res = BatchNormalization()(x_res)
        x_res = activation()(x_res)
        x_res = Conv2D(filters, (3,3), padding="same", kernel_initializer=initializer)(x_res)
        x = Add()([x, x_res])

    x = BatchNormalization()(x)
    x = activation()(x)
    return x

def build_model(input_dims, output_dim, n, k, activation, initializer, dropout=None):
    assert (n-4)%6 == 0
    assert k%2 == 0
    n = (n-4)//6 
    
    inputs = Input(shape=(input_dims))

    x = Conv2D(16, (3,3), padding="same", kernel_initializer=initializer)(inputs)
    x = BatchNormalization()(x)
    x = activation()(x)

    x = main_block(x, 32*k, n, (1,1), activation, initializer, dropout)
    x = main_block(x, 48*k, n, (2,2), activation, initializer, dropout)
    x = main_block(x, 64*k, n, (2,2), activation, initializer, dropout)

    x = AveragePooling2D((7,7))(x)
    x = Flatten()(x)
    outputs = Dense(output_dim, activation="softmax")(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

In [12]:
model = build_model((28,28,1), 10, 40, 4, Swish, 'he_uniform',  0.1498182282337851)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [13]:
from keras.optimizers import Optimizer


class AdaBound(Optimizer):

    def __init__(self, lr=0.001, final_lr=0.1, beta_1=0.9, beta_2=0.999, gamma=1e-3,
                 epsilon=None, decay=0., amsbound=False, weight_decay=0.0, **kwargs):
        super(AdaBound, self).__init__(**kwargs)

        if not 0. <= gamma <= 1.:
            raise ValueError("Invalid `gamma` parameter. Must lie in [0, 1] range.")

        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')

        self.final_lr = final_lr
        self.gamma = gamma

        if epsilon is None:
            epsilon = K.epsilon()
        self.epsilon = epsilon
        self.initial_decay = decay
        self.amsbound = amsbound

        self.weight_decay = float(weight_decay)
        self.base_lr = float(lr)

    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Applies bounds on actual learning rate
        step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        final_lr = self.final_lr * lr / self.base_lr
        lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self.gamma * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsbound:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            # Compute the bounds
            step_size_p = step_size * K.ones_like(denom)
            step_size_p_bound = step_size_p / denom
            bounded_lr_t = m_t * K.minimum(K.maximum(step_size_p_bound,
                                                     lower_bound), upper_bound)

            p_t = p - bounded_lr_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates

    def get_config(self):
        config = {'lr': float(K.get_value(self.lr)),
                  'final_lr': float(self.final_lr),
                  'beta_1': float(K.get_value(self.beta_1)),
                  'beta_2': float(K.get_value(self.beta_2)),
                  'gamma': float(self.gamma),
                  'decay': float(K.get_value(self.decay)),
                  'epsilon': self.epsilon,
                  'weight_decay': self.weight_decay,
                  'amsbound': self.amsbound}
        base_config = super(AdaBound, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [14]:
#Hyperparameters for warmup and learning decay
epochs=100
warmup_epoch=epochs//20
alpha=0

#Linear scaling learning rate
batch=32
learning_rate=0.1*batch/256

adabound=AdaBound(lr=learning_rate, final_lr=0.1, gamma=1e-03,weight_decay=0.,amsbound=False)

In [15]:
model.compile(optimizer=adabound, loss='categorical_crossentropy', metrics=['accuracy'])

In [16]:
def warmup_cosine_decay(epoch, epochs=epochs, lr=learning_rate, warmup_epoch=warmup_epoch, alpha=alpha):
    return epoch*lr/warmup_epoch if epoch<warmup_epoch else (1 - alpha)*0.5*(1+np.cos(epoch*(np.pi))/epochs)+alpha

In [17]:
callbacks = [#LearningRateScheduler(warmup_cosine_decay),
             ModelCheckpoint("best-sota-model2.hdf5", monitor='val_acc', verbose=1, save_best_only=True, mode='max')]

In [18]:
history=model.fit_generator(generator=datagen.flow(train_x, train_y, batch_size=batch),
                    validation_data=datagen.flow(val_x, val_y, batch_size=batch),
                    steps_per_epoch=train_x.shape[0] * 3 // batch, validation_steps=val_x.shape[0] * 2 // batch,
                    epochs = epochs, callbacks=callbacks)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.79014, saving model to best-sota-model2.hdf5
Epoch 2/100

Epoch 00002: val_acc improved from 0.79014 to 0.85152, saving model to best-sota-model2.hdf5
Epoch 3/100

Epoch 00003: val_acc improved from 0.85152 to 0.85812, saving model to best-sota-model2.hdf5
Epoch 4/100

Epoch 00004: val_acc improved from 0.85812 to 0.88026, saving model to best-sota-model2.hdf5
Epoch 5/100

Epoch 00005: val_acc improved from 0.88026 to 0.89296, saving model to best-sota-model2.hdf5
Epoch 6/100

Epoch 00006: val_acc improved from 0.89296 to 0.90625, saving model to best-sota-model2.hdf5
Epoch 7/100

Epoch 00007: val_acc improved from 0.90625 to 0.90892, saving model to best-sota-model2.hdf5
Epoch 8/100

Epoch 00008: val_acc improved from 0.90892 to 0.91803, saving model to best-sota-model2.hdf5
Epoch 9/100

Epoch 00009: val_acc did not improve from 0.91803
Epoch 10/100

Epoch 00010: val_acc did not i


Epoch 00081: val_acc did not improve from 0.94820
Epoch 82/100

Epoch 00082: val_acc did not improve from 0.94820
Epoch 83/100

Epoch 00083: val_acc did not improve from 0.94820
Epoch 84/100

Epoch 00084: val_acc did not improve from 0.94820
Epoch 85/100

Epoch 00085: val_acc did not improve from 0.94820
Epoch 86/100

Epoch 00086: val_acc did not improve from 0.94820
Epoch 87/100

Epoch 00087: val_acc did not improve from 0.94820
Epoch 88/100

Epoch 00088: val_acc did not improve from 0.94820
Epoch 89/100

Epoch 00089: val_acc did not improve from 0.94820
Epoch 90/100

Epoch 00090: val_acc did not improve from 0.94820
Epoch 91/100

Epoch 00091: val_acc did not improve from 0.94820
Epoch 92/100

Epoch 00092: val_acc did not improve from 0.94820
Epoch 93/100

Epoch 00093: val_acc did not improve from 0.94820
Epoch 94/100

Epoch 00094: val_acc did not improve from 0.94820
Epoch 95/100

Epoch 00095: val_acc did not improve from 0.94820
Epoch 96/100

Epoch 00096: val_acc did not improve fr

In [19]:
model.save('current-sota-model2.hdf5')

In [20]:
y_pred = model.predict(test_x)
accuracy_score(np.argmax(test_y, axis=1), np.argmax(y_pred, axis=1))

0.9471

In [21]:
best_model = build_model((28,28,1), 10, 40, 4, Swish, 'he_uniform',  0.1498182282337851)

In [23]:
best_model.load_weights("best-sota-model2.hdf5")

In [24]:
y_pred = best_model.predict(test_x)
accuracy_score(np.argmax(test_y, axis=1), np.argmax(y_pred, axis=1))

0.9451