In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import shuffle

In [2]:
# Divisor applied to the channel count inside SE_ResidualUnit's first Dense layer
# (filter_out // reduction_ratio units).
reduction_ratio = 16
# Upper bound on training epochs per fold; EarlyStopping in the training cell may stop sooner.
epochs = 1200
# Mini-batch size used by both the training and validation generators.
batch = 32

In [3]:
# Labelled data: later cells read the 'digit', 'id' and 'letter' columns and
# the pixel columns named '0'..'783' from this frame.
train = pd.read_csv('train.csv')
# Unlabelled data: only the pixel columns '0'..'783' are read later.
test = pd.read_csv('test.csv')

In [4]:
# 10 stratified folds on the digit label; the fixed random_state keeps the
# folds identical across runs.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=10)
# kfold.split() returns a one-shot generator. Materialize it so that the
# training cell can be re-run without silently iterating over nothing.
splited = list(kfold.split(train, train['digit']))

In [5]:
# Pull the label column out of the frame (pop removes it in place) ...
target = train.pop('digit')
# ... then discard the remaining non-pixel columns, also in place.
for unused_column in ('id', 'letter'):
    del train[unused_column]

In [6]:
# Pixel columns are named '0' .. '783' (28 * 28 values per image).
col_list = list(map(str, range(784)))
total_image_df, test_image_df = train[col_list], test[col_list]

# Turn every flat 784-value row into a 28x28 image.
total_arr = total_image_df.values.reshape((total_image_df.shape[0], 28, 28))
test_arr = test_image_df.values.reshape((test_image_df.shape[0], 28, 28))

In [7]:
# Training generator: scales pixels by 1/255 and applies random rotation,
# shift, shear and zoom; pixels uncovered by a transform are filled with the
# nearest existing value.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    fill_mode='nearest',
    rotation_range=30,
    shear_range=0.3,
    zoom_range=0.3,
    width_shift_range=0.3,
    height_shift_range=0.3,
)
# Validation generator: the same 1/255 scaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [8]:
#SE-ResNet
class SE_ResidualUnit(tf.keras.layers.Layer):
    """Bottleneck residual unit whose output is rescaled per channel before the skip add.

    Main path: three BatchNorm -> ELU -> Conv2D stages (1x1 conv to
    ``filter_out // 2`` channels, ``kernel_size`` conv at ``filter_out // 2``
    channels, 1x1 conv to ``filter_out`` channels), all with 'same' padding.

    Gating path: the main-path output is global-average-pooled, passed through
    two bias-free Dense layers (``filter_out // reduction_ratio`` units with
    ELU, then ``filter_out`` units with sigmoid), reshaped to
    ``[1, 1, filter_out]`` and multiplied with the main-path output.

    The gated result is added to the shortcut, which is the input itself when
    ``filter_in == filter_out`` and a 1x1 Conv2D projection otherwise.

    Args:
        filter_in: Channel count of the incoming tensor; only used to decide
            whether the shortcut needs a projection.
        filter_out: Channel count of the output tensor.
        reduction_ratio: Divisor that sets the width of the first Dense layer
            of the gating path.
        kernel_size: Kernel size of the middle convolution.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """
    def __init__(self, filter_in, filter_out, reduction_ratio, kernel_size,**kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept on the instance so get_config() can return them.
        self.filter_in = filter_in
        self.filter_out = filter_out
        self.reduction_ratio = reduction_ratio
        self.kernel_size = kernel_size

        # NOTE(review): sub-layers are created in a fixed order; reordering them
        # would presumably change auto-generated layer names and weight order,
        # which matters for already-saved checkpoints - confirm before editing.

        # Stage 1: 1x1 convolution down to half of the output channels.
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.elu1 = tf.keras.layers.ELU()
        self.conv1 = tf.keras.layers.Conv2D(
            filter_out//2, (1, 1),  kernel_initializer="he_normal", padding='same', kernel_constraint=tf.keras.constraints.max_norm(1., axis=[0,1]))

        # Stage 2: kernel_size convolution at the reduced width.
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.elu2 = tf.keras.layers.ELU()
        self.conv2 = tf.keras.layers.Conv2D(
            filter_out//2, kernel_size, kernel_initializer="he_normal",
            padding = 'same', kernel_constraint=tf.keras.constraints.max_norm(1., axis=[0,1]))

        # Stage 3: 1x1 convolution up to the full output width.
        self.bn3 = tf.keras.layers.BatchNormalization()
        self.elu3 = tf.keras.layers.ELU()
        self.conv3 = tf.keras.layers.Conv2D(
            filter_out, (1, 1), kernel_initializer="he_normal", padding='same',
             kernel_constraint=tf.keras.constraints.max_norm(1., axis=[0,1]))

        # Gating path: pool -> narrow Dense (ELU) -> wide Dense (sigmoid) -> reshape -> multiply.
        self.gp = tf.keras.layers.GlobalAveragePooling2D()
        self.dense1 = tf.keras.layers.Dense(
            filter_out//reduction_ratio, kernel_initializer="he_normal", activation='elu' ,use_bias=False)
        self.dense2 = tf.keras.layers.Dense(
            filter_out, activation='sigmoid', kernel_initializer="he_normal", use_bias=False)
        # Reshape the per-channel gate so it broadcasts over the spatial axes.
        self.reshape = tf.keras.layers.Reshape([1, 1, filter_out])
        self.mul = tf.keras.layers.Multiply()
        # Shortcut: pass-through when channel counts match, 1x1 projection otherwise.
        if filter_in == filter_out:
          self.identity = lambda x:x
        else:
          self.identity = tf.keras.layers.Conv2D(
                filter_out, (1, 1),  padding='same')

    def call(self, x, training=None):
        """Apply the unit to ``x``.

        Args:
            x: Input tensor fed to both the main path and the shortcut.
            training: Forwarded to the BatchNormalization layers.

        Returns:
            Shortcut of ``x`` plus the channel-gated main-path output.
        """
        h = self.bn1(x, training=training)
        h = self.elu1(h)
        h = self.conv1(h)

        h = self.bn2(h, training=training)
        h = self.elu2(h)
        h = self.conv2(h)

        h = self.bn3(h, training=training)
        h = self.elu3(h)
        h = self.conv3(h)

        # Compute one gate value per channel from the pooled main-path output.
        s = self.gp(h)
        s = self.dense1(s)
        s = self.dense2(s)
        s = self.reshape(s)
        # Rescale the main-path output channel-wise with the gate.
        s = self.mul([s, h])
        return self.identity(x) + s

    def get_config(self):
        """Return the base layer config extended with this unit's constructor arguments."""
        config = super().get_config()
        config.update({"filter_in":self.filter_in, "filter_out":self.filter_out,
                       "reduction_ratio":self.reduction_ratio,"kernel_size":self.kernel_size
                       })
        return config

class ResidualUnit(tf.keras.layers.Layer):
    """Bottleneck residual unit: three BatchNorm -> ELU -> Conv2D stages plus a shortcut.

    The stages are a 1x1 conv to ``filter_out // 2`` channels, a
    ``kernel_size`` conv at ``filter_out // 2`` channels and a 1x1 conv to
    ``filter_out`` channels, all with 'same' padding. Their output is added to
    the shortcut, which is the input itself when ``filter_in == filter_out``
    and a 1x1 Conv2D projection otherwise.

    Args:
        filter_in: Channel count of the incoming tensor; only used to decide
            whether the shortcut needs a projection.
        filter_out: Channel count of the output tensor.
        kernel_size: Kernel size of the middle convolution.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """
    def __init__(self, filter_in, filter_out, kernel_size, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept on the instance so get_config() can return them.
        self.filter_in = filter_in
        self.filter_out = filter_out
        self.kernel_size = kernel_size

        # Stage 1: 1x1 convolution down to half of the output channels.
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.elu1 = tf.keras.layers.ELU()
        self.conv1 = tf.keras.layers.Conv2D(
            filter_out//2, (1, 1), padding='same', kernel_initializer="he_normal", kernel_constraint=tf.keras.constraints.max_norm(1., axis=[0,1]))

        # Stage 2: kernel_size convolution at the reduced width.
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.elu2 = tf.keras.layers.ELU()
        self.conv2 = tf.keras.layers.Conv2D(
            filter_out//2, kernel_size, padding='same', kernel_initializer="he_normal", kernel_constraint=tf.keras.constraints.max_norm(1., axis=[0,1]))

        # Stage 3: 1x1 convolution up to the full output width.
        self.bn3 = tf.keras.layers.BatchNormalization()
        self.elu3 = tf.keras.layers.ELU()
        self.conv3 = tf.keras.layers.Conv2D(
            filter_out, (1, 1), padding='same', kernel_initializer="he_normal", kernel_constraint=tf.keras.constraints.max_norm(1., axis=[0,1]))

        # Shortcut: pass-through when channel counts match, 1x1 projection otherwise.
        if filter_in == filter_out:
          self.identity = lambda x:x
        else:
          self.identity = tf.keras.layers.Conv2D(
                filter_out, (1, 1),  padding='same')

    def call(self, x, training=None):
        """Apply the unit to ``x``.

        Args:
            x: Input tensor fed to both the main path and the shortcut.
            training: Forwarded to the BatchNormalization layers.

        Returns:
            Shortcut of ``x`` plus the main-path output.
        """
        h = self.bn1(x, training=training)
        h = self.elu1(h)
        h = self.conv1(h)

        h = self.bn2(h, training=training)
        h = self.elu2(h)
        h = self.conv2(h)

        h = self.bn3(h, training=training)
        h = self.elu3(h)
        h = self.conv3(h)

        return self.identity(x) + h
    def get_config(self):
        """Return the base layer config extended with this unit's constructor arguments."""
        config = super().get_config()
        config.update({ "filter_in":self.filter_in, "filter_out":self.filter_out, "kernel_size":self.kernel_size
                        })
        return config

In [9]:
##SE_ResNet##
def build_model():
    """Assemble the uncompiled classifier for 28x28x1 inputs.

    Layout: a 3x3 stem convolution with 128 filters, nine SE_ResidualUnit
    blocks (4 x 128, 4 x 256, 1 x 512 channels) with a 2x2 max-pool and
    dropout(0.2) after the 4th and 5th block, one plain ResidualUnit at 512
    channels, a 3x3 max-pool, then Flatten -> Dense(1024, ELU) ->
    Dropout(0.5) -> Dense(10, softmax).

    Returns:
        A ``tf.keras.models.Sequential`` instance.
    """
    layers = tf.keras.layers
    max_norm = tf.keras.constraints.max_norm

    net = tf.keras.models.Sequential()
    net.add(layers.Conv2D(
        filters=128,
        kernel_size=(3, 3),
        kernel_initializer='he_normal',
        activation='elu',
        padding='same',
        kernel_constraint=max_norm(1., axis=[0, 1]),
        input_shape=[28, 28, 1],
    ))

    block_channels = [128] * 4 + [256] * 4 + [512] * 1
    in_channels = 128
    for position, out_channels in enumerate(block_channels):
        # Every position gets an SE block; only positions 3 and 4 are
        # followed by spatial down-sampling and dropout.
        net.add(SE_ResidualUnit(
            filter_in=in_channels,
            filter_out=out_channels,
            reduction_ratio=reduction_ratio,
            kernel_size=(3, 3),
        ))
        if position in (3, 4):
            net.add(layers.MaxPool2D((2, 2)))
            net.add(layers.Dropout(0.2))
        in_channels = out_channels

    net.add(ResidualUnit(512, 512, (3, 3)))
    net.add(layers.MaxPool2D((3, 3)))
    net.add(layers.Flatten())
    net.add(layers.Dense(
        1024,
        activation='elu',
        kernel_initializer='he_normal',
        kernel_constraint=max_norm(1.),
    ))
    net.add(layers.Dropout(0.5))
    net.add(layers.Dense(10, activation='softmax'))
    return net

In [None]:
# Train one model per fold; the best epoch of fold k (lowest val_loss) is
# checkpointed to emnist_{k}.h5.
# The loop variables are named *_idx so they no longer overwrite the `train`
# DataFrame with an index array.
for k, (train_idx, val_idx) in enumerate(splited):
  print("=========={}th========= ".format(k))
  # Drop Keras global state left by the previous fold so that memory does not
  # keep growing while ten large models are built in one kernel.
  tf.keras.backend.clear_session()
  model = build_model()
  model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Nadam(epsilon=1e-04), metrics=["accuracy"])
  # Positional selection of this fold's rows; a trailing channel axis is
  # added to the images because the model expects (28, 28, 1) inputs.
  train_img = total_arr[train_idx][..., tf.newaxis]
  val_img = total_arr[val_idx][..., tf.newaxis]
  train_y, val_y = target.iloc[train_idx], target.iloc[val_idx]
  filename = "emnist_{}.h5".format(k)
  checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(filename, save_best_only=True, monitor='val_loss', mode='auto')
  earlystopping = tf.keras.callbacks.EarlyStopping(patience=100, restore_best_weights=True)
  lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(factor=0.8, patience=30)
  # Validation runs over the whole fold in a fixed order: no shuffling and no
  # validation_steps truncation, so val_loss (which drives all three
  # callbacks) is computed on the same complete set of samples every epoch.
  val_flow = test_datagen.flow(val_img, val_y, batch_size=batch, shuffle=False)
  model.fit(train_datagen.flow(train_img, train_y, shuffle=True, batch_size=batch), epochs=epochs, validation_data=val_flow,
            steps_per_epoch=len(train_img)//batch, callbacks=[checkpoint_cb, earlystopping, lr_scheduler])

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 57 steps, validate for 6 steps
Epoch 1/1200
Epoch 2/1200
Epoch 3/1200

In [None]:
# Append the channel axis and scale the pixels by 1/255 in one expression,
# matching the rescale applied by the data generators.
test_img = test_arr[..., tf.newaxis] / 255.

In [None]:
def get_models(n_models=10):
    """Load the per-fold checkpoints ``emnist_0.h5`` .. ``emnist_{n_models-1}.h5``.

    Args:
        n_models: Number of checkpoint files to load. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        List of loaded Keras models, ordered by fold index.
    """
    models = []
    for fold in range(n_models):
        filename = "emnist_{}.h5".format(fold)
        # The custom residual layers must be registered for deserialization.
        model = tf.keras.models.load_model(
            filename,
            custom_objects={"SE_ResidualUnit": SE_ResidualUnit, "ResidualUnit": ResidualUnit},
        )
        models.append(model)
    return models

def get_predict(models, images=None):
    """Run every model on the same inputs and collect the outputs.

    Args:
        models: Iterable of objects exposing ``predict``; any number of
            models is accepted (previously exactly 10 were required).
        images: Inputs passed to each ``predict`` call. When omitted, the
            notebook-level ``test_img`` array is used, as before.

    Returns:
        List with one prediction result per model, in the order of ``models``.
    """
    if images is None:
        images = test_img
    return [model.predict(images) for model in models]

In [None]:
models = get_models()
predicts = get_predict(models)
# Average the class probabilities over all fold models. np.mean allocates a
# new array, so predicts[0] is no longer modified in place, and the divisor
# follows the actual number of models instead of a hard-coded 10.
predict_mean = np.mean(predicts, axis=0)

In [None]:
# Load the submission template; its 'digit' column is filled in a later cell.
submission = pd.read_csv('submission.csv')

In [None]:
# For every test row, pick the class index with the highest averaged score.
digit = predict_mean.argmax(axis=1)

In [None]:
# Item assignment always writes a DataFrame column; attribute assignment
# would only set a plain Python attribute if the template lacked 'digit'.
submission['digit'] = digit

In [None]:
# Write the filled-in submission; index=False keeps the row index out of the file.
submission.to_csv('final_sub.csv', index=False)