# SE-ResNet Implementation

In [13]:
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
from keras.models import Model
from keras.layers import *
from keras.regularizers import l2
from keras.utils import conv_utils
from keras.utils.data_utils import get_file
from keras.engine.topology import get_source_inputs
from keras_applications.imagenet_utils import _obtain_input_shape
from keras_applications.resnet50 import preprocess_input
from keras_applications.imagenet_utils import decode_predictions
from keras import backend as K
from datetime import datetime
from keras.preprocessing.image import ImageDataGenerator
from visualization import *
import time
from keras.optimizers import SGD,Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
import numpy as np 
import pandas as pd
from keras.utils import to_categorical

Here we create out squeeze excite block, this is the main contribution of the paper and adds a global average pooling and 2 dense layers after the normal resnet block

In [14]:
#two parameters: input and reduction ratio
def squeeze_excite_block(input, ratio=16):
    filter_kernels = input._keras_shape[-1]
    z_shape = (1, 1, filter_kernels)
    z = GlobalAveragePooling2D()(input)
    z = Reshape(z_shape)(z)
    s = Dense(filter_kernels//ratio, activation='relu', use_bias=False)(z)
    s = Dense(filter_kernels, activation='sigmoid', use_bias=False)(s)
    x = multiply([input, s])
    return x

Here we create out final netwokr block which is basically a bottleneck resnet block followed by the squeeze and excite block

In [15]:
def se_resnet_block_bottleneck(input,channels,_strides=(1, 1)):
    chan_axis=-1
    if(input._keras_shape[-1]!=channels or _strides!=(1,1)):
        input = Conv2D(channels, (1, 1), padding='same', kernel_initializer='he_normal',
                      use_bias=False, strides=_strides)(input)
    
    x = Conv2D(channels, (1, 1), padding='same', kernel_initializer='he_normal',
                      use_bias=False, strides=_strides)(input)
    x = BatchNormalization(axis=chan_axis)(x)
    x = Activation('relu')(x)
    
    
    x = Conv2D(channels, (3, 3), padding='same', kernel_initializer='he_normal',
                      use_bias=False, strides=_strides)(x)
    x = BatchNormalization(axis=chan_axis)(x)
    x = Activation('relu')(x)
    
    
    x = Conv2D(channels, (1, 1), padding='same', kernel_initializer='he_normal',
                      use_bias=False, strides=_strides)(x)
    x = BatchNormalization(axis=chan_axis)(x)
    x = Activation('relu')(x)
    
    
    x = squeeze_excite_block(x)
    out = add([x, input])
    return out

Here we implement the architecture given in the paper diagram using for loops and adding blocks continuously

In [16]:
def se_resnet(input,filters = [64,128,256,256,512],depth = [6,8,12,8,6],num_classes=4, weight_decay=1e-4):
# def se_resnet(input,filters = [64,128,256],depth = [6,8,6],num_classes=4, weight_decay=1e-4):
    chan_axis=-1
    x = Conv2D(filters[0], (3, 3), padding='same', use_bias=False, strides=(2, 2),
               kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(input)
    x = MaxPooling2D(pool_size=(2,2))(x)
    for i in range(len(filters)):
        x = se_resnet_block_bottleneck(x,filters[i],(2,2))
        for j in range(depth[i]-1):
            x = se_resnet_block_bottleneck(x,filters[i],(1,1))
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.85)(x)
    x = Dense(num_classes, activation='softmax', use_bias=False)(x)
    return x

Load the MNIST dataset here and create the create_model function that just uses the functions above to create our final model and return it.

In [17]:
import pickle
with open('./train_image.pkl', 'rb') as f:
    train_x = pickle.load(f,encoding='utf-8')
with open('./train_label.pkl', 'rb') as f:
    train_y = pickle.load(f,encoding='utf-8')
train_x = np.array(train_x).reshape(-1,28,28,1)
labels = np.array(train_y)
labels[labels==2]=1
labels[labels==3]=2
labels[labels==6]=3
train_y = np.zeros((labels.shape[0], 4))
train_y[np.arange(labels.shape[0]),labels] = 1

In [18]:
#train_x = np.expand_dims(train_x,axis=3)
#train_y = np.expand_dims(train_y,axis=1)
#train_y = to_categorical(train_y)
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
train_x, train_y = shuffle(train_x, train_y, random_state=0)
train_x, test_x, train_y, test_y = train_test_split(train_x, train_y, test_size=0.10, random_state=42)

def create_model(input_shape = (28, 28, 1),filters = [64,128,256,256,512],depth = [6,8,12,8,6],num_classes=4, weight_decay=1e-4):
#def create_model(input_shape = (28, 28, 1),filters = [64,128,256],depth = [6,8,6],num_classes=4, weight_decay=1e-4):
    input = Input(shape = input_shape)
    x = se_resnet(input,filters,depth,num_classes)
    model = Model(input, x)
    print(model.summary())
    return model

In [19]:
print(train_x.shape)
print(train_y.shape)

(7200, 28, 28, 1)
(7200, 4)


Creating the model and printing the architecture

In [20]:
model = create_model()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_127 (Conv2D)             (None, 14, 14, 64)   576         input_2[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D)  (None, 7, 7, 64)     0           conv2d_127[0][0]                 
__________________________________________________________________________________________________
conv2d_128 (Conv2D)             (None, 4, 4, 64)     4096        max_pooling2d_2[0][0]            
__________________________________________________________________________________________________
conv2d_129

Basic Training code

In [21]:
#model_filename:where the model is checkpointed
from random_eraser import get_random_eraser
datagen = ImageDataGenerator(featurewise_center=True,featurewise_std_normalization=True,preprocessing_function=get_random_eraser(v_l=0, v_h=1))
datagen.fit(train_x)
model_id = 1
model_dir = './runs1'
timestr = time.strftime("%Y%m%d-%H%M%S")
model_filename = model_dir + '{}-{}.hdf5'.format(model_id, timestr)
print('model checkpoint file path: {}'.format(model_filename))
lr_reduction_factor = 0.2
min_learning_rate = 1e-07
#Adding early stopping,model_checkpoint,reduceLRonPlateau
early_stop = EarlyStopping(monitor='val_loss',
                           patience=3,
                           min_delta=0, 
                           verbose=1,
                           mode='auto')

model_checkpoint = ModelCheckpoint(model_filename,
                                   monitor='val_loss',
                                   verbose=1,
                                   save_best_only=True)

reduceLR = ReduceLROnPlateau(monitor='val_loss',
                             factor=lr_reduction_factor,
                             patience=2,
                             verbose=1,
                             min_lr=min_learning_rate,
                             epsilon=1e-4)
training_start_time = datetime.now()
model.compile(optimizer=Adam(1e-4),loss="categorical_crossentropy",metrics=["accuracy"])
history = model.fit_generator(datagen.flow(train_x,train_y,batch_size=16,shuffle=True),epochs=100,verbose=1,callbacks=[model_checkpoint],shuffle=True,validation_data=datagen.flow(test_x,test_y,batch_size=16,shuffle=True))
time_spent_trianing = datetime.now() - training_start_time
print('model training complete. time spent: {}'.format(time_spent_trianing))

model checkpoint file path: ./runs11-20190322-022352.hdf5




Epoch 1/100

Epoch 00001: val_loss improved from inf to 5.49540, saving model to ./runs11-20190322-022352.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 5.49540 to 4.36482, saving model to ./runs11-20190322-022352.hdf5
Epoch 3/100

Epoch 00003: val_loss improved from 4.36482 to 4.31566, saving model to ./runs11-20190322-022352.hdf5
Epoch 4/100

Epoch 00004: val_loss improved from 4.31566 to 4.18290, saving model to ./runs11-20190322-022352.hdf5
Epoch 5/100

Epoch 00005: val_loss improved from 4.18290 to 3.92172, saving model to ./runs11-20190322-022352.hdf5
Epoch 6/100

Epoch 00006: val_loss did not improve from 3.92172
Epoch 7/100

Epoch 00007: val_loss improved from 3.92172 to 3.73374, saving model to ./runs11-20190322-022352.hdf5
Epoch 8/100

Epoch 00008: val_loss did not improve from 3.73374
Epoch 9/100

Epoch 00009: val_loss did not improve from 3.73374
Epoch 10/100

Epoch 00010: val_loss did not improve from 3.73374
Epoch 11/100

Epoch 00011: val_loss did not improve from 


Epoch 00041: val_loss did not improve from 3.48158
Epoch 42/100

Epoch 00042: val_loss did not improve from 3.48158
Epoch 43/100

Epoch 00043: val_loss did not improve from 3.48158
Epoch 44/100

Epoch 00044: val_loss did not improve from 3.48158
Epoch 45/100

Epoch 00045: val_loss did not improve from 3.48158
Epoch 46/100

Epoch 00046: val_loss did not improve from 3.48158
Epoch 47/100

Epoch 00047: val_loss did not improve from 3.48158
Epoch 48/100

Epoch 00048: val_loss did not improve from 3.48158
Epoch 49/100

Epoch 00049: val_loss did not improve from 3.48158
Epoch 50/100

Epoch 00050: val_loss did not improve from 3.48158
Epoch 51/100

Epoch 00051: val_loss did not improve from 3.48158
Epoch 52/100

Epoch 00052: val_loss did not improve from 3.48158
Epoch 53/100

Epoch 00053: val_loss did not improve from 3.48158
Epoch 54/100

Epoch 00054: val_loss did not improve from 3.48158
Epoch 55/100

Epoch 00055: val_loss improved from 3.48158 to 3.46106, saving model to ./runs11-20190322

KeyboardInterrupt: 