    - each ResNet block is either 2 layers deep (for small networks like ResNet-18 and ResNet-34)
    - or 3 layers deep (ResNet-50, ResNet-101, ResNet-152)

In [1]:
# import the necessary libraries
import numpy as np
import pandas as pd

pd.set_option('display.max_rows',1000)
pd.set_option('display.max_columns',1000)

from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Activation,\
BatchNormalization, Dense, Flatten, Input, add
from keras.regularizers import l2
from keras.models import Model
import keras.backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Couldn't import dot_parser, loading of dot files will not be possible.


In [2]:
# libraries for cifar-10
from sklearn.preprocessing import LabelBinarizer
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.datasets import cifar10
from keras.models import load_model

#import sys

# set a high recursion limit so theano doesn't complain
#sys.setrecursionlimit(5000)

In [3]:
def residual_module(inputs, num_filters, stride, reduce=False,
                    reg_str=0.0001, bn_eps=2e-5, bn_mom=0.9):
    '''
    Pre-activation bottleneck residual module: three BN => ReLU => CONV
    blocks (1 x 1, 3 x 3, 1 x 1) whose output is added to a shortcut.
    ---
    Args:
        inputs: input tensor of the module.
        num_filters: number of filters of the final 1 x 1 CONV; the first
            two CONV layers use int(num_filters * 0.25) filters.
        stride: strides of the 3 x 3 CONV, and of the shortcut CONV when
            reduce is True.
        reduce: if True, the shortcut is a strided 1 x 1 CONV applied to the
            first activation; if False, the shortcut is the raw input, so
            the input must already have the shape of the final CONV output.
        reg_str: L2 regularization strength of every CONV kernel.
        bn_eps: epsilon of every BatchNormalization layer.
        bn_mom: momentum of every BatchNormalization layer.

    Returns:
        x: sum of the final CONV output and the shortcut.
    '''
    # BatchNormalization defaults to the last axis, which is the channel axis
    # only for 'channels_last'; follow the active image data format instead
    bn_axis = 1 if K.image_data_format() == 'channels_first' else -1
    bottleneck_filters = int(num_filters*0.25)

    shortcut = inputs

    # first block of ResNet module - 1 x 1 CONV
    bn1 = BatchNormalization(axis=bn_axis, epsilon=bn_eps,
                             momentum=bn_mom)(inputs)
    act1 = Activation('relu')(bn1)
    conv1 = Conv2D(filters=bottleneck_filters, kernel_size=(1, 1),
                   use_bias=False, kernel_regularizer=l2(reg_str))(act1)

    # second block of ResNet module - 3 x 3 CONV (the only strided one)
    bn2 = BatchNormalization(axis=bn_axis, epsilon=bn_eps,
                             momentum=bn_mom)(conv1)
    act2 = Activation('relu')(bn2)
    conv2 = Conv2D(filters=bottleneck_filters, kernel_size=(3, 3),
                   strides=stride, padding='same', use_bias=False,
                   kernel_regularizer=l2(reg_str))(act2)

    # third and final block of ResNet module - 1 x 1 CONV
    bn3 = BatchNormalization(axis=bn_axis, epsilon=bn_eps,
                             momentum=bn_mom)(conv2)
    act3 = Activation('relu')(bn3)
    conv3 = Conv2D(filters=num_filters, kernel_size=(1, 1), use_bias=False,
                   kernel_regularizer=l2(reg_str))(act3)

    # if we are to reduce the spatial size, apply a CONV layer to the
    # shortcut; it starts from act1 (pre-activation) and uses the same stride
    if reduce:
        shortcut = Conv2D(filters=num_filters, kernel_size=(1, 1),
                          strides=stride, use_bias=False,
                          kernel_regularizer=l2(reg_str))(act1)

    # add together the shortcut and the final CONV
    x = add([conv3, shortcut])

    # return the addition as the output of the ResNet module
    return x

In [4]:
def build(width, height, depth, num_classes, stages, num_filters,
          reg_str=0.0001, bn_eps=2e-5, bn_mom=0.9, dataset='cifar'):
    '''
    Assemble a ResNet classifier out of stacked residual modules.
    ---
    Args:
      width: input image width.
      height: input image height.
      depth: number of input channels.
      num_classes: number of units of the final softmax layer.
      stages: list, number of residual modules in each stage.
      num_filters: list, num_filters[0] is used by the first CONV layer and
          num_filters[i+1] by the residual modules of stage i.
      reg_str: L2 regularization strength.
      bn_eps: epsilon of the BatchNormalization layers.
      bn_mom: momentum of the BatchNormalization layers.
      dataset: when 'cifar', a single 3 x 3 CONV layer follows the input BN.

    Returns:
        model: the uncompiled keras Model, named 'resnet'.
    '''
    # pick the input layout and channel axis from the active data format
    if K.image_data_format() == 'channels_first':
        input_shape = (depth, height, width)
        chan_dim = 1
    else:
        input_shape = (height, width, depth)
        chan_dim = -1

    # set the input and apply BN
    inputs = Input(shape=input_shape)
    x = BatchNormalization(axis=chan_dim, epsilon=bn_eps,
                           momentum=bn_mom)(inputs)

    # check if we utilizing the CIFAR dataset
    if dataset == 'cifar':
        # apply a single CONV layer
        x = Conv2D(filters=num_filters[0], kernel_size=(3, 3),
                   use_bias=False, padding='same',
                   kernel_regularizer=l2(reg_str))(x)

    # loop over the stages; stages[i] is the number of modules to stack
    for i, num_modules in enumerate(stages):
        # the first module of a stage may change the number of filters and,
        # after stage 0, strides by (2, 2), so its shortcut has to be
        # projected (reduce=True) for the final addition to be possible.
        # residual_module takes `stride` and has no channel-axis parameter.
        stride = (1, 1) if i == 0 else (2, 2)
        x = residual_module(inputs=x, num_filters=num_filters[i+1],
                            stride=stride, reduce=True, reg_str=reg_str,
                            bn_eps=bn_eps, bn_mom=bn_mom)

        # remaining modules of the stage keep the shape unchanged
        for _ in range(num_modules - 1):
            x = residual_module(inputs=x, num_filters=num_filters[i+1],
                                stride=(1, 1), reg_str=reg_str,
                                bn_eps=bn_eps, bn_mom=bn_mom)

    # apply BN => ACT => POOL
    x = BatchNormalization(axis=chan_dim, epsilon=bn_eps,
                           momentum=bn_mom)(x)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=(8, 8))(x)

    # softmax classifier
    x = Flatten()(x)
    x = Dense(units=num_classes, kernel_regularizer=l2(reg_str))(x)
    x = Activation('softmax')(x)

    # create the model
    model = Model(inputs=inputs, outputs=x, name='resnet')

    # return the constructed network architecture
    return model

In [5]:
# hyper-parameters read by the cells below
num_classes = 10                # units of the final Dense layer
stages = (9, 9, 9)              # residual modules stacked in each stage
filters = (64, 64, 128, 256)    # filters[i+1] is used by stage i

reg_str = 5e-4                  # passed to l2() for the kernel regularizers
bn_eps = 2e-5                   # BatchNormalization epsilon
bn_mom = 0.9                    # BatchNormalization momentum
# chanDim = -1

In [6]:
# symbolic input tensor declared with shape (32, 32, 3)
# NOTE(review): `input` shadows the Python builtin of the same name; later
# cells read this variable, so a rename has to touch all of them together
input = Input(shape=(32, 32, 3))
# echo the symbolic shape in the notebook output
input.shape

TensorShape([Dimension(None), Dimension(32), Dimension(32), Dimension(3)])

In [7]:
# stem of the network: BN on the raw input, then one 3 x 3 CONV (64 filters)
x = BatchNormalization()(input)
x = Conv2D(
    filters=64,
    kernel_size=3,
    strides=(1, 1),
    padding='same',
    use_bias=False,
    kernel_regularizer=l2(reg_str),
)(x)
print(x.shape)

(?, 32, 32, 64)


In [8]:
# stack the residual stages: the first module of a stage projects its
# shortcut (reduce=True); the other stages[i]-1 modules use stride (1, 1)
for stage_idx, num_modules in enumerate(stages):
    stage_filters = filters[stage_idx + 1]
    stride = (2, 2) if stage_idx > 0 else (1, 1)
    x = residual_module(inputs=x, num_filters=stage_filters, stride=stride,
                        reduce=True, bn_eps=bn_eps, bn_mom=bn_mom)

    for module_idx in range(num_modules - 1):
        x = residual_module(inputs=x, num_filters=stage_filters,
                            stride=(1, 1), bn_eps=bn_eps, bn_mom=bn_mom)
print(x.shape)

(?, 8, 8, 256)


In [9]:
# head of the network: BN => ReLU => 8 x 8 average pooling
x = BatchNormalization(epsilon=bn_eps, momentum=bn_mom)(x)
x = Activation('relu')(x)
x = AveragePooling2D(pool_size=(8, 8))(x)
print(x.shape)

# softmax classifier on the flattened features
x = Flatten()(x)
x = Dense(num_classes, kernel_regularizer=l2(reg_str))(x)
x = Activation('softmax')(x)
print(x.shape)

# wrap the graph between `input` and the softmax output into a Model
model = Model(inputs=input, outputs=x)
model.summary()

(?, 1, 1, 256)
(?, 10)


### Use what I call the ctrl + c method of training a deep learning network: train for some number of epochs at a given learning rate, notice that validation performance has stalled, press ctrl + c to stop the script, adjust the learning rate, and continue training.

    - cifar10 training image size: 32 x 32 x 3
    - trainx and trainy: 50,000 images
    - testx and testy: 10,000 images

In [None]:
# fetch CIFAR-10 and turn the image arrays from integers into floats
(trainx, trainy), (testx, testy) = cifar10.load_data()
trainx = trainx.astype('float')
testx = testx.astype('float')

# mean subtraction: the mean is taken over axis 0 of the training images
# and removed from both splits
mean = trainx.mean(axis=0)
trainx -= mean
testx -= mean

# convert the labels from integers to vectors; the binarizer is fitted on
# the training labels and reused for the test labels
lb = LabelBinarizer()
trainy = lb.fit_transform(trainy)
testy = lb.transform(testy)

In [None]:
# image generator used for data augmentation: width/height shift ranges of
# 0.1, horizontal flips, and 'nearest' fill mode
aug = ImageDataGenerator(
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

In [None]:
# hyper-parameters for the network built in the following cells
reg_str = 5e-4                  # strength handed to l2()
bn_eps = 2e-5                   # BatchNormalization epsilon
bn_mom = 0.9                    # BatchNormalization momentum

num_classes = 10                # units of the final Dense layer
stages = (9, 9, 9)              # residual modules per stage
filters = (64, 64, 128, 256)    # filters[0]: first CONV, filters[i+1]: stage i
# chanDim = -1

### network initialization for ResNet-56

In [None]:
# declare the symbolic input
inputShape = (32, 32, 3)
inputs = Input(shape=inputShape)
print('inputs shape: {}'.format(inputs.shape), '\n')

# BN on the input followed by the first 3 x 3 CONV with filters[0] filters
x = BatchNormalization(epsilon=bn_eps, momentum=bn_mom)(inputs)
x = Conv2D(
    filters[0],
    (3, 3),
    use_bias=False,
    padding='same',
    kernel_regularizer=l2(reg_str),
)(x)
print('x shape: {}'.format(x.shape))

### stage 1, phase 1 -> 1 residual_module() with stride = (1,1)

In [None]:
# stage 1, first module written out layer by layer; the shortcut is the
# untouched input of the module
shortcut = x
print('shortcut shape: {}'.format(shortcut.shape), '\n')

# block 1: BN => ReLU => 1 x 1 CONV with 64 // 4 filters
x = BatchNormalization(epsilon=bn_eps, momentum=bn_mom)(x)
x = Activation('relu')(x)
x = Conv2D(
    filters=64 // 4,
    kernel_size=(1, 1),
    use_bias=False,
    kernel_regularizer=l2(0.0005),
)(x)
print('x shape: {}'.format(x.shape), '\n')

# block 2: BN => ReLU => 3 x 3 CONV with 64 // 4 filters
x = BatchNormalization(epsilon=bn_eps, momentum=bn_mom)(x)
x = Activation('relu')(x)
x = Conv2D(
    filters=64 // 4,
    kernel_size=(3, 3),
    padding='same',
    use_bias=False,
    kernel_regularizer=l2(0.0005),
)(x)
print('x shape: {}'.format(x.shape), '\n')

# block 3: BN => ReLU => 1 x 1 CONV with 64 filters
x = BatchNormalization(epsilon=bn_eps, momentum=bn_mom)(x)
x = Activation('relu')(x)
x = Conv2D(
    filters=64,
    kernel_size=(1, 1),
    use_bias=False,
    kernel_regularizer=l2(0.0005),
)(x)
print('x shape: {}'.format(x.shape), '\n')

# add the shortcut to the output of block 3
x = add([x, shortcut])
print('x shape: {}'.format(x.shape))

### stage 1, phase 2 -> the remaining 8 residual_module() calls with stride = (1,1)

In [None]:
# stage 1, phase 2: apply residual_module 9 - 1 times with stride (1, 1)
# and report the tensor shape after every call
for step in range(9 - 1):
    x = residual_module(
        inputs=x,
        num_filters=64,
        stride=(1, 1),
        reg_str=0.0005,
        bn_eps=2e-5,
        bn_mom=0.9,
    )
    print('x shape for j = {} is {}'.format(step, x.shape), '\n')

In [None]:
# stack the residual stages on top of the current tensor
for i, num_modules in enumerate(stages):
    # first module of the stage: it changes the number of filters and, after
    # stage 0, strides by (2, 2), so the shortcut must be projected
    # (reduce=True) for the addition inside residual_module to be possible
    stride = (1, 1) if i == 0 else (2, 2)
    x = residual_module(inputs=x, num_filters=filters[i+1], stride=stride,
                        reduce=True, bn_eps=bn_eps, bn_mom=bn_mom)

    # remaining modules of the stage keep the shape unchanged
    for j in range(num_modules - 1):
        x = residual_module(inputs=x, num_filters=filters[i+1], stride=(1, 1),
                            bn_eps=bn_eps, bn_mom=bn_mom)

# BN => ReLU => 8 x 8 average pooling; bn_eps / bn_mom are the names set in
# the parameter cell
x = BatchNormalization(epsilon=bn_eps, momentum=bn_mom)(x)
x = Activation('relu')(x)
x = AveragePooling2D(pool_size=(8, 8))(x)

# softmax classifier
x = Flatten()(x)
x = Dense(num_classes, kernel_regularizer=l2(reg_str))(x)
x = Activation('softmax')(x)

model = Model(inputs=inputs, outputs=x, name='resnet')

In [None]:
# NOTE(review): this cell holds only the signature, with no body; a definition
# with the same signature and a full body appears in a later cell.
def residual_module(inputs, num_filters, stride, reduce=False,
                    reg_str=0.0001, bn_eps=2e-5, bn_mom=0.9):

### stage 2, phase 1 -> 1 residual_module() with stride = (2,2)

In [None]:
# stage 2, first module written out layer by layer: stride (2, 2) in the
# 3 x 3 CONV and a projected shortcut, mirroring residual_module(reduce=True)
shortcut = x
print('shortcut shape: {}'.format(shortcut.shape),'\n')

# block 1 (the activation is kept: the projected shortcut starts from it)
x = BatchNormalization(epsilon=bn_eps,momentum=bn_mom)(x)
act1 = Activation('relu')(x)
x = Conv2D(filters=int(128/4),kernel_size=(1,1),use_bias=False,
           kernel_regularizer=l2(0.0005))(act1)
print('x shape: {}'.format(x.shape),'\n')

# block 2 - the strided 3 x 3 CONV
x = BatchNormalization(epsilon=bn_eps,momentum=bn_mom)(x)
x = Activation('relu')(x)
x = Conv2D(filters=int(128/4),kernel_size=(3,3),strides=(2,2),
           padding='same',use_bias=False,kernel_regularizer=l2(0.0005))(x)
print('x shape: {}'.format(x.shape),'\n')

# block 3
x = BatchNormalization(epsilon=bn_eps,momentum=bn_mom)(x)
x = Activation('relu')(x)
x = Conv2D(filters=128,kernel_size=(1,1),use_bias=False,
           kernel_regularizer=l2(0.0005))(x)
print('x shape: {}'.format(x.shape),'\n')

# project the shortcut with a strided 1 x 1 CONV so that it has the same
# number of filters and the same stride as the output of block 3
shortcut = Conv2D(filters=128,kernel_size=(1,1),strides=(2,2),use_bias=False,
                  kernel_regularizer=l2(0.0005))(act1)

# add
x = add([x,shortcut])
print('x shape: {}'.format(x.shape))

In [None]:
# first module of stage 2: with stride (2, 2) the 3 x 3 CONV is strided, so
# the shortcut has to be projected with the same stride (reduce=True) before
# the two tensors can be added
x = residual_module(inputs=x,num_filters=128,stride=(2,2),reduce=True,
                    reg_str=0.0005,bn_eps=2e-5,bn_mom=0.9)
print('x shape stage 2 - phase 1: {}'.format(x.shape))

In [None]:
# echo the current tensor `x` in the notebook output
x

In [None]:
# echo the current `shortcut` tensor in the notebook output
shortcut

In [None]:
# echo the symbolic shape of `x` in the notebook output
x.shape

In [None]:
# print the stride picked for each stage: (1, 1) for the first stage,
# (2, 2) for every later one
for stage_idx in range(len(stages)):
    if stage_idx == 0:
        stride = (1, 1)
    else:
        stride = (2, 2)
    print(stride)

In [None]:
def residual_module(inputs, num_filters, stride, reduce=False,
                    reg_str=0.0001, bn_eps=2e-5, bn_mom=0.9):
    '''
    Pre-activation bottleneck residual module: three BN => ReLU => CONV
    blocks (1 x 1, 3 x 3, 1 x 1) whose output is added to a shortcut.
    ---
    Args:
        inputs: input tensor of the module.
        num_filters: number of filters of the final 1 x 1 CONV; the first
            two CONV layers use int(num_filters * 0.25) filters.
        stride: strides of the 3 x 3 CONV, and of the shortcut CONV when
            reduce is True.
        reduce: if True, the shortcut is a strided 1 x 1 CONV applied to the
            first activation; if False, the shortcut is the raw input, so
            the input must already have the shape of the final CONV output.
        reg_str: L2 regularization strength of every CONV kernel.
        bn_eps: epsilon of every BatchNormalization layer.
        bn_mom: momentum of every BatchNormalization layer.

    Returns:
        x: sum of the final CONV output and the shortcut.
    '''
    # BatchNormalization defaults to the last axis, which is the channel axis
    # only for 'channels_last'; follow the active image data format instead
    bn_axis = 1 if K.image_data_format() == 'channels_first' else -1
    bottleneck_filters = int(num_filters*0.25)

    shortcut = inputs

    # first block of ResNet module - 1 x 1 CONV
    bn1 = BatchNormalization(axis=bn_axis, epsilon=bn_eps,
                             momentum=bn_mom)(inputs)
    act1 = Activation('relu')(bn1)
    conv1 = Conv2D(filters=bottleneck_filters, kernel_size=(1, 1),
                   use_bias=False, kernel_regularizer=l2(reg_str))(act1)

    # second block of ResNet module - 3 x 3 CONV (the only strided one)
    bn2 = BatchNormalization(axis=bn_axis, epsilon=bn_eps,
                             momentum=bn_mom)(conv1)
    act2 = Activation('relu')(bn2)
    conv2 = Conv2D(filters=bottleneck_filters, kernel_size=(3, 3),
                   strides=stride, padding='same', use_bias=False,
                   kernel_regularizer=l2(reg_str))(act2)

    # third and final block of ResNet module - 1 x 1 CONV
    bn3 = BatchNormalization(axis=bn_axis, epsilon=bn_eps,
                             momentum=bn_mom)(conv2)
    act3 = Activation('relu')(bn3)
    conv3 = Conv2D(filters=num_filters, kernel_size=(1, 1), use_bias=False,
                   kernel_regularizer=l2(reg_str))(act3)

    # if we are to reduce the spatial size, apply a CONV layer to the
    # shortcut; it starts from act1 (pre-activation) and uses the same stride
    if reduce:
        shortcut = Conv2D(filters=num_filters, kernel_size=(1, 1),
                          strides=stride, use_bias=False,
                          kernel_regularizer=l2(reg_str))(act1)

    # add together the shortcut and the final CONV
    x = add([conv3, shortcut])

    # return the addition as the output of the ResNet module
    return x

In [None]:
# NOTE(review): this cell holds only the signature, with no body; a definition
# with the same signature and a full body appears in an earlier cell.
def build(width, height, depth, num_classes, stages, num_filters,
          reg_str=0.0001, bn_eps=2e-5, bn_mom=0.9, dataset='cifar'):

In [None]:
# model compilation: SGD optimizer with learning rate 0.1
opt = SGD(lr=0.1)

# build the network for 32 x 32 x 3 inputs, 10 classes, three stages of 9
# residual modules each
model = build(
    width=32,
    height=32,
    depth=3,
    num_classes=10,
    stages=(9, 9, 9),
    num_filters=(64, 64, 128, 256),
    reg_str=0.0005,
)

# categorical cross-entropy loss, accuracy reported as metric
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [None]:
# print the summary of the ResNet model held in `model`
model.summary()