# Residual Networks



In [1]:
# Import library
import numpy as np
from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
import pydot
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from resnets_utils import *
from keras.initializers import glorot_uniform
import scipy.misc
from matplotlib.pyplot import imshow
%matplotlib inline

# Keras backend handle: used here for global configuration and, in the test
# cells below, to obtain the learning-phase tensor that is fed via feed_dict.
import keras.backend as K
# Channels are the trailing axis of every image tensor; this is why the
# BatchNormalization layers in this notebook are created with axis = 3.
K.set_image_data_format('channels_last')
# Sets the backend's global learning-phase flag to 1.
# NOTE(review): per the Keras backend docs 1 means "training" — confirm this is
# still what is wanted when model.evaluate() is called further down.
K.set_learning_phase(1)

Using TensorFlow backend.


In [2]:
### Identity block built layer by layer on a freshly reset TensorFlow graph
# NOTE(review): `tf` is not imported explicitly in the import cell; presumably it
# arrives through `from resnets_utils import *` — TODO confirm.
tf.reset_default_graph()

with tf.Session() as test:
    # Seed NumPy before drawing the input so the printed numbers are repeatable
    np.random.seed(1)
    A_prev = tf.placeholder("float", [3, 4, 4, 6])
    X = np.random.randn(3, 4, 4, 6)

    # Prefixes shared by the layer names of this block
    conv_name_base = 'res1_a_branch'
    bn_name_base = 'bn1_a_branch'

    # Main path, component 1: 1x1 conv (2 filters) -> batch norm -> ReLU
    X1 = Conv2D(2, (1, 1), strides=(1, 1), padding='valid',
                kernel_initializer=glorot_uniform(seed=0),
                name=conv_name_base + '2a')(A_prev)
    X2 = BatchNormalization(name=bn_name_base + '2a', axis=3)(X1)
    X3 = Activation('relu')(X2)

    # Main path, component 2: 2x2 'same' conv (4 filters) -> batch norm -> ReLU
    X4 = Conv2D(4, (2, 2), strides=(1, 1), padding='same',
                kernel_initializer=glorot_uniform(seed=0),
                name=conv_name_base + '2b')(X3)
    X5 = BatchNormalization(name=bn_name_base + '2b', axis=3)(X4)
    X6 = Activation('relu')(X5)

    # Main path, component 3: 1x1 conv back to 6 channels -> batch norm (no ReLU yet)
    X7 = Conv2D(6, (1, 1), strides=(1, 1), padding='valid',
                kernel_initializer=glorot_uniform(seed=0),
                name=conv_name_base + '2c')(X6)
    X8 = BatchNormalization(name=bn_name_base + '2c', axis=3)(X7)

    # Skip connection: add the untouched input to the main path, then apply ReLU
    X9 = Add()([X8, A_prev])
    X10 = Activation('relu')(X9)

    #### Run
    # Both fetches use the same feed: the random input, learning phase fed as 0
    eval_feed = {A_prev: X, K.learning_phase(): 0}
    test.run(tf.global_variables_initializer())
    out2 = test.run([X2], feed_dict=eval_feed)
    out10 = test.run([X10], feed_dict=eval_feed)

    # Show the channel vector at example 0, row 1, column 1 for input and outputs
    print("X = " + str(X[0, 1, 1]))
    print("out2 = " + str(out2[0][1, 1, 0]))
    print("out10 = " + str(out10[0][1, 1, 0]))

X = [-0.69166075 -0.39675353 -0.6871727  -0.84520564 -0.67124613 -0.0126646 ]
out2 = [-2.6157701  -1.72260296]
out10 = [ 0.19716813  0.          1.35612273  2.17130733  0.          1.33249867]


In [3]:
# Shape of the NumPy input, followed by every symbolic tensor built in the cell above
print("X.shape: ", X.shape)
for caption, tensor in (
    ("A_prev: ", A_prev),
    ("X1: ", X1),
    ("X2: ", X2),
    ("X3: ", X3),
    ("X4: ", X4),
    ("X5: ", X5),
    ("X6: ", X6),
    ("X7: ", X7),
    ("X8: ", X8),
    ("X9: ", X9),
    ("X10: ", X10),
):
    print(caption, tensor)

X.shape:  (3, 4, 4, 6)
A_prev:  Tensor("Placeholder:0", shape=(3, 4, 4, 6), dtype=float32)
X1:  Tensor("res1_a_branch2a/BiasAdd:0", shape=(3, 4, 4, 2), dtype=float32)
X2:  Tensor("bn1_a_branch2a/cond/Merge:0", shape=(3, 4, 4, 2), dtype=float32)
X3:  Tensor("activation_1/Relu:0", shape=(3, 4, 4, 2), dtype=float32)
X4:  Tensor("res1_a_branch2b/BiasAdd:0", shape=(3, 4, 4, 4), dtype=float32)
X5:  Tensor("bn1_a_branch2b/cond/Merge:0", shape=(3, 4, 4, 4), dtype=float32)
X6:  Tensor("activation_2/Relu:0", shape=(3, 4, 4, 4), dtype=float32)
X7:  Tensor("res1_a_branch2c/BiasAdd:0", shape=(3, 4, 4, 6), dtype=float32)
X8:  Tensor("bn1_a_branch2c/cond/Merge:0", shape=(3, 4, 4, 6), dtype=float32)
X9:  Tensor("add_1/add:0", shape=(3, 4, 4, 6), dtype=float32)
X10:  Tensor("activation_3/Relu:0", shape=(3, 4, 4, 6), dtype=float32)


In [4]:
#### Convolutional block built layer by layer on a freshly reset TensorFlow graph
tf.reset_default_graph()

with tf.Session() as test:
    #### Placeholder for the block input
    # Seed NumPy first so the random input below is repeatable
    np.random.seed(1)
    A_prev = tf.placeholder("float", [3, 4, 4, 6])

    #### Concrete random input that will be fed into the placeholder
    X = np.random.randn(3, 4, 4, 6)

    # Prefixes shared by the layer names of this block
    conv_name_base = 'res_1_a_branch'
    bn_name_base = 'bn_1_a_branch'

    ##### MAIN PATH #####
    # Component 1: 1x1 conv with stride 2 (2 filters) -> batch norm -> ReLU
    X1 = Conv2D(filters=2, kernel_size=(1, 1), strides=(2, 2),
                kernel_initializer=glorot_uniform(seed=0),
                name=conv_name_base + '2a')(A_prev)
    X2 = BatchNormalization(name=bn_name_base + '2a', axis=3)(X1)
    X3 = Activation('relu')(X2)

    # Component 2: 2x2 'same' conv (4 filters) -> batch norm -> ReLU
    X4 = Conv2D(filters=4, kernel_size=(2, 2), strides=(1, 1), padding="same",
                kernel_initializer=glorot_uniform(seed=0),
                name=conv_name_base + '2b')(X3)
    X5 = BatchNormalization(name=bn_name_base + '2b', axis=3)(X4)
    X6 = Activation('relu')(X5)

    # Component 3: 1x1 conv (6 filters) -> batch norm (no ReLU yet)
    X7 = Conv2D(filters=6, kernel_size=(1, 1), strides=(1, 1),
                kernel_initializer=glorot_uniform(seed=0),
                name=conv_name_base + '2c')(X6)
    X8 = BatchNormalization(name=bn_name_base + '2c', axis=3)(X7)

    ##### SHORTCUT PATH ####
    # 1x1 conv with the same stride and filter count as the main path, so both
    # branches have matching shapes when they are added
    A_prev2 = Conv2D(filters=6, kernel_size=(1, 1), strides=(2, 2),
                     kernel_initializer=glorot_uniform(seed=0),
                     name=conv_name_base + '1')(A_prev)
    A_prev3 = BatchNormalization(name=bn_name_base + '1', axis=3)(A_prev2)

    # Merge the two branches, then apply the final ReLU
    X9 = Add()([X8, A_prev3])
    X10 = Activation('relu')(X9)

    ### Run Test
    # Every fetch uses the same feed: the random input, learning phase fed as 0
    eval_feed = {A_prev: X, K.learning_phase(): 0}
    test.run(tf.global_variables_initializer())
    print("X = " + str(X[0, 1, 1, 0]))
    out2 = test.run([X2], feed_dict=eval_feed)
    print("out2 = " + str(out2[0][1, 1, 0]))
    out8 = test.run([X8], feed_dict=eval_feed)
    print("out8 = " + str(out8[0][1, 1, 0]))
    out_A_prev3 = test.run([A_prev3], feed_dict=eval_feed)
    print("out_A_prev3 = " + str(out_A_prev3[0][1, 1, 0]))
    out9 = test.run([X9], feed_dict=eval_feed)
    print("out9 = " + str(out9[0][1, 1, 0]))
    out10 = test.run([X10], feed_dict=eval_feed)
    print("out10 = " + str(out10[0][1, 1, 0]))
    

X = -0.691660751725
out2 = [ 0.60604793  0.17718017]
out8 = [ 0.12865095  0.06727595  0.30671415  0.07085409  0.58566171  0.0415115 ]
out_A_prev3 = [-0.03846632  1.16762173  0.16150603 -0.03413649 -0.82349372  0.61365455]
out9 = [ 0.09018463  1.23489773  0.46822017  0.0367176  -0.23783201  0.65516603]
out10 = [ 0.09018463  1.23489773  0.46822017  0.0367176   0.          0.65516603]


In [5]:
# Shape of the NumPy input, followed by every symbolic tensor built in the cell above
print("X.shape: ", X.shape)
for caption, tensor in (
    ("A_prev: ", A_prev),
    ("A_prev2: ", A_prev2),
    ("A_prev3: ", A_prev3),
    ("X1: ", X1),
    ("X2: ", X2),
    ("X3: ", X3),
    ("X4: ", X4),
    ("X5: ", X5),
    ("X6: ", X6),
    ("X7: ", X7),
    ("X8: ", X8),
    ("X9: ", X9),
    ("X10: ", X10),
):
    print(caption, tensor)

X.shape:  (3, 4, 4, 6)
A_prev:  Tensor("Placeholder:0", shape=(3, 4, 4, 6), dtype=float32)
A_prev2:  Tensor("res_1_a_branch1/BiasAdd:0", shape=(3, 2, 2, 6), dtype=float32)
A_prev3:  Tensor("bn_1_a_branch1/cond/Merge:0", shape=(3, 2, 2, 6), dtype=float32)
X1:  Tensor("res_1_a_branch2a/BiasAdd:0", shape=(3, 2, 2, 2), dtype=float32)
X2:  Tensor("bn_1_a_branch2a/cond/Merge:0", shape=(3, 2, 2, 2), dtype=float32)
X3:  Tensor("activation_1/Relu:0", shape=(3, 2, 2, 2), dtype=float32)
X4:  Tensor("res_1_a_branch2b/BiasAdd:0", shape=(3, 2, 2, 4), dtype=float32)
X5:  Tensor("bn_1_a_branch2b/cond/Merge:0", shape=(3, 2, 2, 4), dtype=float32)
X6:  Tensor("activation_2/Relu:0", shape=(3, 2, 2, 4), dtype=float32)
X7:  Tensor("res_1_a_branch2c/BiasAdd:0", shape=(3, 2, 2, 6), dtype=float32)
X8:  Tensor("bn_1_a_branch2c/cond/Merge:0", shape=(3, 2, 2, 6), dtype=float32)
X9:  Tensor("add_1/add:0", shape=(3, 2, 2, 6), dtype=float32)
X10:  Tensor("activation_3/Relu:0", shape=(3, 2, 2, 6), dtype=float32)


In [6]:
# GRADED FUNCTION: identity_block

def identity_block(X, f, filters, stage, block):
    """
    Identity residual block: three CONV -> BatchNorm stages on the main path,
    whose result is added to the unchanged block input before the final ReLU.

    Arguments:
    X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    f -- integer, side length of the square window of the middle CONV
    filters -- sequence of three integers, the filter counts of the three main-path CONVs;
               the last one must equal n_C_prev so the Add layer can combine both branches
    stage -- integer, part of every layer name (position of the block in the network)
    block -- string/character, part of every layer name (position of the block in the stage)

    Returns:
    tensor of shape (m, n_H, n_W, n_C) -- output of the identity block
    """

    # Layer-name prefixes, e.g. 'res2b_branch' and 'bn2b_branch'
    name_tail = str(stage) + block + '_branch'
    conv_name_base = 'res' + name_tail
    bn_name_base = 'bn' + name_tail

    # Filter counts of the three convolutions
    n_first, n_middle, n_last = filters

    # Keep a handle on the input; it is added back at the end
    shortcut = X

    # Component 1: 1x1 conv -> batch norm -> ReLU
    main = Conv2D(n_first, (1, 1), strides=(1, 1), padding='valid',
                  kernel_initializer=glorot_uniform(seed=0),
                  name=conv_name_base + '2a')(X)
    main = BatchNormalization(name=bn_name_base + '2a', axis=3)(main)
    main = Activation('relu')(main)

    # Component 2: f x f 'same' conv -> batch norm -> ReLU
    main = Conv2D(n_middle, (f, f), strides=(1, 1), padding='same',
                  kernel_initializer=glorot_uniform(seed=0),
                  name=conv_name_base + '2b')(main)
    main = BatchNormalization(name=bn_name_base + '2b', axis=3)(main)
    main = Activation('relu')(main)

    # Component 3: 1x1 conv -> batch norm (the ReLU comes after the addition)
    main = Conv2D(n_last, (1, 1), strides=(1, 1), padding='valid',
                  kernel_initializer=glorot_uniform(seed=0),
                  name=conv_name_base + '2c')(main)
    main = BatchNormalization(name=bn_name_base + '2c', axis=3)(main)

    # Skip connection followed by the final ReLU
    main = Add()([main, shortcut])
    main = Activation('relu')(main)

    return main

In [7]:
# GRADED FUNCTION: convolutional_block

def convolutional_block(X, f, filters, stage, block, s = 2):
    """
    Convolutional residual block: three CONV -> BatchNorm stages on the main path,
    plus a CONV -> BatchNorm shortcut that brings the input to the same shape,
    added together before the final ReLU.

    Arguments:
    X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    f -- integer, side length of the square window of the middle CONV
    filters -- sequence of three integers, the filter counts of the three main-path CONVs;
               the last one is also used for the shortcut CONV
    stage -- integer, part of every layer name (position of the block in the network)
    block -- string/character, part of every layer name (position of the block in the stage)
    s -- integer, stride of the first main-path CONV and of the shortcut CONV

    Returns:
    tensor of shape (m, n_H, n_W, n_C) -- output of the convolutional block
    """

    # Layer-name prefixes, e.g. 'res3a_branch' and 'bn3a_branch'
    name_tail = str(stage) + block + '_branch'
    conv_name_base = 'res' + name_tail
    bn_name_base = 'bn' + name_tail

    # Filter counts of the three main-path convolutions
    n_first, n_middle, n_last = filters

    # Keep a handle on the input; the shortcut branch starts from it
    shortcut = X

    ##### MAIN PATH #####
    # Component 1: 1x1 conv with stride s -> batch norm -> ReLU
    main = Conv2D(filters=n_first, kernel_size=(1, 1), strides=(s, s),
                  kernel_initializer=glorot_uniform(seed=0),
                  name=conv_name_base + '2a')(X)
    main = BatchNormalization(name=bn_name_base + '2a', axis=3)(main)
    main = Activation('relu')(main)

    # Component 2: f x f 'same' conv -> batch norm -> ReLU
    main = Conv2D(filters=n_middle, kernel_size=(f, f), strides=(1, 1), padding="same",
                  kernel_initializer=glorot_uniform(seed=0),
                  name=conv_name_base + '2b')(main)
    main = BatchNormalization(name=bn_name_base + '2b', axis=3)(main)
    main = Activation('relu')(main)

    # Component 3: 1x1 conv -> batch norm (the ReLU comes after the addition)
    main = Conv2D(filters=n_last, kernel_size=(1, 1), strides=(1, 1),
                  kernel_initializer=glorot_uniform(seed=0),
                  name=conv_name_base + '2c')(main)
    main = BatchNormalization(name=bn_name_base + '2c', axis=3)(main)

    ##### SHORTCUT PATH #####
    # 1x1 conv with stride s and n_last filters, then batch norm
    shortcut = Conv2D(filters=n_last, kernel_size=(1, 1), strides=(s, s),
                      kernel_initializer=glorot_uniform(seed=0),
                      name=conv_name_base + '1')(shortcut)
    shortcut = BatchNormalization(name=bn_name_base + '1', axis=3)(shortcut)

    # Merge both branches, then apply the final ReLU
    main = Add()([main, shortcut])
    main = Activation('relu')(main)

    return main

In [15]:
# ResNet50 assembled inline from convolutional_block and identity_block.
# Every intermediate tensor keeps its own XC<n> name so it can be inspected later.

input_shape = (64, 64, 3)
classes = 6

# Symbolic input tensor with shape input_shape
X_input = Input(shape=input_shape)

# Zero-padding of 3 on each spatial side
XC2 = ZeroPadding2D(padding=(3, 3))(X_input)

# Stage 1: 7x7 conv, stride 2 -> batch norm -> ReLU -> 3x3 max-pool, stride 2
XC3 = Conv2D(filters=64, kernel_size=(7, 7), strides=(2, 2),
             kernel_initializer=glorot_uniform(seed=0), name='conv1')(XC2)
XC4 = BatchNormalization(name='bn_conv1', axis=3)(XC3)
XC5 = Activation('relu')(XC4)
XC6 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(XC5)

# Stage 2: one convolutional block (stride 1) and two identity blocks
XC7 = convolutional_block(XC6, 3, [64, 64, 256], stage=2, block='a', s=1)
XC8 = identity_block(XC7, f=3, filters=[64, 64, 256], stage=2, block='b')
XC9 = identity_block(XC8, f=3, filters=[64, 64, 256], stage=2, block='c')

# Stage 3: one convolutional block (stride 2) and three identity blocks
XC10 = convolutional_block(XC9, 3, [128, 128, 512], stage=3, block='a', s=2)
XC11 = identity_block(XC10, f=3, filters=[128, 128, 512], stage=3, block='b')
XC12 = identity_block(XC11, f=3, filters=[128, 128, 512], stage=3, block='c')
XC13 = identity_block(XC12, f=3, filters=[128, 128, 512], stage=3, block='d')

# Stage 4: one convolutional block (stride 2) and five identity blocks
XC14 = convolutional_block(XC13, 3, [256, 256, 1024], stage=4, block='a', s=2)
XC15 = identity_block(XC14, f=3, filters=[256, 256, 1024], stage=4, block='b')
XC16 = identity_block(XC15, f=3, filters=[256, 256, 1024], stage=4, block='c')
XC17 = identity_block(XC16, f=3, filters=[256, 256, 1024], stage=4, block='d')
XC18 = identity_block(XC17, f=3, filters=[256, 256, 1024], stage=4, block='e')
XC19 = identity_block(XC18, f=3, filters=[256, 256, 1024], stage=4, block='f')

# Stage 5: one convolutional block (stride 2) and two identity blocks.
# The identity blocks use [512, 512, 2048], the same filter sizes as the
# convolutional block that opens the stage (not [256, 256, 2048]).
XC20 = convolutional_block(XC19, 3, [512, 512, 2048], stage=5, block='a', s=2)
XC21 = identity_block(XC20, f=3, filters=[512, 512, 2048], stage=5, block='b')
XC22 = identity_block(XC21, f=3, filters=[512, 512, 2048], stage=5, block='c')

# Average pooling with a 2x2 window
XC23 = AveragePooling2D(pool_size=(2, 2), name="avg_pool")(XC22)

# Classifier head: flatten, then a softmax layer with one unit per class
XC24 = Flatten()(XC23)
XC25 = Dense(units=classes, activation='softmax',
             kernel_initializer=glorot_uniform(seed=0),
             name='fc' + str(classes))(XC24)

# Wrap the tensor graph from X_input to XC25 in a Keras model
model = Model(inputs=X_input, outputs=XC25, name='ResNet50')

In [16]:
## Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Uncomment to check that the layer dimensions look right
#model.summary()

In [None]:
def load_dataset():
    """
    Read the train and test sets from datasets/train_signs.h5 and
    datasets/test_signs.h5.

    Both HDF5 files are opened read-only inside `with` blocks and fully copied
    into NumPy arrays, so the file handles are closed before the function
    returns (previously they were left open).

    NOTE(review): `h5py` is not imported explicitly in this notebook; presumably it
    arrives through `from resnets_utils import *` — TODO confirm. That star import
    may also provide its own `load_dataset`, which this definition would shadow.

    Returns:
    train_set_x_orig -- array read from the "train_set_x" dataset (train features)
    train_set_y_orig -- train labels from "train_set_y", reshaped to (1, number of train labels)
    test_set_x_orig -- array read from the "test_set_x" dataset (test features)
    test_set_y_orig -- test labels from "test_set_y", reshaped to (1, number of test labels)
    classes -- array read from the "list_classes" dataset of the test file
    """
    # Slicing with [:] materialises each dataset in memory, so the arrays stay
    # valid after the file is closed.
    with h5py.File('datasets/train_signs.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels

    with h5py.File('datasets/test_signs.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels
        classes = np.array(test_dataset["list_classes"][:]) # the list of classes

    # Turn the label vectors into single-row matrices
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [11]:
# Read the raw arrays
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = load_dataset()

# Normalize image vectors by dividing every value by 255
X_train = X_train_orig / 255.
X_test = X_test_orig / 255.

# One-hot encode the labels over 6 classes; the transpose puts examples on the first axis
Y_train = convert_to_one_hot(Y_train_orig, 6).T
Y_test = convert_to_one_hot(Y_test_orig, 6).T

# Report the sample counts and array shapes
for caption, value in (
    ("number of training examples = ", X_train.shape[0]),
    ("number of test examples = ", X_test.shape[0]),
    ("X_train shape: ", X_train.shape),
    ("Y_train shape: ", Y_train.shape),
    ("X_test shape: ", X_test.shape),
    ("Y_test shape: ", Y_test.shape),
):
    print(caption + str(value))

number of training examples = 1080
number of test examples = 120
X_train shape: (1080, 64, 64, 3)
Y_train shape: (1080, 6)
X_test shape: (120, 64, 64, 3)
Y_test shape: (120, 6)


In [17]:
# Fit the model: 2 epochs over the training set, mini-batches of 32
model.fit(x=X_train, y=Y_train, batch_size=32, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x213d188d2b0>

In [18]:
### Validate the model with test data
# evaluate() returns the loss first, then the metric requested at compile time (accuracy)
preds = model.evaluate(x=X_test, y=Y_test)
print("Loss = " + str(preds[0]))
print("Test Accuracy = " + str(preds[1]))

Loss = 2.70929085414
Test Accuracy = 0.358333334327


In [23]:
### Model summary
# Prints one row per layer: layer name and type, output shape, parameter count
# and the layer(s) it is connected to.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 64, 64, 3)    0                                            
__________________________________________________________________________________________________
zero_padding2d_2 (ZeroPadding2D (None, 70, 70, 3)    0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 32, 32, 64)   9472        zero_padding2d_2[0][0]           
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 32, 32, 64)   256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

bn4c_branch2a (BatchNormalizati (None, 4, 4, 256)    1024        res4c_branch2a[0][0]             
__________________________________________________________________________________________________
activation_81 (Activation)      (None, 4, 4, 256)    0           bn4c_branch2a[0][0]              
__________________________________________________________________________________________________
res4c_branch2b (Conv2D)         (None, 4, 4, 256)    590080      activation_81[0][0]              
__________________________________________________________________________________________________
bn4c_branch2b (BatchNormalizati (None, 4, 4, 256)    1024        res4c_branch2b[0][0]             
__________________________________________________________________________________________________
activation_82 (Activation)      (None, 4, 4, 256)    0           bn4c_branch2b[0][0]              
__________________________________________________________________________________________________
res4c_bran

In [22]:
### Model 1: wrap the same tensors again, compile, and train for 20 epochs
# NOTE(review): X_input and XC25 are the very tensors `model` was built from, so
# model1 presumably shares its layers (and therefore its weights) with `model`
# instead of starting from a fresh initialisation — confirm that continuing the
# earlier 2-epoch training is what is intended here.
model1 = Model(inputs=X_input, outputs=XC25, name='ResNet50')
model1.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model1.fit(x=X_train, y=Y_train, batch_size=32, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2138d88df60>

In [24]:
## Validate model 1 with test data
# evaluate() returns the loss first, then the metric requested at compile time (accuracy)
preds1 = model1.evaluate(x=X_test, y=Y_test)
print("Loss = " + str(preds1[0]))
print("Test Accuracy = " + str(preds1[1]))

Loss = 7.30493329366
Test Accuracy = 0.25833333234


In [39]:
## Model 1 summary
# Prints one row per layer: layer name and type, output shape, parameter count
# and the layer(s) it is connected to.
model1.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 64, 64, 3)    0                                            
__________________________________________________________________________________________________
zero_padding2d_2 (ZeroPadding2D (None, 70, 70, 3)    0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 32, 32, 64)   9472        zero_padding2d_2[0][0]           
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 32, 32, 64)   256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

__________________________________________________________________________________________________
bn4d_branch2c (BatchNormalizati (None, 4, 4, 1024)   4096        res4d_branch2c[0][0]             
__________________________________________________________________________________________________
add_28 (Add)                    (None, 4, 4, 1024)   0           bn4d_branch2c[0][0]              
                                                                 activation_83[0][0]              
__________________________________________________________________________________________________
activation_86 (Activation)      (None, 4, 4, 1024)   0           add_28[0][0]                     
__________________________________________________________________________________________________
res4e_branch2a (Conv2D)         (None, 4, 4, 256)    262400      activation_86[0][0]              
__________________________________________________________________________________________________
bn4e_branc