In [None]:
import numpy as np
from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
import pydot
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from resnets_utils import *
from keras.initializers import glorot_uniform
import scipy.misc
from matplotlib.pyplot import imshow
%matplotlib inline

import keras.backend as K
# Tell the Keras backend to use the 'channels_last' image data format.
K.set_image_data_format('channels_last')
# Set the global Keras learning phase to 1.
# NOTE(review): 1 presumably means "training" mode -- confirm against the Keras backend docs.
K.set_learning_phase(1)

In [None]:
def identity_block(X, f, filters, stage, block):
    """
    Residual "identity" block: three CONV -> BATCHNORM components on the main
    path, with the unmodified block input added back before the final ReLU.

    Arguments:
    X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    f -- integer, specifying the shape of the middle CONV's window for the main path
    filters -- python list of integers, defining the number of filters in the CONV layers of the main path
    stage -- integer, used to name the layers, depending on their position in the network
    block -- string/character, used to name the layers, depending on their position in the network

    Returns:
    X -- output of the identity block, tensor of shape (n_H, n_W, n_C)

    NOTE(review): the input is added to the main-path output unchanged, so
    filters[2] presumably has to equal the channel count of X -- confirm.
    """

    # Layer-name prefixes, e.g. stage=2, block='b' -> 'res2b_branch' / 'bn2b_branch'.
    position = str(stage) + block + '_branch'
    conv_prefix = 'res' + position
    bn_prefix = 'bn' + position

    n_first, n_middle, n_last = filters

    # Remember the block input; it is added back unchanged at the end.
    shortcut = X

    # Main path, one row per component:
    # (name suffix, number of filters, kernel size, padding, ReLU afterwards?)
    main_path = (
        ('2a', n_first, (1, 1), 'valid', True),
        ('2b', n_middle, (f, f), 'same', True),
        ('2c', n_last, (1, 1), 'valid', False),  # ReLU is deferred until after the Add
    )
    for suffix, n_filters, window, pad, apply_relu in main_path:
        X = Conv2D(filters=n_filters, kernel_size=window, strides=(1, 1), padding=pad,
                   name=conv_prefix + suffix, kernel_initializer=glorot_uniform(seed=0))(X)
        X = BatchNormalization(axis=3, name=bn_prefix + suffix)(X)
        if apply_relu:
            X = Activation('relu')(X)

    # Final step: merge main path with the shortcut, then apply the non-linearity.
    X = Add()([X, shortcut])
    return Activation('relu')(X)

In [None]:
# Smoke test: run identity_block once on a small random batch and print one
# slice of the result.
# NOTE(review): `tf` is not imported explicitly in this notebook; presumably it
# is provided by `from resnets_utils import *` -- confirm.

# Start from a fresh default graph, as the convolutional_block test does.
# This keeps the cell re-runnable (layers do not pile up in one graph).
# NOTE(review): the learning phase was fixed globally with
# K.set_learning_phase(1); on a new graph K.learning_phase() should be a
# feedable tensor again, which the feed_dict below relies on -- confirm
# against the Keras backend docs.
tf.reset_default_graph()

with tf.Session() as test:
    np.random.seed(1)  # fixed seed so the random input is reproducible
    A_prev = tf.placeholder("float", [3, 4, 4, 6])
    X = np.random.randn(3, 4, 4, 6)
    A = identity_block(A_prev, f = 2, filters = [2, 4, 6], stage = 1, block = 'a')
    test.run(tf.global_variables_initializer())
    # Feed the random batch and set the learning phase to 0 for this run.
    out = test.run([A], feed_dict={A_prev: X, K.learning_phase(): 0})
    print("out = " + str(out[0][1][1][0]))

In [None]:
def convolutional_block(X, f, filters, stage, block, s = 2):
    """
    Residual "convolutional" block: three CONV -> BATCHNORM components on the
    main path, plus a CONV -> BATCHNORM on the shortcut path, summed and passed
    through a final ReLU.

    Arguments:
    X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    f -- integer, specifying the shape of the middle CONV's window for the main path
    filters -- python list of integers, defining the number of filters in the CONV layers of the main path
    stage -- integer, used to name the layers, depending on their position in the network
    block -- string/character, used to name the layers, depending on their position in the network
    s -- Integer, specifying the stride to be used

    Returns:
    X -- output of the convolutional block, tensor of shape (n_H, n_W, n_C)
    """

    # Layer-name prefixes, e.g. stage=3, block='a' -> 'res3a_branch' / 'bn3a_branch'.
    position = str(stage) + block + '_branch'
    conv_prefix = 'res' + position
    bn_prefix = 'bn' + position

    n_first, n_middle, n_last = filters

    # Keep the block input; the shortcut path is built from it further down.
    block_input = X

    # Main path, one row per component:
    # (name suffix, number of filters, kernel size, stride, padding, ReLU afterwards?)
    # Only the first component uses stride s; the others use stride 1.
    main_path = (
        ('2a', n_first, (1, 1), (s, s), 'valid', True),
        ('2b', n_middle, (f, f), (1, 1), 'same', True),
        ('2c', n_last, (1, 1), (1, 1), 'valid', False),  # ReLU is deferred until after the Add
    )
    for suffix, n_filters, window, step, pad, apply_relu in main_path:
        X = Conv2D(filters=n_filters, kernel_size=window, strides=step, padding=pad,
                   name=conv_prefix + suffix, kernel_initializer=glorot_uniform(seed=0))(X)
        X = BatchNormalization(axis=3, name=bn_prefix + suffix)(X)
        if apply_relu:
            X = Activation('relu')(X)

    # Shortcut path: 1x1 conv with the same stride s and the same number of
    # filters as the last main-path component, followed by batch norm.
    shortcut = Conv2D(filters=n_last, kernel_size=(1, 1), strides=(s, s), padding='valid',
                      name=conv_prefix + '1', kernel_initializer=glorot_uniform(seed=0))(block_input)
    shortcut = BatchNormalization(axis=3, name=bn_prefix + '1')(shortcut)

    # Final step: merge the two paths, then apply the non-linearity.
    X = Add()([X, shortcut])
    return Activation('relu')(X)

In [None]:
# Smoke test: run convolutional_block once on a small random batch and print
# one slice of the result.
# NOTE(review): `tf` is not imported explicitly in this notebook; presumably it
# is provided by `from resnets_utils import *` -- confirm.

# Build the test network in a fresh default graph.
tf.reset_default_graph()

with tf.Session() as test:
    # Fixed seed so the random input batch is reproducible.
    np.random.seed(1)
    X = np.random.randn(3, 4, 4, 6)

    A_prev = tf.placeholder("float", [3, 4, 4, 6])
    A = convolutional_block(A_prev, f=2, filters=[2, 4, 6], stage=1, block='a')

    test.run(tf.global_variables_initializer())
    # Feed the random batch and set the learning phase to 0 for this run.
    out = test.run(
        [A],
        feed_dict={A_prev: X, K.learning_phase(): 0},
    )
    print("out = " + str(out[0][1][1][0]))

In [None]:
def ResNet101(input_shape = (64, 64, 3), classes = 6):
    """
    Build a ResNet-101-style Keras model with the following architecture:
    CONV2D -> BATCHNORM -> RELU -> MAXPOOL -> CONVBLOCK -> IDBLOCK*2 -> CONVBLOCK -> IDBLOCK*3
    -> CONVBLOCK -> IDBLOCK*22 -> CONVBLOCK -> IDBLOCK*2 -> AVGPOOL -> TOPLAYER

    Arguments:
    input_shape -- shape of the images of the dataset
    classes -- integer, number of classes

    Returns:
    model -- a Model() instance in Keras
    """

    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)

    # Stage 1: 7x7 conv (stride 2) -> batch norm -> ReLU -> 3x3 max-pool (stride 2)
    X = Conv2D(filters=64, kernel_size=(7, 7), strides=(2, 2), name='conv1',
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(X)

    # Stages 2-5. Each stage is one convolutional block named 'a' followed by
    # identity blocks named with consecutive letters starting at 'b'.
    # Row layout: (stage number, filters, stride of the convolutional block,
    #              letters naming the identity blocks)
    stage_specs = (
        (2, [64, 64, 256], 1, 'bc'),
        (3, [128, 128, 512], 2, 'bcd'),
        (4, [256, 256, 1024], 2, 'bcdefghijklmnopqrstuvw'),  # 22 identity blocks
        (5, [512, 512, 2048], 2, 'bc'),
    )
    for stage, stage_filters, stride, id_block_names in stage_specs:
        X = convolutional_block(X, f=3, filters=stage_filters, stage=stage, block='a', s=stride)
        for block_name in id_block_names:
            X = identity_block(X, f=3, filters=stage_filters, stage=stage, block=block_name)

    # Average pooling with a 2x2 window.
    # NOTE(review): for the default 64x64 input the stage-5 feature map is
    # presumably 2x2, so this should reduce it to 1x1 -- confirm for other input sizes.
    X = AveragePooling2D(pool_size=(2, 2))(X)

    # Output layer: flatten, then a softmax classifier named 'fc<classes>'.
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', name='fc' + str(classes),
              kernel_initializer=glorot_uniform(seed=0))(X)

    # Name the model after the architecture this function actually builds.
    model = Model(inputs=X_input, outputs=X, name='ResNet101')

    return model

In [None]:
# Build the network for 64x64 three-channel inputs and 6 output classes.
model = ResNet101(input_shape=(64, 64, 3), classes=6)

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Load the raw dataset.
# NOTE(review): load_dataset and convert_to_one_hot are presumably provided by
# `from resnets_utils import *` -- confirm.
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = load_dataset()

# Scale the image arrays by 1/255.
X_train = X_train_orig / 255.
X_test = X_test_orig / 255.

# Convert the training and test labels to one-hot matrices with 6 classes,
# then transpose the result.
Y_train = convert_to_one_hot(Y_train_orig, 6).T
Y_test = convert_to_one_hot(Y_test_orig, 6).T

# Report the dataset sizes and array shapes.
print("number of training examples = ", X_train.shape[0], sep="")
print("number of test examples = ", X_test.shape[0], sep="")
print("X_train shape: ", X_train.shape, sep="")
print("Y_train shape: ", Y_train.shape, sep="")
print("X_test shape: ", X_test.shape, sep="")
print("Y_test shape: ", Y_test.shape, sep="")

In [None]:
# Train on the prepared training set for 15 epochs with mini-batches of 32.
model.fit(X_train, Y_train, epochs=15, batch_size=32)

In [None]:
# Evaluate on the test set. The first entry of the result is reported as the
# loss and the second as the accuracy (the metric requested at compile time).
preds = model.evaluate(X_test, Y_test)
print("Loss = ", preds[0], sep="")
print("Test Accuracy = ", preds[1], sep="")

In [None]:
# References:
# This code implements the ResNet architecture of He et al. (2015). The implementation also took significant inspiration from, and follows the structure of, Francois Chollet's github repository (linked below).
# - Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Deep Residual Learning for Image Recognition (2015)
# - Francois Chollet's github repository: https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py