# ResNet 50

A TensorFlow reconstruction of ResNet50. Some useful links:

* [Keras implementation](https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py)
* [NetScope visualisation of the network](http://ethereon.github.io/netscope/#/gist/db945b393d40bfa26006)
* [Keras pre-trained weights (h5)](https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5)

In [3]:
import os

import numpy as np

import h5py

import tensorflow as tf
# Show which TensorFlow release this notebook session is running against.
print tf.__version__

1.2.1


---
### Data

In [4]:
# Directory holding the pre-trained weights; '' means the current directory.
PARAMS_PATH = ''
# os.path.join inserts the separator itself, so PARAMS_PATH may be given with
# or without a trailing slash (plain concatenation required one).
PARAMS_FILE = os.path.join(PARAMS_PATH, 'resnet50_weights_tf_dim_ordering_tf_kernels.h5')

# LOG_PATH = ''
# LOG_DIRECTORY = LOG_PATH + 'Tensorflow/model-resnet50/logs'
# if not os.path.exists(LOG_DIRECTORY):
#     os.makedirs(LOG_DIRECTORY)

In [5]:
data_h5 = h5py.File(PARAMS_FILE, 'r')

variables = [ key for key in data_h5.keys() if len(data_h5[key])>0 ]
print variables

[u'bn2a_branch1', u'bn2a_branch2a', u'bn2a_branch2b', u'bn2a_branch2c', u'bn2b_branch2a', u'bn2b_branch2b', u'bn2b_branch2c', u'bn2c_branch2a', u'bn2c_branch2b', u'bn2c_branch2c', u'bn3a_branch1', u'bn3a_branch2a', u'bn3a_branch2b', u'bn3a_branch2c', u'bn3b_branch2a', u'bn3b_branch2b', u'bn3b_branch2c', u'bn3c_branch2a', u'bn3c_branch2b', u'bn3c_branch2c', u'bn3d_branch2a', u'bn3d_branch2b', u'bn3d_branch2c', u'bn4a_branch1', u'bn4a_branch2a', u'bn4a_branch2b', u'bn4a_branch2c', u'bn4b_branch2a', u'bn4b_branch2b', u'bn4b_branch2c', u'bn4c_branch2a', u'bn4c_branch2b', u'bn4c_branch2c', u'bn4d_branch2a', u'bn4d_branch2b', u'bn4d_branch2c', u'bn4e_branch2a', u'bn4e_branch2b', u'bn4e_branch2c', u'bn4f_branch2a', u'bn4f_branch2b', u'bn4f_branch2c', u'bn5a_branch1', u'bn5a_branch2a', u'bn5a_branch2b', u'bn5a_branch2c', u'bn5b_branch2a', u'bn5b_branch2b', u'bn5b_branch2c', u'bn5c_branch2a', u'bn5c_branch2b', u'bn5c_branch2c', u'bn_conv1', u'conv1', u'fc1000', u'res2a_branch1', u'res2a_branch2

In [6]:
print list(data_h5['bn2a_branch1'])

[u'bn2a_branch1_beta:0', u'bn2a_branch1_gamma:0', u'bn2a_branch1_running_mean:0', u'bn2a_branch1_running_std:0']


In [7]:
print data_h5['bn2a_branch1']['bn2a_branch1_beta:0'].shape
print data_h5['bn2a_branch1']['bn2a_branch1_gamma:0'].value[:5]

(256,)
[ 0.30266267  1.10643625  1.773862    0.81150401  1.0933665 ]


#### Model Components

In [12]:
# ----------------------- #
def dense_layer(input_layer, data, layer_name, lock=True):
    """Apply a fully connected layer whose parameters are read from `data`.

    Computes `input_layer x W + b`, where `W` and `b` are loaded from
    `data[layer_name][layer_name + '_W:0']` and
    `data[layer_name][layer_name + '_b:0']`.

    Args:
        input_layer: tensor fed to `tf.matmul` (the caller in this notebook
            passes a 2-D float32 tensor).
        data: mapping of layer name -> mapping of parameter name -> array,
            e.g. an open h5py file.
        layer_name: name of the layer; used for the lookup keys and as the
            variable scope.
        lock: if True (default) the parameters are embedded as constants; if
            False they are created as variables initialised from the stored
            values, mirroring `conv_layer`.

    Returns:
        The result of the matmul with the bias added (no activation applied).
    """
    with tf.variable_scope(layer_name):
        # Materialise the stored parameters as float32 arrays, the same way
        # the other layer builders in this file do.
        W_val = np.array(data[layer_name][layer_name+'_W:0']).astype(np.float32)
        b_val = np.array(data[layer_name][layer_name+'_b:0']).astype(np.float32)
        # bias_add expects a 1-D bias.
        b_val = np.reshape(b_val, (b_val.shape[0],))
        if lock:
            W = tf.constant(W_val)
            b = tf.constant(b_val)
        else:
            # As variables.
            W = tf.get_variable(layer_name+'_W', shape=W_val.shape,
                                initializer=tf.constant_initializer(W_val), dtype=tf.float32)
            b = tf.get_variable(layer_name+'_b', shape=b_val.shape,
                                initializer=tf.constant_initializer(b_val), dtype=tf.float32)
        X = tf.matmul(input_layer, W)
        X = tf.nn.bias_add(X, b)
        return X
    # ----------------------- #


# ----------------------- #
def conv_layer(input_layer, data, layer_name, strides=(1, 1, 1, 1), padding='VALID', lock=True):
    """Apply a 2-D convolution plus bias using parameters read from `data`.

    The kernel and bias are loaded from `data[layer_name][layer_name + '_W:0']`
    and `data[layer_name][layer_name + '_b:0']` and cast to float32.

    Args:
        input_layer: input tensor passed to `tf.nn.conv2d`.
        data: mapping of layer name -> mapping of parameter name -> array,
            e.g. an open h5py file.
        layer_name: name of the layer; used for the lookup keys, the variable
            scope and the conv op name.
        strides: four ints forwarded to `tf.nn.conv2d`. Any sequence is
            accepted; the default is an immutable tuple so it cannot be
            modified between calls.
        padding: 'VALID' or 'SAME', forwarded to `tf.nn.conv2d`.
        lock: if True (default) the parameters are embedded as constants; if
            False they are created as variables initialised from the stored
            values.

    Returns:
        The convolution output with the bias added (no activation applied).
    """
    with tf.variable_scope(layer_name):
        W_val = np.array(data[layer_name][layer_name+'_W:0']).astype(np.float32)
        b_val = np.array(data[layer_name][layer_name+'_b:0']).astype(np.float32)
        # Flatten the bias once so both branches below see the same 1-D shape
        # (previously only the constant branch reshaped it).
        b_val = np.reshape(b_val, (b_val.shape[0],))
        if lock:
            W = tf.constant(W_val)
            b = tf.constant(b_val)
        else:
            # As variables.
            W = tf.get_variable(layer_name+'_W', shape=W_val.shape,
                                initializer=tf.constant_initializer(W_val), dtype=tf.float32)
            b = tf.get_variable(layer_name+'_b', shape=b_val.shape,
                                initializer=tf.constant_initializer(b_val), dtype=tf.float32)
        X = tf.nn.conv2d(input_layer, filter=W, strides=list(strides), padding=padding, name=layer_name)
        X = tf.nn.bias_add(X, b)
        return X
    # ----------------------- #


# ----------------------- #
def batch_norm_layer(input_layer, data, layer_name, lock=True, epsilon=1e-12):
    """Apply batch normalisation using stored statistics read from `data`.

    Four parameters are loaded from `data[layer_name]`: `_running_mean:0`,
    `_running_std:0`, `_beta:0` and `_gamma:0` (each prefixed with
    `layer_name`), cast to float32 and passed to `tf.nn.batch_normalization`.

    Args:
        input_layer: tensor to normalise.
        data: mapping of layer name -> mapping of parameter name -> array,
            e.g. an open h5py file.
        layer_name: name of the layer; used for the lookup keys and as the
            variable scope.
        lock: if True (default) the parameters are embedded as constants; if
            False they are created as variables initialised from the stored
            values.
        epsilon: value forwarded as `variance_epsilon`. Defaults to 1e-12,
            the value this function has always used.
            NOTE(review): this is probably smaller than the epsilon the
            weights were trained with -- confirm against the Keras
            BatchNormalization default and pass it explicitly if exact parity
            with Keras is needed.

    Returns:
        The normalised tensor.
    """
    def _load_param(suffix):
        # Read one stored parameter and wrap it as a constant or a variable.
        value = np.array(data[layer_name][layer_name+'_'+suffix+':0']).astype(np.float32)
        if lock:
            return tf.constant(value)
        # As variables.
        return tf.get_variable(layer_name+'_'+suffix, shape=value.shape,
                               initializer=tf.constant_initializer(value), dtype=tf.float32)

    with tf.variable_scope(layer_name):
        mean = _load_param('running_mean')
        # The stored 'running_std' array is fed to the `variance` argument
        # unchanged. NOTE(review): presumably Keras 1.x kept the running
        # variance under this name -- confirm before squaring or renaming.
        variance = _load_param('running_std')
        beta = _load_param('beta')
        gamma = _load_param('gamma')
        X = tf.nn.batch_normalization(input_layer, mean=mean, variance=variance,
                                      offset=beta, scale=gamma,
                                      variance_epsilon=epsilon, name='batch-norm')
        return X
    # ----------------------- #


# ----------------------- #
def conv_block(input_layer, stage, data, strides=(1, 2, 2, 1), lock=True):
    """Build a residual block whose shortcut path has its own convolution.

    The main path is conv -> batch norm -> ReLU (branch 2a),
    conv with 'SAME' padding -> batch norm -> ReLU (branch 2b) and
    conv -> batch norm (branch 2c). The shortcut is conv -> batch norm
    (branch 1) applied to `input_layer`. The two paths are added and passed
    through a final ReLU.

    Args:
        input_layer: input tensor of the block.
        stage: stage identifier such as '3a'; layer names are built as
            'res' + stage + '_branch...' and 'bn' + stage + '_branch...'.
        data: weight container forwarded to `conv_layer` / `batch_norm_layer`.
        strides: four ints used by the first main-path convolution and by the
            shortcut convolution. Any sequence is accepted; the default is an
            immutable tuple so it cannot be modified between calls.
        lock: forwarded to every layer; True embeds weights as constants,
            False creates variables.

    Returns:
        The output tensor of the block.
    """
    # Hand a fresh list to the layer builders regardless of what was passed.
    strides = list(strides)

    with tf.variable_scope('conv_block'):

        # Main path.
        x = conv_layer(input_layer, data=data, layer_name='res'+stage+'_branch2a', strides=strides, lock=lock)
        x = batch_norm_layer(x, data=data, layer_name='bn'+stage+'_branch2a', lock=lock)
        x = tf.nn.relu(x)

        x = conv_layer(x, data=data, layer_name='res'+stage+'_branch2b', padding='SAME', lock=lock)
        x = batch_norm_layer(x, data=data, layer_name='bn'+stage+'_branch2b', lock=lock)
        x = tf.nn.relu(x)

        x = conv_layer(x, data=data, layer_name='res'+stage+'_branch2c', lock=lock)
        x = batch_norm_layer(x, data=data, layer_name='bn'+stage+'_branch2c', lock=lock)

        # Shortcut path: uses the same strides as branch 2a so that the two
        # tensors can be added.
        shortcut = conv_layer(input_layer, data=data, layer_name='res'+stage+'_branch1', strides=strides, lock=lock)
        shortcut = batch_norm_layer(shortcut, data=data, layer_name='bn'+stage+'_branch1', lock=lock)

        x = tf.add(x, shortcut)
        x = tf.nn.relu(x)

    return x
    # ----------------------- #


# ----------------------- #
def identity_block(input_layer, stage, data, lock=True):
    """Build a residual block whose shortcut is the unmodified input.

    Three conv -> batch norm steps (branches 2a, 2b, 2c) are chained; the
    first two are followed by a ReLU and the middle convolution uses 'SAME'
    padding. The result is added to `input_layer` and passed through a ReLU.

    Args:
        input_layer: input tensor of the block; also used as the shortcut.
        stage: stage identifier such as '3b'; layer names are built as
            'res' + stage + '_branch...' and 'bn' + stage + '_branch...'.
        data: weight container forwarded to `conv_layer` / `batch_norm_layer`.
        lock: forwarded to every layer; True embeds weights as constants,
            False creates variables.

    Returns:
        The output tensor of the block.
    """
    conv_prefix = 'res' + stage + '_branch'
    bn_prefix = 'bn' + stage + '_branch'

    # (branch suffix, conv padding, apply ReLU after batch norm)
    main_path = (
        ('2a', 'VALID', True),
        ('2b', 'SAME', True),
        ('2c', 'VALID', False),
    )

    with tf.variable_scope('identity_block'):
        out = input_layer
        for branch, pad, activate in main_path:
            out = conv_layer(out, data=data, layer_name=conv_prefix+branch, padding=pad, lock=lock)
            out = batch_norm_layer(out, data=data, layer_name=bn_prefix+branch, lock=lock)
            if activate:
                out = tf.nn.relu(out)

        # Residual connection followed by the block's final activation.
        out = tf.add(out, input_layer)
        out = tf.nn.relu(out)

    return out
    # ----------------------- #

In [13]:
tf.reset_default_graph()

RESNET_HEIGHT = 224
RESNET_WIDTH = 224

image_input = tf.placeholder(dtype=tf.float32, shape=[None, RESNET_HEIGHT, RESNET_WIDTH, 3], name='input')
image = tf.pad(image_input, [[0,0],[3,3],[3,3],[0,0]], "CONSTANT", name='zeropadding-3')


with tf.variable_scope('stage1'):
    res = conv_layer(image, data_h5, 'conv1', strides=[1, 2, 2, 1])
    res = batch_norm_layer(res, data_h5, 'bn_conv1')
    res = tf.nn.relu(res)
    res = tf.nn.max_pool(res, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool_conv1')
    print 'Stage 1', res.get_shape()


with tf.variable_scope('stage2'):
    res = conv_block(input_layer=res, stage='2a', data=data_h5, strides=[1, 1, 1, 1])
    res = identity_block(input_layer=res, stage='2b', data=data_h5)
    res = identity_block(input_layer=res, stage='2c', data=data_h5)
    print 'Stage 2', res.get_shape()


with tf.variable_scope('stage3'):
    res = conv_block(input_layer=res, stage='3a', data=data_h5)
    res = identity_block(input_layer=res, stage='3b', data=data_h5)
    res = identity_block(input_layer=res, stage='3c', data=data_h5)
    res = identity_block(input_layer=res, stage='3d', data=data_h5)
    print 'Stage 3', res.get_shape()


with tf.variable_scope('stage4'):
    res = conv_block(input_layer=res, stage='4a', data=data_h5)
    res = identity_block(input_layer=res, stage='4b', data=data_h5)
    res = identity_block(input_layer=res, stage='4c', data=data_h5)
    res = identity_block(input_layer=res, stage='4d', data=data_h5)
    res = identity_block(input_layer=res, stage='4e', data=data_h5)
    res = identity_block(input_layer=res, stage='4f', data=data_h5)
    print 'Stage 4', res.get_shape()


with tf.variable_scope('stage5'):
    res = conv_block(input_layer=res, stage='5a', data=data_h5, lock=False)
    res = identity_block(input_layer=res, stage='5b', data=data_h5, lock=False)
    res = identity_block(input_layer=res, stage='5c', data=data_h5, lock=False)
    print 'Stage 5', res.get_shape()


with tf.variable_scope('stage-final'):
    res = tf.nn.avg_pool(res, ksize=[1, 7, 7, 1], strides=[1, 1, 1, 1], padding='VALID', name='pool_conv1')
    print 'Pool 5', res.get_shape()
    
    # Add the dense layer.
    res = tf.reshape(res, (-1, res.get_shape()[3].value))
    res = dense_layer(input_layer=res, layer_name='fc1000', data=data_h5)
    res = tf.nn.softmax(res)
    print 'Output probabilities', res.get_shape()

Stage 1 (?, 55, 55, 64)
Stage 2 (?, 55, 55, 256)
Stage 3 (?, 28, 28, 512)
Stage 4 (?, 14, 14, 1024)
Stage 5 (?, 7, 7, 2048)
Pool 5 (?, 1, 1, 2048)
Output probabilities (?, 1000)


In [14]:
for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
    print var

<tf.Variable 'stage5/conv_block/res5a_branch2a/res5a_branch2a_W:0' shape=(1, 1, 1024, 512) dtype=float32_ref>
<tf.Variable 'stage5/conv_block/res5a_branch2a/res5a_branch2a_b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'stage5/conv_block/bn5a_branch2a/bn5a_branch2a_running_mean:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'stage5/conv_block/bn5a_branch2a/bn5a_branch2a_running_std:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'stage5/conv_block/bn5a_branch2a/bn5a_branch2a_beta:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'stage5/conv_block/bn5a_branch2a/bn5a_branch2a_gamma:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'stage5/conv_block/res5a_branch2b/res5a_branch2b_W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'stage5/conv_block/res5a_branch2b/res5a_branch2b_b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'stage5/conv_block/bn5a_branch2b/bn5a_branch2b_running_mean:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'stage5/conv_block/bn5a_branch2b/bn5a_branch2b