In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import keras

Using TensorFlow backend.


In [2]:
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.models import Model
from keras.preprocessing import image

In [3]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)

In [4]:
def new_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05))
 def new_biases(length):
    return tf.Variable(tf.constant(0.05, shape=[length]))

In [5]:
class ConvLayer:
    def __init__(self, filter_size, num_input_channels, num_filters, stride=2, padding='VALID'):
        shape = [filter_size, filter_size, num_input_channels, num_filters]
        self.shape = shape
        self.W = new_weights(shape)
        self.b = new_biases(num_filters)
        self.stride = stride
        self.padding = padding
    def forward(self, X):
        X = tf.nn.conv2d(X, self.W, strides=[1, self.stride, self.stride, 1], padding=self.padding)
        return X + self.b
    
    def get_params(self):
        return [self.W, self.b]
        
    def copyFromKerasLayers(self, layer):
        W, b = layer.get_weights()
        op1 = self.W.assign(W)
        op2 = self.b.assign(b)
        self.session.run((op1, op2))

In [6]:
class BatchNormLayer:
    def __init__(self, D):
        self.mean = tf.Variable(np.zeros(D, dtype=np.float32), trainable=False)
        self.var = tf.Variable(np.ones(D, dtype=np.float32), trainable=False)
        self.gamma = tf.Variable(np.ones(D, dtype=np.float32))
        self.beta = tf.Variable(np.zeros(D, dtype=np.float32))
    
    def forward(self, X):
        return tf.nn.batch_normalization(X, self.mean, self.var, self.beta, self.gamma, 1e-3)
    
    def get_params(self):
        return [self.gamma, self.beta, self.mean, self.var]
    
    def copyFromKerasLayers(self, layer):
        gamma, beta, mean, var = layer.get_weights()
        op1 = self.mean.assign(mean)
        op2 = self.var.assign(var)
        op3 = self.gamma.assign(gamma)
        op4 = self.beta.assign(beta)
        self.session.run((op1, op2, op3, op4))

In [7]:
class ConvBlock:
    def __init__(self, num_input_channels, feature_map_sizes, stride=2, activation=tf.nn.relu):
        self.session = None
        self.f = tf.nn.relu
        
        #main branch: Conv -> BN -> F() -> Conv -> BN -> F() -> Conv -> BN
        self.conv1 = ConvLayer(1, num_input_channels, feature_map_sizes[0], stride)
        self.bn1 = BatchNormLayer(feature_map_sizes[0])
        self.conv2 = ConvLayer(3, feature_map_sizes[0], feature_map_sizes[1], 1, 'SAME')
        self.bn2 = BatchNormLayer(feature_map_sizes[1])
        self.conv3 = ConvLayer(1, feature_map_sizes[1], feature_map_sizes[2], 1)
        self.bn3 = BatchNormLayer(feature_map_sizes[2])
        
        # shortcut brunch Conv -> BN
        self.convs = ConvLayer(1, num_input_channels, feature_map_sizes[2], stride)
        self.bns = BatchNormLayer(feature_map_sizes[2])
        
        self.layers = [self.conv1, self.bn1, self.conv2, self.bn2, self.conv3, self.bn3, self.convs, self.bns]
        
        self.input_ = tf.placeholder(tf.float32, shape=(1, 224, 224, num_input_channels))
        self.output = self.forward(self.input_)
    
    def forward(self, X):
        # main branch 
        FX = self.conv1.forward(X)
        FX = self.bn1.forward(FX)
        FX = self.f(FX)
        FX = self.conv2.forward(FX)
        FX = self.bn2.forward(FX)
        FX = self.f(FX)
        FX = self.conv3.forward(FX)
        FX = self.bn3.forward(FX)
        #short cut
        SX = self.convs.forward(X)
        SX = self.bns.forward(SX)
        
        return self.f(FX + SX)
    
    def predict(self, X):
        assert(self.session is not None)
        return self.session.run(self.output, feed_dict={self.input_: X})
    
    def set_session(self, session):
        self.session = session
        self.conv1.session = session
        self.bn1.session = session
        self.conv2.session = session
        self.bn2.session = session
        self.conv3.session = session
        self.bn3.session = session
        self.convs.session = session
        self.bns.session = session
    
    def copyFromKerasLayers(self, layers):
        self.conv1.copyFromKerasLayers(layers[0])
        self.bn1.copyFromKerasLayers(layers[1])
        self.conv2.copyFromKerasLayers(layers[3])
        self.bn2.copyFromKerasLayers(layers[4])
        self.conv3.copyFromKerasLayers(layers[6])
        self.bn3.copyFromKerasLayers(layers[8])
        self.convs.copyFromKerasLayers(layers[7])
        self.bns.copyFromKerasLayers(layers[9])
    
    def get_params(self):
        params = []
        for layer in self.layers:
            params += layer.get_params()
        return params

In [8]:
class IdentityBlock:
    def __init__(self, num_input_channels, feature_map_sizes, activation=tf.nn.relu):
        self.session = None
        self.f = tf.nn.relu
        
        # main branch Conv -> BN -> f() -> Conv -> BN -> F() -> Conv -> BN
        self.conv1 = ConvLayer(1, num_input_channels, feature_map_sizes[0], 1)
        self.bn1 = BatchNormLayer(feature_map_sizes[0])
        self.conv2 = ConvLayer(3, feature_map_sizes[0], feature_map_sizes[1], 1, 'SAME')
        self.bn2 = BatchNormLayer(feature_map_sizes[1])
        self.conv3 = ConvLayer(1, feature_map_sizes[1], feature_map_sizes[2], 1)
        self.bn3 = BatchNormLayer(feature_map_sizes[2])
        
        self.layers = [self.conv1, self.bn1, self.conv2, self.bn2, self.conv3, self.bn3]
        
        self.input_ = tf.placeholder(tf.float32, shape=(1, 224, 224, num_input_channels))
        self.output = self.forward(self.input_)
        
    def forward(self, X):
        # main branch
        FX = self.conv1.forward(X)
        FX = self.bn1.forward(FX)
        FX = self.f(FX)
        FX = self.conv2.forward(FX)
        FX = self.bn2.forward(FX)
        FX = self.f(FX)
        FX = self.conv3.forward(FX)
        FX = self.bn3.forward(FX)
        
        return self.f(FX + X)
    
    def predict(self, X):
        assert(self.session is not None)
        return self.session.run(self.output, feed_dict={self.input_: X})
    
    def set_session(self, session):
        self.session = session
        self.conv1.session = session
        self.bn1.session = session
        self.conv2.session = session
        self.bn2.session = session
        self.conv3.session = session
        self.bn3.session = session
    
    def copyFromKerasLayers(self, layers):
        self.conv1.copyFromKerasLayers(layers[0])
        self.bn1.copyFromKerasLayers(layers[1])
        self.conv2.copyFromKerasLayers(layers[3])
        self.bn2.copyFromKerasLayers(layers[4])
        self.conv3.copyFromKerasLayers(layers[6])
        self.bn3.copyFromKerasLayers(layers[7])
    
         
    def get_params(self):
        params = []
        for layer in self.layers:
            params += layer.get_params()
        return params

In [9]:
resnet = ResNet50(weights='imagenet')

In [10]:
class ReLUlayer:
    def forward(self, X):
        return tf.nn.relu(X)
    
    def get_params(self):
        return []

In [11]:
class MaxPoolLayer:
    def __init__(self, dim):
        self.dim = dim
    
    def forward(self, X):
        return tf.nn.max_pool(X, ksize=[1, self.dim, self.dim, 1], strides=[1, 2, 2, 1], padding='VALID')
    
    def get_params(self):
        return []

In [324]:
class AveragePoolLayer:
    def __init__(self, dim):
        self.dim = dim
    
    def forward(self, X):
        return tf.nn.avg_pool(X, ksize=[1, self.dim, self.dim, 1], strides=[1, 2, 2, 1], padding='VALID')
    
    def get_params(self):
        return []

In [115]:
class ZeroPaddingLayer:
    def forward(self, X):
        return tf.pad(X, paddings = tf.constant([[0, 0], [3, 3], [3, 3], [0, 0]]))
        
    def get_params(self):
        return []

In [378]:
class Flatten:
    def forward(self, X):
        n, w, h, c = X.get_shape().as_list()
        return tf.reshape(X, [-1, w*h*c])
    
    def get_params(self):
        return []

In [405]:
class DenseLayer:
    def __init__(self, input_size, output_size):
        self.W = new_weights([input_size, output_size])
        self.b = new_biases(output_size)
    
    def softmax(self, x):
        m = tf.reduce_max(x, 1)
        x -= m
        e = tf.exp(x)
        return e / tf.reduce_sum(e, -1)
    
    def forward(self, X):
        return self.softmax(tf.matmul(X, self.W))
    
    def get_params(self):
        return [self.W]
    
    def copyFromKerasLayers(self, layer):
        W, b = layer.get_weights()
        op1 = self.W.assign(W)
        op2 = self.b.assign(b)
        self.session.run((op1, op2))

In [406]:
class PartialResNet:
    def __init__(self):
        self.layers = [ZeroPaddingLayer(),
                       ConvLayer(filter_size=7, num_input_channels=3, num_filters=64, stride=2, padding='VALID'),
                      BatchNormLayer(64), 
                      ReLUlayer(),
                      MaxPoolLayer(dim = 3),
                      ConvBlock(num_input_channels=64, feature_map_sizes=[64, 64, 256], stride=1),
                      IdentityBlock(num_input_channels=256, feature_map_sizes=[64, 64, 256]), 
                      IdentityBlock(num_input_channels=256, feature_map_sizes=[64, 64, 256]), 
                      ConvBlock(num_input_channels=256, feature_map_sizes=[128, 128, 512]),
                      IdentityBlock(num_input_channels=512, feature_map_sizes=[128, 128, 512]), 
                      IdentityBlock(num_input_channels=512, feature_map_sizes=[128, 128, 512]),
                      IdentityBlock(num_input_channels=512, feature_map_sizes=[128, 128, 512]), 
                      ConvBlock(num_input_channels=512, feature_map_sizes=[256, 256, 1024]),
                      IdentityBlock(num_input_channels=1024, feature_map_sizes=[256, 256, 1024]), 
                      IdentityBlock(num_input_channels=1024, feature_map_sizes=[256, 256, 1024]),
                      IdentityBlock(num_input_channels=1024, feature_map_sizes=[256, 256, 1024]),
                      IdentityBlock(num_input_channels=1024, feature_map_sizes=[256, 256, 1024]),
                      IdentityBlock(num_input_channels=1024, feature_map_sizes=[256, 256, 1024]), 
                      ConvBlock(num_input_channels=1024, feature_map_sizes=[512, 512, 2048]),
                      IdentityBlock(num_input_channels=2048, feature_map_sizes=[512, 512, 2048]),
                      IdentityBlock(num_input_channels=2048, feature_map_sizes=[512, 512, 2048]),
                      AveragePoolLayer(dim = 7),
                      Flatten(),
                      DenseLayer(2048, 1000)]
   
        self.input_ = tf.placeholder(tf.float32, shape = (None, 224, 224, 3))
        self.output = self.forward(self.input_)
    
    def copyFromKerasLayers(self, layers):
        self.layers[1].copyFromKerasLayers(layers[2])
        self.layers[2].copyFromKerasLayers(layers[3])
        self.layers[5].copyFromKerasLayers(layers[6:18])
        self.layers[6].copyFromKerasLayers(layers[18:28])
        self.layers[7].copyFromKerasLayers(layers[28:38])
        self.layers[8].copyFromKerasLayers(layers[38:50])
        self.layers[9].copyFromKerasLayers(layers[50:60])
        self.layers[10].copyFromKerasLayers(layers[60:70])
        self.layers[11].copyFromKerasLayers(layers[70:80])
        self.layers[12].copyFromKerasLayers(layers[80:92])
        self.layers[13].copyFromKerasLayers(layers[92:102])
        self.layers[14].copyFromKerasLayers(layers[102:112])
        self.layers[15].copyFromKerasLayers(layers[112:122])
        self.layers[16].copyFromKerasLayers(layers[122:132])
        self.layers[17].copyFromKerasLayers(layers[132:142])
        self.layers[18].copyFromKerasLayers(layers[142:154])
        self.layers[19].copyFromKerasLayers(layers[154:164])
        self.layers[20].copyFromKerasLayers(layers[164:174])
        self.layers[23].copyFromKerasLayers(layers[176])
    
    def forward(self, X):
        for layer in self.layers:
            X = layer.forward(X)
        return X
    
    def predict(self, X):
        assert(self.session is not None)
        return self.session.run(self.output, feed_dict={self.input_: X})
    
    def set_session(self, session):
        self.session = session
        self.layers[1].session = session
        self.layers[2].session = session
        self.layers[5].set_session(session)
        self.layers[6].set_session(session)
        self.layers[7].set_session(session)
        self.layers[8].set_session(session)
        self.layers[9].set_session(session)
        self.layers[10].set_session(session)
        self.layers[11].set_session(session)
        self.layers[12].set_session(session)
        self.layers[13].set_session(session)
        self.layers[14].set_session(session)
        self.layers[15].set_session(session)
        self.layers[16].set_session(session)
        self.layers[17].set_session(session)
        self.layers[18].set_session(session)
        self.layers[19].set_session(session)
        self.layers[20].set_session(session)
        self.layers[23].session = session
        
    def get_params(self):
        params = []
        for layer in self.layers:
            params += layer.get_params()
        
        return params

In [407]:
partial_model = Model(input=resnet.input, output=resnet.output)

  """Entry point for launching an IPython kernel.


In [408]:
my_partial_resnet = PartialResNet()

In [409]:
X = np.random.random((1, 224, 224, 3))
keras_output = partial_model.predict(X)
keras_output.shape

(1, 1000)

In [410]:
init = tf.variables_initializer(my_partial_resnet.get_params())
session = keras.backend.get_session()
my_partial_resnet.set_session(session)
session.run(init)

In [411]:
my_output = my_partial_resnet.predict(X)
my_output.shape

(1, 1000)

In [412]:
my_partial_resnet.copyFromKerasLayers(partial_model.layers)

In [413]:
output = my_partial_resnet.predict(X)
diff = np.abs(output - keras_output).sum()
diff

0.0064948834