Adding tensorflow example

luizgh · Nov 29, 2017 · f9ad64f · f9ad64f
1 parent db1e5c0
commit f9ad64f
Show file tree

Hide file tree

Showing 4 changed files with 237 additions and 0 deletions.
diff --git a/lasagne_to_tf.py b/lasagne_to_tf.py
@@ -0,0 +1,32 @@
+""" Some useful functions to port a model from lasagne to tensorflow.
+
+    * Lasagne uses the format BCHW, while tensorflow uses BHWC 
+      (B = batch_size, C = channels, H = height, W = width)
+    * By default, lasagne uses convolution, while tensorflow implements
+      cross-correlation (convolution is equivalent to cross-correlation with flipped filters)
+
+    Here we define some functions to change the filters from one format to the other
+"""
+
+import numpy as np
+
+class copy_initializer:
+    """ Initializer that hands back a pre-computed array instead of generating values.
+
+        Instances are callable as ``initializer(shape, **kwargs)``. The stored array
+        is returned as-is (no copy is made) after checking that it has the
+        requested shape.
+
+        Parameters:
+            value_to_copy: An object exposing a ``shape`` attribute (e.g. a
+                    numpy.ndarray), used as the initial value.
+    """
+
+    def __init__(self, value_to_copy):
+        self.value_to_copy = value_to_copy
+
+    def __call__(self, shape, **kwargs):
+        """ Returns the stored value if its shape matches the requested one
+
+        Parameters:
+            shape: The requested shape (any iterable of dimensions)
+            **kwargs: Accepted and ignored (presumably dtype / partition_info
+                    passed by the framework - TODO confirm)
+
+        Raises:
+            AssertionError: if the stored value does not have the requested shape
+        """
+        expected_shape = list(shape)
+        actual_shape = list(self.value_to_copy.shape)
+        # Explicit check instead of an ``assert`` statement, so the validation is
+        # not stripped when running with ``python -O``. AssertionError is kept as
+        # the exception type for backward compatibility.
+        if not actual_shape == expected_shape:
+            raise AssertionError(
+                'Invalid shape for initializer. Expected: %s. Given: %s.'
+                % (expected_shape, actual_shape))
+        return self.value_to_copy
+
+class flipping_copy_initializer (copy_initializer):
+    """ Copy initializer for convolution filters that also flips the filters.
+
+        The given 4-D array has its axes permuted with order (2, 3, 1, 0) and is
+        then reversed along the first two axes of the result (presumably filter
+        height and width, for filters stored as out x in x H x W - TODO confirm).
+    """
+
+    def __init__(self, value_to_copy):
+        permuted = np.transpose(value_to_copy, [2, 3, 1, 0])
+        # Reverse the two leading axes; the two trailing axes are left untouched
+        self.value_to_copy = permuted[::-1, ::-1, :, :]
+
+class transpose_copy_initializer (copy_initializer):
+    """ Copy initializer for convolution filters that only changes the axis order.
+
+        The given 4-D array has its axes permuted with order (2, 3, 1, 0); the
+        filters are not flipped.
+    """
+
+    def __init__(self, value_to_copy):
+        self.value_to_copy = np.transpose(value_to_copy, [2, 3, 1, 0])
diff --git a/tf_cnn_model.py b/tf_cnn_model.py
@@ -0,0 +1,65 @@
+from six.moves import cPickle
+import tensorflow as tf
+import numpy as np
+
+class TF_CNNModel:
+    """ Represents a TF model (in this case, with weights trained with the Lasagne library.)
+    """
+
+    def __init__(self, model_factory, model_weight_path):
+        """ Loads the CNN model
+
+        Parameters:
+            model_factory (module): An object containing a
+                    "build_architecture" function.
+            model_weight_path (str): A file containing the trained weights
+        """
+        # NOTE: unpickling can execute arbitrary code - only load weight files
+        # obtained from a trusted source.
+        with open(model_weight_path, 'rb') as f:
+            model_params = cPickle.load(f)
+
+        self.input_size = model_params['input_size']
+        self.img_size = model_params['img_size']
+
+        # The batch dimension is left unspecified; the network takes one channel
+        net_input_size = (None, self.input_size[0], self.input_size[1], 1)
+        self.x_input = tf.placeholder(tf.float32, net_input_size)
+        self.model = model_factory.build_architecture(self.x_input,
+                                                      model_params['params'])
+
+
+    def get_feature_vector(self, sess, image, layer='fc2'):
+        """ Runs forward propagation until a desired layer, for one input image
+
+        Parameters:
+            sess (tf session)
+            image (numpy.ndarray): The input image. Should have two dimensions: H x W
+            layer (str): The desired output layer
+
+        Returns:
+            The value of the requested layer, as returned by sess.run
+        """
+
+        assert len(image.shape) == 2, "Input should have two dimensions: H x W"
+
+        # Add the "batch" and "channel" dimensions: 1 x H x W x 1
+        net_input = image[np.newaxis, :, :, np.newaxis]
+
+        out = sess.run(self.model[layer], feed_dict={self.x_input: net_input})
+        return out
+
+    def get_feature_vector_multiple(self, images, layer='fc2', sess=None):
+        """ Runs forward propagation until a desired layer, for multiple input images
+
+        Parameters:
+            images (numpy.ndarray): The input images. Should have three dimensions:
+                    N x H x W, where N: number of images, H: height, W: width
+            layer (str): The desired output layer
+            sess (tf session): The session used to run the model. If None, the
+                    default session (tf.get_default_session()) is used. It is a
+                    trailing optional argument so that the existing positional
+                    arguments keep their meaning.
+
+        Returns:
+            The value of the requested layer, as returned by sess.run
+
+        Raises:
+            ValueError: if no session is given and there is no default session
+        """
+
+        images = np.asarray(images)
+        assert len(images.shape) == 3, "Input should have three dimensions: N x H x W"
+
+        # Previously "sess" was referenced without being defined anywhere, so
+        # every call failed with a NameError.
+        if sess is None:
+            sess = tf.get_default_session()
+        if sess is None:
+            raise ValueError("No tensorflow session available: pass 'sess' or call "
+                             "this method with a default session installed")
+
+        # Add the "channel" dimension:
+        net_input = np.expand_dims(images, axis=3)
+
+
+        # Perform forward propagation until the desired layer
+        out = sess.run(self.model[layer], feed_dict={self.x_input: net_input})
+        return out
diff --git a/tf_example.py b/tf_example.py
@@ -0,0 +1,45 @@
+""" This example shows how to extract features for a new signature, 
+    using the CNN trained on the GPDS dataset. It also compares the
+    results with the ones obtained by the authors, to ensure consistency.
+
+    Note that loading and compiling the model takes time. It is preferable
+    to load and process multiple signatures in the same python session.
+
+"""
+from scipy.misc import imread
+from preprocess.normalize import preprocess_signature
+import tensorflow as tf
+import tf_signet
+from tf_cnn_model import TF_CNNModel
+import numpy as np
+
+canvas_size = (952, 1360)  # Maximum signature size
+
+# Load and pre-process the signature
+# NOTE: scipy.misc.imread was removed in SciPy 1.2 (and requires pillow), so this
+# example needs an older SciPy release.
+original = imread('data/some_signature.png', flatten=1)
+
+processed = preprocess_signature(original, canvas_size)
+
+# Load the model
+model_weight_path = 'models/signet.pkl'
+model = TF_CNNModel(tf_signet, model_weight_path)
+
+# Create a tensorflow session. Using it as a context manager guarantees that the
+# resources it holds are released, even if the feature extraction fails.
+with tf.Session() as sess:
+    sess.run(tf.global_variables_initializer())
+
+    # Use the CNN to extract features
+    feature_vector = model.get_feature_vector(sess, processed)
+
+# Compare the obtained feature vector to the expected value 
+# (to confirm same results obtained by the authors)
+
+processed_correct = np.load('data/processed.npy')
+
+assert np.allclose(processed_correct, processed), "The preprocessed image is different than expected. "+ \
+                                                 "Check the version of packages 'scipy' and 'pillow'"
+
+feature_vector_correct = np.load('data/some_signature_signet.npy')
+assert np.allclose(feature_vector_correct, feature_vector, atol=1e-3)
+
+print('Tests passed.')
diff --git a/tf_signet.py b/tf_signet.py
@@ -0,0 +1,95 @@
+import tensorflow as tf
+from tensorflow.contrib import slim
+
+from lasagne_to_tf import copy_initializer, transpose_copy_initializer
+
+
+def build_architecture(input_var, params):
+    """ Builds the CNN model described in the paper, using the learned weights.
+
+        Every layer reads five consecutive entries of `params`, in the order:
+        weights, beta, gamma, mean, inv_std (entries 0 to 34 are used).
+
+        input_var: tf.placeholder of size (None, 150, 220, 1)
+        params: the learned parameters
+
+        Returns a dict with the keys 'input', 'fc1' and 'fc2'.
+    """
+
+    def learned(first):
+        # Keyword arguments with the five parameters of the layer whose weights
+        # are stored at position `first`
+        return dict(weights=params[first], beta=params[first + 1],
+                    gamma=params[first + 2], mean=params[first + 3],
+                    inv_std=params[first + 4])
+
+    conv1 = conv_bn(input_var, 'conv1', num_outputs=96, kernel_size=11,
+                    stride=4, **learned(0))
+    pool1 = slim.max_pool2d(conv1, 3, 2, scope='pool1')
+
+    conv2 = conv_bn(pool1, 'conv2', num_outputs=256, kernel_size=5,
+                    padding='SAME', **learned(5))
+    pool2 = slim.max_pool2d(conv2, 3, 2, scope='pool2')
+
+    conv3 = conv_bn(pool2, 'conv3', num_outputs=384, kernel_size=3,
+                    padding='SAME', **learned(10))
+    conv4 = conv_bn(conv3, 'conv4', num_outputs=384, kernel_size=3,
+                    padding='SAME', **learned(15))
+    conv5 = conv_bn(conv4, 'conv5', num_outputs=256, kernel_size=3,
+                    padding='SAME', **learned(20))
+    pool5 = slim.max_pool2d(conv5, 3, 2, scope='pool5')
+
+    # Flattening must follow the lasagne axis order (BCHW), so the activations
+    # are transposed from BHWC first
+    pool5_bchw = tf.transpose(pool5, (0,3,1,2))
+    pool5_flat = slim.flatten(pool5_bchw)
+
+    fc1 = dense_bn(pool5_flat, 'fc1', 2048, **learned(25))
+    fc2 = dense_bn(fc1, 'fc2', 2048, **learned(30))
+
+    return {'input': input_var, 'fc1': fc1, 'fc2': fc2}
+
+
+# Helper functions:
+
+def batch_norm(input, scope, beta, gamma, mean, inv_std):
+    """ Applies Batch normalization (http://arxiv.org/abs/1502.03167) with fixed
+        statistics: (input - mean) * (gamma * inv_std) + beta
+
+        beta, gamma: the variables learned by the model
+        mean, inv_std: the statistics collected from training data
+
+        The four values are stored as float32 variables, created under the name
+        scope `scope`. """
+    with tf.name_scope(scope):
+        shift = tf.Variable(beta, name='beta', dtype=tf.float32)
+        gain = tf.Variable(gamma, name='gamma', dtype=tf.float32)
+        center = tf.Variable(mean, name='mean', dtype=tf.float32)
+        inverse_std = tf.Variable(inv_std, name='inv_std', dtype=tf.float32)
+
+        centered = input - center
+        scale = gain * inverse_std
+        return centered * scale + shift
+
+
+def conv_bn(input, scope, num_outputs, kernel_size, weights,
+            beta, gamma, mean, inv_std, stride=1, padding='VALID'):
+    """ Applies a 2D convolution, then batch normalization, then ReLU.
+        The filters come from the model trained using lasagne, and are
+        transposed to the TensorFlow standard (they are not flipped).
+        The batch normalization variables use the scope '<scope>_bn'. """
+    filters = transpose_copy_initializer(weights)
+    # The convolution has no biases and no activation: batch normalization
+    # supplies the offset, and the ReLU is applied after it
+    convolved = slim.conv2d(input,
+                            num_outputs=num_outputs,
+                            kernel_size=kernel_size,
+                            stride=stride,
+                            padding=padding,
+                            scope=scope,
+                            weights_initializer=filters,
+                            biases_initializer=None,
+                            activation_fn=None)
+    normalized = batch_norm(convolved, scope='%s_bn' % scope, beta=beta,
+                            gamma=gamma, mean=mean, inv_std=inv_std)
+    return tf.nn.relu(normalized)
+
+
+def dense_bn(input, scope, num_outputs, weights, beta, gamma, mean, inv_std):
+    """ Applies a fully connected layer (without biases), then batch
+        normalization, then ReLU.
+        `weights` is stored as a float32 variable under the name scope `scope`;
+        the batch normalization variables use the scope '<scope>_bn'.
+        `num_outputs` is not used: the output size is given by `weights`. """
+    with tf.name_scope(scope):
+        weight_matrix = tf.Variable(weights, name='w', dtype=tf.float32)
+        projected = tf.matmul(input, weight_matrix)
+    normalized = batch_norm(projected, scope='%s_bn' % scope, beta=beta,
+                            gamma=gamma, mean=mean, inv_std=inv_std)
+    return tf.nn.relu(normalized)