-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
237 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
""" Some useful functions to port a model from lasagne to tensorflow. | ||
* Lasagne uses the format BCHW, while tensorflow uses BHWC | ||
(B = batch_size, C = channels, H = height, W = width) | ||
* By default, lasagne uses convolution, while tensorflow implements | ||
cross-correlation (convolution is equivalent to cross-correlation with flipped filters) | ||
Here we define some functions to change the filters from one format to the other | ||
""" | ||
|
||
import numpy as np | ||
|
||
class copy_initializer:
    """Initializer-style callable that hands back a fixed, pre-computed value.

    An instance can be used wherever a callable taking a ``shape`` (plus
    optional keyword arguments) is expected; it ignores everything except the
    shape, which it validates against the stored value.
    """

    def __init__(self, value_to_copy):
        """Remember the value that every call will return.

        Parameters:
            value_to_copy: The value to return. It must expose a ``shape``
                attribute (e.g. a numpy.ndarray).
        """
        self.value_to_copy = value_to_copy

    def __call__(self, shape, **kwargs):
        """Return the stored value after checking it has the requested shape.

        Parameters:
            shape: The requested shape (any iterable of dimension sizes).
            **kwargs: Additional initializer arguments; accepted and ignored.

        Returns:
            The stored value, unchanged (no copy is made).

        Raises:
            AssertionError: If the stored value's shape differs from ``shape``.
        """
        expected_shape = list(shape)
        actual_shape = list(self.value_to_copy.shape)
        # Raised explicitly instead of via an ``assert`` statement so that the
        # check is not stripped when Python runs with -O. The exception type
        # is kept as AssertionError for backward compatibility.
        if not actual_shape == expected_shape:
            raise AssertionError(
                'Invalid shape for initializer. Expected: %s. Given: %s.'
                % (expected_shape, actual_shape))
        return self.value_to_copy
|
||
class flipping_copy_initializer(copy_initializer):
    """Copy initializer that re-orders the axes of a 4-D filter bank and
    mirrors it along the first two axes of the result.

    The value is transposed with axis order (2, 3, 1, 0) and then reversed
    along the two leading axes of the transposed array, which converts a
    convolution filter into the equivalent cross-correlation filter.
    """

    def __init__(self, value_to_copy):
        """Store the transposed and flipped version of ``value_to_copy``.

        Parameters:
            value_to_copy: A 4-D array-like (four axes are required by the
                transpose below).
        """
        reordered = np.transpose(value_to_copy, (2, 3, 1, 0))
        # Reverse the two leading axes; the trailing axes are left untouched.
        self.value_to_copy = reordered[::-1, ::-1]
|
||
class transpose_copy_initializer(copy_initializer):
    """Copy initializer that re-orders the axes of a 4-D value.

    The stored value is the input transposed with axis order (2, 3, 1, 0);
    no flipping is applied.
    """

    def __init__(self, value_to_copy):
        """Store ``value_to_copy`` with its axes permuted to (2, 3, 1, 0).

        Parameters:
            value_to_copy: A 4-D array-like (four axes are required by the
                transpose below).
        """
        self.value_to_copy = np.transpose(value_to_copy, axes=(2, 3, 1, 0))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from six.moves import cPickle | ||
import tensorflow as tf | ||
import numpy as np | ||
|
||
class TF_CNNModel:
    """ Represents a TF model (in this case, with weights trained with the
        Lasagne library.)
    """

    def __init__(self, model_factory, model_weight_path):
        """ Loads the CNN model

        Parameters:
            model_factory (module): An object containing a
                                    "build_architecture" function.
            model_weight_path (str): A file containing the trained weights
        """
        # NOTE(security): unpickling executes arbitrary code embedded in the
        # file. Only load weight files that come from a trusted source.
        with open(model_weight_path, 'rb') as f:
            model_params = cPickle.load(f)

        self.input_size = model_params['input_size']
        self.img_size = model_params['img_size']

        # Batch dimension is left open; a single channel is appended.
        net_input_size = (None, self.input_size[0], self.input_size[1], 1)
        self.x_input = tf.placeholder(tf.float32, net_input_size)
        self.model = model_factory.build_architecture(self.x_input,
                                                      model_params['params'])

    def get_feature_vector(self, sess, image, layer='fc2'):
        """ Runs forward propagation until a desired layer, for one input image

        Parameters:
            sess (tf session): The session used to run the graph
            image (numpy.ndarray): The input image (two dimensions: H x W)
            layer (str): The desired output layer (a key of self.model)

        Returns:
            The value returned by ``sess.run`` for the requested layer.
        """
        assert len(image.shape) == 2, "Input should have two dimensions: H x W"

        # Add the "batch" and "channel" dimensions: 1 x H x W x 1
        batch = image[np.newaxis, :, :, np.newaxis]

        return sess.run(self.model[layer], feed_dict={self.x_input: batch})

    def get_feature_vector_multiple(self, images, layer='fc2', sess=None):
        """ Runs forward propagation until a desired layer, for multiple images

        Parameters:
            images (numpy.ndarray): The input images. Should have three
                dimensions: N x H x W, where N: number of images,
                H: height, W: width
            layer (str): The desired output layer (a key of self.model)
            sess (tf session): The session used to run the graph. When
                omitted, the current default session is used (e.g. inside a
                ``with sess.as_default():`` block).

        Returns:
            The value returned by ``sess.run`` for the requested layer.

        Raises:
            ValueError: If no session is given and there is no default session.
        """
        # The original body referenced an undefined name ``sess`` and always
        # failed with NameError; the session is now an optional argument.
        if sess is None:
            sess = tf.get_default_session()
            if sess is None:
                raise ValueError("No TensorFlow session available: pass "
                                 "'sess' or call within a default session")

        images = np.asarray(images)
        assert len(images.shape) == 3, "Input should have three dimensions: N x H x W"

        # Add the "channel" dimension: N x H x W x 1
        batch = np.expand_dims(images, axis=3)

        # Perform forward propagation until the desired layer
        return sess.run(self.model[layer], feed_dict={self.x_input: batch})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
""" This example shows how to extract features for a new signature, | ||
using the CNN trained on the GPDS dataset. It also compares the | ||
results with the ones obtained by the authors, to ensure consistency. | ||
Note that loading and compiling the model takes time. It is preferable | ||
to load and process multiple signatures in the same python session. | ||
""" | ||
from scipy.misc import imread | ||
from preprocess.normalize import preprocess_signature | ||
import tensorflow as tf | ||
import tf_signet | ||
from tf_cnn_model import TF_CNNModel | ||
import numpy as np | ||
|
||
canvas_size = (952, 1360)  # Maximum signature size

# Load and pre-process the signature
original = imread('data/some_signature.png', flatten=1)

processed = preprocess_signature(original, canvas_size)

# Load the model
model_weight_path = 'models/signet.pkl'
model = TF_CNNModel(tf_signet, model_weight_path)

# Create a tensorflow session. The context manager closes the session (and
# releases its resources) once the features have been extracted.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Use the CNN to extract features
    feature_vector = model.get_feature_vector(sess, processed)

# Compare the obtained feature vector to the expected value
# (to confirm same results obtained by the authors)

processed_correct = np.load('data/processed.npy')

assert np.allclose(processed_correct, processed), (
    "The preprocessed image is different than expected. "
    "Check the version of packages 'scipy' and 'pillow'")

feature_vector_correct = np.load('data/some_signature_signet.npy')
assert np.allclose(feature_vector_correct, feature_vector, atol=1e-3)

print('Tests passed.')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
import tensorflow as tf | ||
from tensorflow.contrib import slim | ||
|
||
from lasagne_to_tf import copy_initializer, transpose_copy_initializer | ||
|
||
|
||
def build_architecture(input_var, params):
    """ Builds the CNN graph and initializes it with the learned weights.

    input_var: tf.placeholder of size (None, 150, 220, 1)
    params: the learned parameters, indexable by integer; every layer uses
            five consecutive entries (weights, beta, gamma, mean, inv_std)

    Returns a dict with the keys 'input', 'fc1' and 'fc2'.
    """
    def learned(layer_index):
        # Keyword arguments for the layer occupying slots
        # [5 * layer_index, 5 * layer_index + 4] of `params`.
        base = 5 * layer_index
        return {
            'weights': params[base],
            'beta': params[base + 1],
            'gamma': params[base + 2],
            'mean': params[base + 3],
            'inv_std': params[base + 4],
        }

    net = {'input': input_var}

    # Convolutional part: five conv+BN+ReLU layers with three max-poolings
    x = conv_bn(input_var, 'conv1', num_outputs=96, kernel_size=11, stride=4,
                **learned(0))
    x = slim.max_pool2d(x, 3, 2, scope='pool1')

    x = conv_bn(x, 'conv2', num_outputs=256, kernel_size=5, padding='SAME',
                **learned(1))
    x = slim.max_pool2d(x, 3, 2, scope='pool2')

    x = conv_bn(x, 'conv3', num_outputs=384, kernel_size=3, padding='SAME',
                **learned(2))
    x = conv_bn(x, 'conv4', num_outputs=384, kernel_size=3, padding='SAME',
                **learned(3))
    x = conv_bn(x, 'conv5', num_outputs=256, kernel_size=3, padding='SAME',
                **learned(4))
    x = slim.max_pool2d(x, 3, 2, scope='pool5')

    # Move the channel axis in front of the spatial axes (the lasagne
    # standard) before flattening, so the flattened order matches the
    # fully connected weights.
    x = tf.transpose(x, (0, 3, 1, 2))
    flattened = slim.flatten(x)

    # Fully connected part
    net['fc1'] = dense_bn(flattened, 'fc1', 2048, **learned(5))
    net['fc2'] = dense_bn(net['fc1'], 'fc2', 2048, **learned(6))

    return net
|
||
|
||
# Helper functions: | ||
|
||
def batch_norm(input, scope, beta, gamma, mean, inv_std):
    """ Applies batch normalization (http://arxiv.org/abs/1502.03167) with
    fixed, previously learned values.

    input: the tensor to normalize
    scope: name scope under which the variables are created
    beta, gamma: the learned shift and scale
    mean, inv_std: the statistics collected from training data

    Returns (input - mean) * (gamma * inv_std) + beta.
    """
    with tf.name_scope(scope):
        # All four values are stored as float32 variables.
        shift = tf.Variable(beta, name='beta', dtype=tf.float32)
        gain = tf.Variable(gamma, name='gamma', dtype=tf.float32)
        center = tf.Variable(mean, name='mean', dtype=tf.float32)
        inverse_std = tf.Variable(inv_std, name='inv_std', dtype=tf.float32)

        centered = input - center
        scale = gain * inverse_std
        return centered * scale + shift
|
||
|
||
def conv_bn(input, scope, num_outputs, kernel_size, weights,
            beta, gamma, mean, inv_std, stride=1, padding='VALID'):
    """ 2D convolution, then batch normalization, then ReLU.

    The filters come from `weights` (learned by the model trained using
    lasagne) and are handed to the convolution through
    transpose_copy_initializer, which puts them in the TensorFlow layout.
    The batch-normalization variables live under the scope '<scope>_bn'.
    """
    kernel_init = transpose_copy_initializer(weights)
    pre_activation = batch_norm(
        slim.conv2d(input,
                    num_outputs=num_outputs,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    scope=scope,
                    weights_initializer=kernel_init,
                    biases_initializer=None,  # BN supplies the shift (beta)
                    activation_fn=None),      # the ReLU comes after BN
        scope='%s_bn' % scope,
        beta=beta, gamma=gamma, mean=mean, inv_std=inv_std)
    return tf.nn.relu(pre_activation)
|
||
|
||
def dense_bn(input, scope, num_outputs, weights, beta, gamma, mean, inv_std):
    """ Fully connected layer (matrix product with `weights`, no bias), then
    batch normalization, then ReLU.

    `num_outputs` is accepted for symmetry with conv_bn but is not used: the
    output width is determined by the shape of `weights`.
    """
    with tf.name_scope(scope):
        weight_matrix = tf.Variable(weights, name='w', dtype=tf.float32)
        normalized = batch_norm(tf.matmul(input, weight_matrix),
                                scope='%s_bn' % scope,
                                beta=beta, gamma=gamma,
                                mean=mean, inv_std=inv_std)
        return tf.nn.relu(normalized)