# Web-page header residue captured with this file ("Permalink", "Find file"):
# snapshot c784bf6, May 10, 2016 -- 334 lines (251 sloc), 9.56 KB.
import os
os.environ["GLOG_minloglevel"] = "2"
import sys
import re
import caffe
import numpy as np
import tensorflow as tf
import skimage.io
from caffe.proto import caffe_pb2
from synset import *
import resnet
class CaffeParamProvider(object):
    """Hands out parameters of a loaded caffe net, keyed by caffe layer name.

    Every accessor reads ``caffe_net.params[name][index].data``. Convolution
    kernels and fully connected weights are transposed on the way out; all
    other blobs are returned as stored.
    """

    def __init__(self, caffe_net):
        """Keep a reference to the net whose ``params`` mapping is read."""
        self.caffe_net = caffe_net

    def _blob(self, name, index):
        """Return the data of blob number `index` of caffe layer `name`."""
        return self.caffe_net.params[name][index].data

    def conv_kernel(self, name):
        """Return the kernel of layer `name`, reordered for TensorFlow."""
        # caffe      [out_channels, in_channels, filter_height, filter_width]
        #             0             1            2              3
        # tensorflow [filter_height, filter_width, in_channels, out_channels]
        #             2              3             1            0
        return self._blob(name, 0).transpose((2, 3, 1, 0))

    def bn_gamma(self, name):
        """Return blob 0 of layer `name` (callers pass a ``scale*`` layer)."""
        return self._blob(name, 0)

    def bn_beta(self, name):
        """Return blob 1 of layer `name` (callers pass a ``scale*`` layer)."""
        return self._blob(name, 1)

    def bn_mean(self, name):
        """Return blob 0 of layer `name` (callers pass a ``bn*`` layer)."""
        return self._blob(name, 0)

    def bn_variance(self, name):
        """Return blob 1 of layer `name` (callers pass a ``bn*`` layer)."""
        return self._blob(name, 1)

    def fc_weights(self, name):
        """Return blob 0 of layer `name` with its two axes swapped."""
        return self._blob(name, 0).transpose((1, 0))

    def fc_biases(self, name):
        """Return blob 1 of layer `name` unchanged."""
        return self._blob(name, 1)
def preprocess(img):
    """Turn an RGB image valued in [0,1] into mean-subtracted BGR in [0,255].

    The per-channel averages of the mean image are printed as a side effect.
    The input is not modified; a new array is returned.
    """
    mean_bgr = load_mean_bgr()
    for channel, label in enumerate(('mean blue', 'mean green', 'mean red')):
        print("%s %s" % (label, np.mean(mean_bgr[:, :, channel])))

    scaled = np.copy(img) * 255.0
    # Reversing the last axis swaps the channel order from RGB to BGR.
    bgr = scaled[:, :, [2, 1, 0]]
    bgr -= mean_bgr
    return bgr
def assert_almost_equal(caffe_tensor, tf_tensor, max_dist=500):
    """Check that a caffe activation and a TensorFlow activation roughly agree.

    Only the first item of each batch is compared. The caffe item is
    transposed with (1, 2, 0), i.e. its first axis is moved last, so that it
    lines up with the TensorFlow item. The Euclidean distance between the two
    is printed and must stay below `max_dist`.

    Args:
        caffe_tensor: batch of caffe activations; item 0 must be 3-dimensional.
        tf_tensor: batch of TensorFlow activations.
        max_dist: exclusive upper bound on the accepted distance.

    Raises:
        SystemExit: with status 1 when the two shapes differ (both shapes are
            printed first).
        AssertionError: when the distance is not below `max_dist`.
    """
    t = tf_tensor[0]
    c = caffe_tensor[0].transpose((1, 2, 0))

    if t.shape != c.shape:
        print("%s %s" % ("t.shape", t.shape))
        print("%s %s" % ("c.shape", c.shape))
        sys.exit(1)

    d = np.linalg.norm(t - c)
    print("%s %s" % ("d", d))
    # Raised explicitly (not via `assert`) so the check survives `python -O`;
    # `not d < max_dist` also rejects NaN.
    if not d < max_dist:
        raise AssertionError("distance %s is not below %s" % (d, max_dist))
def load_image(path, size=224):
    """Load an image, crop the centered square and resize it.

    Args:
        path: image location, passed to ``skimage.io.imread``.
        size: edge length of the square result (224 by default).

    Returns:
        The resized crop produced by ``skimage.transform.resize``, laid out
        as [height, width, depth]; [224, 224, 3] for the default size --
        assumes a 3-channel input, TODO confirm for grayscale/alpha images.
    """
    # The file only imports `skimage.io`; import the resize function here so
    # this call does not depend on `skimage.transform` being loaded elsewhere.
    from skimage.transform import resize

    img = skimage.io.imread(path)
    height, width = img.shape[:2]
    short_edge = min(height, width)

    # Offsets that center a short_edge x short_edge window in the image.
    yy = (height - short_edge) // 2
    xx = (width - short_edge) // 2
    crop_img = img[yy:yy + short_edge, xx:xx + short_edge]

    return resize(crop_img, (size, size))
def load_mean_bgr():
    """Read the BGR mean image, valued in [0, 255], as [height, width, 3].

    The blob is parsed from "data/ResNet_mean.binaryproto"; its first item
    must have shape (3, 224, 224) before being transposed.
    """
    mean_blob = caffe_pb2.BlobProto()
    with open("data/ResNet_mean.binaryproto", mode='rb') as proto_file:
        mean_blob.ParseFromString(proto_file.read())

    channels_first = caffe.io.blobproto_to_array(mean_blob)[0]
    assert channels_first.shape == (3, 224, 224)
    # Move the channel axis last.
    return channels_first.transpose((1, 2, 0))
def load_caffe(img_p, layers=50):
    """Load the caffe ResNet of the given depth and run it on one image.

    `img_p` is transposed with (2, 0, 1) and written into slot 0 of the
    'data' blob, which must then have shape (3, 224, 224). After the forward
    pass the 'prob' output is reported through print_prob.

    Returns:
        The caffe net, with its blobs filled by the forward pass.
    """
    caffe.set_mode_cpu()
    net = caffe.Net("data/ResNet-%d-deploy.prototxt" % layers,
                    "data/ResNet-%d-model.caffemodel" % layers,
                    caffe.TEST)

    input_blob = net.blobs['data']
    input_blob.data[0] = img_p.transpose((2, 0, 1))
    assert input_blob.data[0].shape == (3, 224, 224)

    net.forward()
    print_prob(net.blobs['prob'].data[0])
    return net
def print_prob(prob):
    """Print the top-1 and top-5 synset entries for `prob`.

    Entries are ranked by descending value of `prob`.

    Returns:
        The top-1 synset entry.
    """
    ranked = np.argsort(prob)[::-1]

    top1 = synset[ranked[0]]
    print("%s %s" % ("Top1: ", top1))

    top5 = [synset[ranked[rank]] for rank in range(5)]
    print("%s %s" % ("Top5: ", top5))

    return top1
def parse_tf_varnames(p, tf_varname, num_layers):
    """Fetch the caffe parameter that backs a TensorFlow variable.

    Args:
        p: parameter provider exposing conv_kernel, bn_gamma, bn_beta,
            bn_mean, bn_variance, fc_weights and fc_biases, each taking a
            caffe layer name.
        tf_varname: TensorFlow variable name, for example
            'scale1/weights', 'fc/biases',
            'scale2/block1/shortcut/weights' or 'scale4/block8/a/beta'.
        num_layers: depth of the network; only 50 versus anything else
            matters here, because it selects the block naming of scales 3
            and 4.

    Returns:
        Whatever the selected provider method returns for the derived caffe
        layer name.

    Raises:
        ValueError: if the name matches no known pattern, names a scale
            outside 2..5, or carries no known parameter kind.
    """
    # Stem and classifier variables map onto fixed caffe layers.
    fixed = {
        'scale1/weights': ('conv_kernel', 'conv1'),
        'scale1/gamma': ('bn_gamma', 'scale_conv1'),
        'scale1/beta': ('bn_beta', 'scale_conv1'),
        'scale1/moving_mean': ('bn_mean', 'bn_conv1'),
        'scale1/moving_variance': ('bn_variance', 'bn_conv1'),
        'fc/weights': ('fc_weights', 'fc1000'),
        'fc/biases': ('fc_biases', 'fc1000'),
    }
    if tf_varname in fixed:
        accessor, caffe_name = fixed[tf_varname]
        return getattr(p, accessor)(caffe_name)

    m = re.search(r'scale(\d+)/block(\d+)/(shortcut|a|b|c|A|B)', tf_varname)
    if m is None:
        # Previously this fell through to an AttributeError on None.
        raise ValueError('unhandled var ' + tf_varname)

    scale_num = int(m.group(1))
    block_num = int(m.group(2))
    if scale_num not in (2, 3, 4, 5):
        raise ValueError("unexpected scale_num %d" % scale_num)

    # Scales 2 and 5 always use block letters (a, b, c, ...). Scales 3 and 4
    # use letters for l=50 and 'a', 'b1', 'b2', ... for l=101 and l=152.
    if scale_num in (3, 4) and num_layers != 50:
        block_str = 'a' if block_num == 1 else 'b%d' % (block_num - 1)
    else:
        block_str = chr(ord('a') + block_num - 1)

    # The shortcut is caffe's branch 1 with no conv letter; the residual
    # convs are branch 2 followed by their lower-cased letter.
    branch = m.group(3)
    if branch == "shortcut":
        x = (scale_num, block_str, 1, '')
    else:
        x = (scale_num, block_str, 2, branch.lower())

    # Checked in this order; the first kind found in the name wins.
    kinds = (
        ('weights', 'conv_kernel', 'res'),
        ('gamma', 'bn_gamma', 'scale'),
        ('beta', 'bn_beta', 'scale'),
        ('moving_mean', 'bn_mean', 'bn'),
        ('moving_variance', 'bn_variance', 'bn'),
    )
    for kind, accessor, prefix in kinds:
        if kind in tf_varname:
            return getattr(p, accessor)((prefix + '%d%s_branch%d%s') % x)

    raise ValueError('unhandled var ' + tf_varname)
def checkpoint_fn(layers):
    """Return the checkpoint file name for a network of `layers` layers."""
    template = 'ResNet-L%d.ckpt'
    return template % layers
def meta_fn(layers):
    """Return the metagraph file name for a network of `layers` layers."""
    template = 'ResNet-L%d.meta'
    return template % layers
def _num_blocks_for(layers):
    """Return the per-scale block counts for a supported network depth."""
    table = {
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
        152: [3, 8, 36, 3],
    }
    if layers not in table:
        raise ValueError("unsupported number of layers %r" % (layers,))
    return table[layers]


def convert(graph, img, img_p, layers):
    """Copy caffe ResNet weights into a TensorFlow graph and save them.

    Steps: run the caffe model on `img_p`, build the TensorFlow model, export
    its metagraph, assign every TensorFlow variable from the caffe
    parameters, compare a handful of activations and the final probabilities
    between the two frameworks, then write the checkpoint.

    Args:
        graph: graph used to look up activation tensors by name. The model
            itself is built in the default graph, so the caller is expected
            to have made `graph` the default (main does).
        img: image fed to the TensorFlow model, which is built with
            preprocess=True.
        img_p: already preprocessed image fed to caffe.
        layers: network depth; 50, 101 or 152.

    Raises:
        ValueError: if `layers` is not a supported depth (raised before any
            model is loaded).
        AssertionError: if the probabilities of the two frameworks differ too
            much, or via assert_almost_equal for the activations.
    """
    num_blocks = _num_blocks_for(layers)

    caffe_model = load_caffe(img_p, layers)
    param_provider = CaffeParamProvider(caffe_model)

    with tf.device('/cpu:0'):
        images = tf.placeholder("float32", [None, 224, 224, 3], name="images")
        logits = resnet.inference(images,
                                  is_training=False,
                                  num_blocks=num_blocks,
                                  preprocess=True,
                                  bottleneck=True)
        # Only the named op matters; it is fetched below as "prob:0".
        tf.nn.softmax(logits, name='prob')

    # We write the metagraph first to avoid adding a bunch of
    # assign ops that are used to set variables from caffe.
    # The checkpoint is written to at the end.
    tf.train.export_meta_graph(filename=meta_fn(layers))

    vars_to_restore = tf.all_variables()
    saver = tf.train.Saver(vars_to_restore)

    # (caffe blob name, TensorFlow tensor name) pairs whose activations are
    # compared after the weights have been copied.
    compared = [
        ('conv1', "scale1/Relu:0"),
        ('pool1', "scale2/MaxPool:0"),
        ('res2a', "scale2/block1/Relu:0"),
        ('res2b', "scale2/block2/Relu:0"),
        ('res2c', "scale2/block3/Relu:0"),
        ('res3a', "scale3/block1/Relu:0"),
        ('res5c', "scale5/block3/Relu:0"),
    ]

    # The context manager closes the session even when a check fails, so
    # converting several depths in a row does not pile up open sessions.
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        assigns = [
            var.assign(parse_tf_varnames(param_provider, var.op.name, layers))
            for var in vars_to_restore
        ]
        sess.run(assigns)

        fetches = [graph.get_tensor_by_name(tf_name)
                   for _, tf_name in compared]
        # avg_pool is fetched but not compared against caffe's pool5.
        fetches.append(graph.get_tensor_by_name("avg_pool:0"))
        fetches.append(graph.get_tensor_by_name("prob:0"))

        outputs = sess.run(fetches, {images: img[np.newaxis, :]})

        for (blob_name, _), tf_out in zip(compared, outputs):
            assert_almost_equal(caffe_model.blobs[blob_name].data, tf_out)

        tf_prob = outputs[-1]
        print_prob(tf_prob[0])

        prob_dist = np.linalg.norm(caffe_model.blobs['prob'].data - tf_prob)
        print("%s %s" % ('prob_dist ', prob_dist))
        assert prob_dist < 0.2  # XXX can this be tightened?

        # We've already written the metagraph to avoid a bunch of assign ops.
        saver.save(sess, checkpoint_fn(layers), write_meta_graph=False)
def save_graph(save_path):
    """Serialize the default graph's GraphDef and write it to `save_path`.

    The byte size of the GraphDef and the destination path are printed.
    """
    graph_def = tf.get_default_graph().as_graph_def()
    print("%s %s" % ("graph_def byte size", graph_def.ByteSize()))

    serialized = graph_def.SerializeToString()
    with open(save_path, "wb") as out_file:
        out_file.write(serialized)

    print("saved model to %s" % save_path)
def main(_):
    """Convert the 50, 101 and 152 layer models, each in a fresh graph.

    The test image "data/cat.jpg" is loaded and preprocessed once, printed,
    and then reused for every depth. The single positional argument is
    ignored.
    """
    img = load_image("data/cat.jpg")
    print(img)
    img_p = preprocess(img)

    for layers in (50, 101, 152):
        graph = tf.Graph()
        with graph.as_default():
            print("%s %s" % ("CONVERT", layers))
            convert(graph, img, img_p, layers)
# Script entry point: only runs when the file is executed directly.
# NOTE(review): tf.app.run() is presumably what ends up calling main() with
# one positional argument -- confirm against the TensorFlow version in use.
if __name__ == '__main__':
    tf.app.run()