From 7ea9843501d8938b7d58c5a95eacc3158b5784ec Mon Sep 17 00:00:00 2001
From: Derek Murray
Date: Mon, 14 Mar 2016 07:34:42 -0800
Subject: [PATCH 1/2] Optimize `tf.nn.embedding_lookup()` and `tf.gather()` when shapes are known.

This avoids cross-device transfers of shape metadata, which is often
statically known at graph construction time. As a result, the load on the
parameter servers is reduced.
Change: 117135698
---
 tensorflow/python/ops/array_grad.py    | 12 +++++++----
 tensorflow/python/ops/embedding_ops.py | 29 ++++++++++++++++----
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index 2a3145ff8b7b0c..c7e0c514f90a2e 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -174,10 +174,14 @@ def _FillGrad(_, grad):
 
 @ops.RegisterGradient("Gather")
 def _GatherGrad(op, grad):
-  # op.inputs[0] can be large, so colocate the shape calculation with it.
-  with ops.colocate_with(op.inputs[0]):
-    dense_shape = array_ops.shape(op.inputs[0])
-    values_shape = array_ops.concat(0, [[-1], dense_shape[1:]])
+  if op.inputs[0].get_shape().is_fully_defined():
+    dense_shape = constant_op.constant(op.inputs[0].get_shape().as_list())
+    values_shape = [-1] + op.inputs[0].get_shape()[1:].as_list()
+  else:
+    # op.inputs[0] can be large, so colocate the shape calculation with it.
+    with ops.colocate_with(op.inputs[0]):
+      dense_shape = array_ops.shape(op.inputs[0])
+      values_shape = array_ops.concat(0, [[-1], dense_shape[1:]])
 
   values = array_ops.reshape(grad, values_shape)
   indices = array_ops.reshape(op.inputs[1], [-1])
diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py
index d7f617844eb651..3f4ecc6efdae67 100644
--- a/tensorflow/python/ops/embedding_ops.py
+++ b/tensorflow/python/ops/embedding_ops.py
@@ -105,8 +105,11 @@ def embedding_lookup(params, ids, partition_strategy="mod", name=None,
         else:
           dim_0_sizes = []
           for p in xrange(np):
-            with ops.colocate_with(params[p]):
-              dim_0_sizes.append(array_ops.shape(params[p])[0])
+            if params[p].get_shape()[0].value is not None:
+              dim_0_sizes.append(params[p].get_shape()[0].value)
+            else:
+              with ops.colocate_with(params[p]):
+                dim_0_sizes.append(array_ops.shape(params[p])[0])
           num_total_ids = math_ops.reduce_sum(
               math_ops.cast(array_ops.pack(dim_0_sizes), flat_ids.dtype))
         ids_per_partition = num_total_ids // np
@@ -147,18 +150,22 @@ def embedding_lookup(params, ids, partition_strategy="mod", name=None,
       ret = data_flow_ops.dynamic_stitch(pindices, partitioned_result,
                                          name=name)
       # Reshape to reverse the flattening of ids.
-      # It's important that we compute params[0].shape on the right device
-      # to avoid data motion.
-      with ops.colocate_with(params[0]):
-        params_shape = array_ops.shape(params[0])
-      ret = array_ops.reshape(ret, array_ops.concat(0, [
-          array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])]))
-      # output shape = ids.shape + params[*].shape[1:]
-      # Normally the reshape is sufficient, but setting shape explicitly
-      # teaches shape inference that params[1:].get_shape() matters.
       element_shape = params[0].get_shape()[1:]
       for p in params[1:]:
         element_shape = element_shape.merge_with(p.get_shape()[1:])
+      if element_shape.is_fully_defined():
+        ret = array_ops.reshape(ret, array_ops.concat(0, [
+            array_ops.shape(ids), element_shape]))
+      else:
+        # It's important that we compute params[0].shape on the right device
+        # to avoid data motion.
+        with ops.colocate_with(params[0]):
+          params_shape = array_ops.shape(params[0])
+        ret = array_ops.reshape(ret, array_ops.concat(0, [
+            array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])]))
+      # output shape = ids.shape + params[*].shape[1:]
+      # Normally the reshape is sufficient, but setting shape explicitly
+      # teaches shape inference that params[1:].get_shape() matters.
       ret.set_shape(ids.get_shape().concatenate(element_shape))
       return ret
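
Editorial note (not part of either patch): the fast paths added above only trigger when the parameter tensor's shape is fully defined at graph construction time. Below is a minimal sketch of such a setup, assuming the graph-mode TensorFlow API of this era; the variable name, sizes, and loss are illustrative only.

    import tensorflow as tf

    # Illustrative only: an embedding variable whose static shape is fully known.
    params = tf.Variable(tf.zeros([1000, 64]))
    ids = tf.placeholder(tf.int32, shape=[None])  # ids are only known at run time

    embedded = tf.nn.embedding_lookup(params, ids)
    loss = tf.reduce_sum(embedded)

    # With the patch above, the gradient w.r.t. `params` can use the constant
    # dense shape [1000, 64] rather than a runtime shape() op colocated with
    # `params`, so no shape metadata has to be fetched from the device (e.g. a
    # parameter server) that holds the variable.
    grads = tf.gradients(loss, [params])

If the first dimension of `params` were not statically known, the gradient would fall back to the original colocated shape computation in the `else` branch above.
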
From bb06188d965cd4ad37d83c94c65d4db552dabc7b Mon Sep 17 00:00:00 2001
From: Vincent Vanhoucke
Date: Mon, 14 Mar 2016 08:29:07 -0800
Subject: [PATCH 2/2] Update Docker image to point to new data source.

Change: 117140354
---
 tensorflow/examples/udacity/README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md
index a6d6f8742a553b..af26e2ee387c62 100644
--- a/tensorflow/examples/udacity/README.md
+++ b/tensorflow/examples/udacity/README.md
@@ -6,7 +6,7 @@ Course information can be found at https://www.udacity.com/course/deep-learning-
 Running the Docker container from the Google Cloud repository
 -------------------------------------------------------------
 
-    docker run -p 8888:8888 -it --rm b.gcr.io/tensorflow-udacity/assignments:0.3.0
+    docker run -p 8888:8888 -it --rm b.gcr.io/tensorflow-udacity/assignments:0.4.0
 
 Accessing the Notebooks
 -----------------------
@@ -61,8 +61,9 @@ This will allow you to save work and have access to generated files on the host
 Pushing a Google Cloud release
 ------------------------------
 
-    V=0.3.0
+    V=0.4.0
     docker tag $USER/assignments b.gcr.io/tensorflow-udacity/assignments:$V
+    gcloud docker push b.gcr.io/tensorflow-udacity/assignments
     docker tag -f $USER/assignments b.gcr.io/tensorflow-udacity/assignments:latest
     gcloud docker push b.gcr.io/tensorflow-udacity/assignments
 
@@ -72,3 +73,4 @@ History
 * 0.1.0: Initial release.
 * 0.2.0: Many fixes, including lower memory footprint and support for Python 3.
 * 0.3.0: Use 0.7.1 release.
+* 0.4.0: Move notMNIST data for Google Cloud.