From 7ea9843501d8938b7d58c5a95eacc3158b5784ec Mon Sep 17 00:00:00 2001
From: Derek Murray
Date: Mon, 14 Mar 2016 07:34:42 -0800
Subject: [PATCH 1/2] Optimize `tf.nn.embedding_lookup()` and `tf.gather()` when shapes are known.

This avoids cross-device transfers of shape metadata, which is often
statically known at graph construction time. As a result, the load on the
parameter servers is reduced.
Change: 117135698
---
 tensorflow/python/ops/array_grad.py    | 12 +++++++----
 tensorflow/python/ops/embedding_ops.py | 29 ++++++++++++++++----
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index 2a3145ff8b7b0c..c7e0c514f90a2e 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -174,10 +174,14 @@ def _FillGrad(_, grad):
 
 @ops.RegisterGradient("Gather")
 def _GatherGrad(op, grad):
-  # op.inputs[0] can be large, so colocate the shape calculation with it.
-  with ops.colocate_with(op.inputs[0]):
-    dense_shape = array_ops.shape(op.inputs[0])
-    values_shape = array_ops.concat(0, [[-1], dense_shape[1:]])
+  if op.inputs[0].get_shape().is_fully_defined():
+    dense_shape = constant_op.constant(op.inputs[0].get_shape().as_list())
+    values_shape = [-1] + op.inputs[0].get_shape()[1:].as_list()
+  else:
+    # op.inputs[0] can be large, so colocate the shape calculation with it.
+    with ops.colocate_with(op.inputs[0]):
+      dense_shape = array_ops.shape(op.inputs[0])
+      values_shape = array_ops.concat(0, [[-1], dense_shape[1:]])
 
   values = array_ops.reshape(grad, values_shape)
   indices = array_ops.reshape(op.inputs[1], [-1])
diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py
index d7f617844eb651..3f4ecc6efdae67 100644
--- a/tensorflow/python/ops/embedding_ops.py
+++ b/tensorflow/python/ops/embedding_ops.py
@@ -105,8 +105,11 @@ def embedding_lookup(params, ids, partition_strategy="mod", name=None,
         else:
           dim_0_sizes = []
           for p in xrange(np):
-            with ops.colocate_with(params[p]):
-              dim_0_sizes.append(array_ops.shape(params[p])[0])
+            if params[p].get_shape()[0].value is not None:
+              dim_0_sizes.append(params[p].get_shape()[0].value)
+            else:
+              with ops.colocate_with(params[p]):
+                dim_0_sizes.append(array_ops.shape(params[p])[0])
           num_total_ids = math_ops.reduce_sum(
               math_ops.cast(array_ops.pack(dim_0_sizes), flat_ids.dtype))
         ids_per_partition = num_total_ids // np
@@ -147,18 +150,22 @@ def embedding_lookup(params, ids, partition_strategy="mod", name=None,
       ret = data_flow_ops.dynamic_stitch(pindices, partitioned_result,
                                          name=name)
       # Reshape to reverse the flattening of ids.
-      # It's important that we compute params[0].shape on the right device
-      # to avoid data motion.
-      with ops.colocate_with(params[0]):
-        params_shape = array_ops.shape(params[0])
-      ret = array_ops.reshape(ret, array_ops.concat(0, [
-          array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])]))
-      # output shape = ids.shape + params[*].shape[1:]
-      # Normally the reshape is sufficient, but setting shape explicitly
-      # teaches shape inference that params[1:].get_shape() matters.
       element_shape = params[0].get_shape()[1:]
       for p in params[1:]:
         element_shape = element_shape.merge_with(p.get_shape()[1:])
+      if element_shape.is_fully_defined():
+        ret = array_ops.reshape(ret, array_ops.concat(0, [
+            array_ops.shape(ids), element_shape]))
+      else:
+        # It's important that we compute params[0].shape on the right device
+        # to avoid data motion.
+        with ops.colocate_with(params[0]):
+          params_shape = array_ops.shape(params[0])
+        ret = array_ops.reshape(ret, array_ops.concat(0, [
+            array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])]))
+      # output shape = ids.shape + params[*].shape[1:]
+      # Normally the reshape is sufficient, but setting shape explicitly
+      # teaches shape inference that params[1:].get_shape() matters.
       ret.set_shape(ids.get_shape().concatenate(element_shape))
       return ret
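
Editorial note (not part of either patch): the fast paths added above only trigger when the parameter tensor's shape is fully defined at graph construction time. Below is a minimal sketch of such a setup, assuming the graph-mode TensorFlow API of this era; the variable name, sizes, and loss are illustrative only.

    import tensorflow as tf

    # Illustrative only: an embedding variable whose static shape is fully known.
    params = tf.Variable(tf.zeros([1000, 64]))
    ids = tf.placeholder(tf.int32, shape=[None])  # ids are only known at run time

    embedded = tf.nn.embedding_lookup(params, ids)
    loss = tf.reduce_sum(embedded)

    # With the patch above, the gradient w.r.t. `params` can use the constant
    # dense shape [1000, 64] rather than a runtime shape() op colocated with
    # `params`, so no shape metadata has to be fetched from the device (e.g. a
    # parameter server) that holds the variable.
    grads = tf.gradients(loss, [params])

If the first dimension of `params` were not statically known, the gradient would fall back to the original colocated shape computation in the `else` branch above.
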
From bb06188d965cd4ad37d83c94c65d4db552dabc7b Mon Sep 17 00:00:00 2001
From: Vincent Vanhoucke
Date: Mon, 14 Mar 2016 08:29:07 -0800
Subject: [PATCH 2/2] Update Docker image to point to new data source.

Change: 117140354
---
 tensorflow/examples/udacity/README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md
index a6d6f8742a553b..af26e2ee387c62 100644
--- a/tensorflow/examples/udacity/README.md
+++ b/tensorflow/examples/udacity/README.md
@@ -6,7 +6,7 @@ Course information can be found at https://www.udacity.com/course/deep-learning-
 Running the Docker container from the Google Cloud repository
 -------------------------------------------------------------
 
-    docker run -p 8888:8888 -it --rm b.gcr.io/tensorflow-udacity/assignments:0.3.0
+    docker run -p 8888:8888 -it --rm b.gcr.io/tensorflow-udacity/assignments:0.4.0
 
 Accessing the Notebooks
 -----------------------
@@ -61,8 +61,9 @@ This will allow you to save work and have access to generated files on the host
 Pushing a Google Cloud release
 ------------------------------
 
-    V=0.3.0
+    V=0.4.0
     docker tag $USER/assignments b.gcr.io/tensorflow-udacity/assignments:$V
+    gcloud docker push b.gcr.io/tensorflow-udacity/assignments
     docker tag -f $USER/assignments b.gcr.io/tensorflow-udacity/assignments:latest
     gcloud docker push b.gcr.io/tensorflow-udacity/assignments
 
@@ -72,3 +73,4 @@ History
 * 0.1.0: Initial release.
 * 0.2.0: Many fixes, including lower memory footprint and support for Python 3.
 * 0.3.0: Use 0.7.1 release.
+* 0.4.0: Move notMNIST data for Google Cloud.