
Branch 197583446 #19470

Merged 61 commits on May 25, 2018

Commits
f0ee72b
Make the quantize_and_dequantize op use the full quantized range when…
tensorflower-gardener May 19, 2018
66b50ab
[XLA] Regression test for missing virtual destructor.
cdleary May 19, 2018
f3ae367
Delete unused and buggy code.
tensorflower-gardener May 19, 2018
92cdc99
Add 'src_graph' argument to gradients_impl._GradientsHelper.
skye May 19, 2018
81168f2
Add a method to list op names in an ApiDefMap.
tensorflower-gardener May 19, 2018
e34dcfe
Fix compile error due to missing default case in switch statement.
tensorflower-gardener May 19, 2018
9a11178
[XLA] Consistently apply gpu-max-kernel-unroll-factor = 1 in HloTestB…
May 19, 2018
c76b815
Rollforward of CL 197167501, without enabling CUDNN_FFT_TILING_FORWAR…
tensorflower-gardener May 20, 2018
85a19f6
Fixed Pi cross compilation
petewarden May 20, 2018
3a297e6
[XLA] Fix memory leak in ScopedShapedBuffer's move-assignment operator.
May 20, 2018
6b374e9
Extend optimization of Slice operator to StridedSlice.
tensorflower-gardener May 21, 2018
158528e
Enhance error reporting.
tensorflower-gardener May 21, 2018
0f192f9
Automated g4 rollback of changelist 197226707
tensorflower-gardener May 21, 2018
a0e4081
Add a kernel usable as a GEBP inner loop for an LLVM IR GEMM
May 21, 2018
fdd06c5
Optimize multiplications by constants in more cases.
benoitsteiner May 21, 2018
7e5d24d
Turn on dead branch elimination, shape optimization, and remapping by…
benoitsteiner May 21, 2018
7914340
Always enter the handle graph before calling convert_to_tensor in res…
alextp May 21, 2018
0cc2aef
[TF:XLA] Delete cumulative_total_size to simplify the DFS scheduler.
dimvar May 21, 2018
1f10b08
Allow using DNN to only train the embeddings and using the tree model…
tensorflower-gardener May 21, 2018
564fcf1
Disable flaky batch_dataset_op_test
jsimsa May 21, 2018
b28938c
Remove object-based checkpointing probes from Python 3 tf.train.Saver…
allenlavoie May 21, 2018
c734063
Extract out a MatrixMatrixBlockPanelEmitter::Dimensions struct; NFC
May 21, 2018
2746bc3
Internal Change.
May 21, 2018
a4afd46
Optimize more reductions
benoitsteiner May 21, 2018
d67a994
Expose partition_strategy option in embedding_lookup_unique
tensorflower-gardener May 21, 2018
4d03411
Add arithmetic optimizer stage that removes LogicalNot that takes a c…
tensorflower-gardener May 21, 2018
148790a
Support a better interface for the single option case in combinations…
isaprykin May 21, 2018
433bb8e
Improve error message in tensor.cc when IsAligned() test fails
tensorflower-gardener May 21, 2018
753cc5b
Fixes issue with gradient tape when asking for the gradient of an int…
alextp May 21, 2018
0ad7f20
Ensure that saving/restoring iterator in CheckpointInputPipelineHook …
saxenasaurabh May 21, 2018
006e293
Supports initializing an Interpreter with a direct ByteBuffer of nati…
tensorflower-gardener May 22, 2018
c3587c5
Improves documentation of labels and logits arguments in hinge loss m…
petrosmol May 22, 2018
31ca159
Make the quantize_and_dequantize op use the full quantized range when…
tensorflower-gardener May 22, 2018
d913a24
[XLA] Two minor style-guide fixups.
May 22, 2018
b113981
Introduce an option to allocate CUDA unified memory
smit-hinsu May 22, 2018
7c3cd08
Split generated_examples test into multiple test targets
angerson May 22, 2018
1d5c44c
Adds support for specifying a discovery_service_url (via either a par…
May 22, 2018
e4dcf28
Improvements to util/nest.py and data/util/nest.py
May 22, 2018
45e4c1c
s/tfe.GradientTape/tf.GradientTape/
asimshankar May 22, 2018
bbc8fe7
Internal Change
May 22, 2018
065436d
Internal Change
mkuperst May 22, 2018
c0bf28e
Update scan benchmarks to have a range of 16K-128K iterations. As of …
tensorflower-gardener May 22, 2018
eab53f2
[XLA:GPU] Implement trivial (one-replica) cross-replica-sum on XLA:GPU.
May 22, 2018
0289932
Enable tpu.rewrite to work on XLA CPU/GPU backends.
tensorflower-gardener May 22, 2018
6e6dcf4
Unifiy the cuda toolchain definition of gcc/nvcc and cuda-clang.
tensorflower-gardener May 22, 2018
3d428b1
Automated g4 rollback of changelist 197487461
tensorflower-gardener May 22, 2018
f5cb20e
convert Pow op into something that is more recognizable, so we can ha…
tensorflower-gardener May 22, 2018
a5f13f7
batch_util.h is generally useful so moved to util/ from kernels/ wher…
tensorflower-gardener May 22, 2018
e727f74
internal change
tensorflower-gardener May 22, 2018
291f349
[XLA] Optimize ShapeTree<T>
tensorflower-gardener May 22, 2018
2e4a4b4
[XLA:TF] Run buildifier on llvm.BUILD
d0k May 22, 2018
96f4fef
Automated g4 rollback of changelist 197527651
tensorflower-gardener May 22, 2018
62a0eef
Fix a couple of broken links in the Swift For TensorFlow page.
tensorflower-gardener May 22, 2018
8f5a71e
Update calls to addPassesToEmitFile
tensorflower-gardener May 22, 2018
dabc133
Contributing guidelines, style guide and README updates
tensorflower-gardener May 22, 2018
55711f7
[TF:XLA] Fix xla_interpreter_device build
d0k May 22, 2018
5e7d41d
Special case the 'dict' call, which trips other mechanisms for built-…
May 22, 2018
13e7f92
Make init_scope preserve the inner device stack when lifting into a g…
akshayka May 22, 2018
1b97609
Merge commit for internal changes
ankurtaly May 22, 2018
349c26f
fixed DirectSessionWithTrackingAllocTest CostMo…
ankurtaly May 23, 2018
ba0ed27
updated based on CL 197644290
ankurtaly May 23, 2018
14 changes: 11 additions & 3 deletions tensorflow/c/eager/tape.h
@@ -104,6 +104,10 @@ class VSpace {
       gtl::ArraySlice<Gradient*> output_gradients,
       std::vector<Gradient*>* result) const = 0;
 
+  // Marks the following gradient as a result so it's not consumed by backward
+  // functions.
+  virtual void MarkAsResult(Gradient* gradient) const = 0;
+
   // Deletes the input tensor.
   virtual void DeleteGradient(Gradient* gradient) const = 0;
 
@@ -356,8 +360,7 @@ BackpropInitialState<BackwardFunction> PrepareBackprop(
       count_it->second++;
     } else {
       result.tensor_usage_counts[it] = 1;
-      if (sources_set.find(it) == sources_set.end() &&
-          tensor_tape.find(it) != tensor_tape.end()) {
+      if (tensor_tape.find(it) != tensor_tape.end()) {
         tensor_stack.push_back(it);
       }
     }
@@ -522,10 +525,15 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
         }
       } else {
         any_gradient_nonzero = true;
-        out_gradients.push_back(vspace.AggregateGradients(grad_it->second));
+        auto new_gradients = vspace.AggregateGradients(grad_it->second);
+        if (sources_set.find(grad_it->first) == sources_set.end()) {
+          gradients.erase(grad_it);
+        } else {
+          grad_it->second.clear();
+          grad_it->second.push_back(new_gradients);
+          vspace.MarkAsResult(new_gradients);
+        }
+        out_gradients.push_back(new_gradients);
       }
     }
     std::vector<Gradient*> in_gradients;
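The tape.h changes above cache the aggregated gradient for requested source tensors back into the gradients map and mark it as a result, so backward functions neither recompute nor free it, while gradients for intermediate tensors are erased once consumed. A minimal self-contained sketch of that ownership pattern, using toy stand-in types (not the real `VSpace`/`GradientTape` API):

```cpp
#include <cassert>
#include <map>
#include <set>
#include <vector>

// Toy stand-in for the tape's gradient bookkeeping (hypothetical types; the
// real code is templated over Gradient and goes through a VSpace interface).
struct Gradient {
  double value;
  bool marked_as_result = false;
};

// Mimics VSpace::AggregateGradients: sums a list into one new gradient.
Gradient* Aggregate(const std::vector<Gradient*>& grads) {
  double sum = 0;
  for (const Gradient* g : grads) sum += g->value;
  return new Gradient{sum};
}

// The pattern from the diff: if the tensor is NOT a requested source, its map
// entry is erased once consumed; if it IS a source, the aggregated gradient is
// cached back into the map and marked so backward functions don't free it.
// (This toy leaks the pre-aggregation gradients; the real tape deletes them.)
Gradient* ConsumeOrKeep(int tensor_id, const std::set<int>& sources,
                        std::map<int, std::vector<Gradient*>>* gradients) {
  auto it = gradients->find(tensor_id);
  Gradient* aggregated = Aggregate(it->second);
  if (sources.count(tensor_id) == 0) {
    gradients->erase(it);  // intermediate tensor: entry no longer needed
  } else {
    it->second.clear();
    it->second.push_back(aggregated);     // keep the result alive in the map
    aggregated->marked_as_result = true;  // backward fns must not consume it
  }
  return aggregated;
}
```

For a source tensor the same pointer ends up both in `out_gradients` and in the map, which is why the marking step matters: without it, the backward pass would treat the cached result as a disposable intermediate.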
3 changes: 2 additions & 1 deletion tensorflow/compiler/aot/embedded_protocol_buffers.cc
@@ -82,7 +82,8 @@ static StatusOr<string> CodegenModule(llvm::TargetMachine* target_machine,
   llvm::legacy::PassManager codegen_passes;
 
   if (target_machine->addPassesToEmitFile(
-          codegen_passes, ostream, llvm::TargetMachine::CGFT_ObjectFile)) {
+          codegen_passes, ostream, nullptr,
+          llvm::TargetMachine::CGFT_ObjectFile)) {
     return xla::InternalError(
         "Could not create pass pipeline to generate object file");
   }
9 changes: 5 additions & 4 deletions tensorflow/compiler/jit/xla_interpreter_device.cc
@@ -48,10 +48,11 @@ Status XlaInterpreterDeviceFactory::CreateDevices(
   registration.compile_resource_ops = true;
 
   std::unique_ptr<XlaDevice> device;
-  TF_RETURN_IF_ERROR(XlaDevice::Create("Interpreter", DEVICE_XLA_INTERPRETER, 0,
-                                       DEVICE_INTERPRETER_XLA_JIT, options,
-                                       name_prefix, registration,
-                                       /*transfer_as_literal=*/false, &device));
+  TF_RETURN_IF_ERROR(XlaDevice::Create(
+      "Interpreter", DEVICE_XLA_INTERPRETER, 0, DEVICE_INTERPRETER_XLA_JIT,
+      options, name_prefix, registration,
+      /*transfer_as_literal=*/false,
+      /*shape_representation_fn=*/{}, &device));
   devices->push_back(device.release());
   return Status::OK();
 }
2 changes: 2 additions & 0 deletions tensorflow/compiler/tests/BUILD
@@ -196,9 +196,11 @@ tf_xla_py_test(
     name = "oom_test",
     size = "medium",
     srcs = ["oom_test.py"],
+    # TODO(b/80081500): Re-enable on GPU. Disabled on 2018-05-21.
     disabled_backends = [
         "cpu",
         "cpu_ondemand",
+        "gpu",
     ],
     tags = [
         # Allocates very large amounts of memory and does not work under TSAN.
4 changes: 2 additions & 2 deletions tensorflow/compiler/tests/randomized_tests.cc
@@ -619,8 +619,8 @@ std::vector<int64> OpTest::ImageDims(TensorFormat format, int batch,
         dims.push_back(dim);
       }
       break;
-    case FORMAT_NCHW_VECT_C:
-      LOG(FATAL) << "FORMAT_NCHW_VECT_C not supported.";
+    default:
+      LOG(FATAL) << "Tensor format " << ToString(format) << " not supported.";
   }
   return dims;
 }
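The randomized_tests.cc change above replaces a hard-coded `case` for one unsupported format with a `default` clause that names whatever value actually arrived, so any format added later fails loudly instead of falling through silently. A small sketch of the resulting message shape, with a toy enum and `ToString` (hypothetical values, not TensorFlow's real `TensorFormat`):

```cpp
#include <string>

// Toy format enum and stringifier standing in for TensorFlow's TensorFormat
// and ToString(); names here are illustrative only.
enum TensorFormat { FORMAT_NHWC, FORMAT_NCHW, FORMAT_NCHW_VECT_C };

std::string ToString(TensorFormat f) {
  switch (f) {
    case FORMAT_NHWC: return "NHWC";
    case FORMAT_NCHW: return "NCHW";
    case FORMAT_NCHW_VECT_C: return "NCHW_VECT_C";
  }
  return "UNKNOWN";
}

// What the default clause in the diff would log (via LOG(FATAL)) for any
// format the function does not handle.
std::string UnsupportedMessage(TensorFormat f) {
  return "Tensor format " + ToString(f) + " not supported.";
}
```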
22 changes: 22 additions & 0 deletions tensorflow/compiler/tf2xla/cc/BUILD
@@ -27,3 +27,25 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
     ],
 )
+
+tf_gen_op_wrapper_cc(
+    name = "xla_jit_op_gen",
+    out_ops_file = "ops/xla_jit_op",
+    deps = ["//tensorflow/compiler/jit/ops:xla_ops"],
+)
+
+cc_library(
+    name = "xla_jit_ops",
+    srcs = ["ops/xla_jit_op.cc"],
+    hdrs = ["ops/xla_jit_op.h"],
+    deps = [
+        "//tensorflow/cc:const_op",
+        "//tensorflow/cc:ops",
+        "//tensorflow/cc:scope",
+        "//tensorflow/compiler/jit/ops:xla_ops",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
1 change: 1 addition & 0 deletions tensorflow/compiler/xla/BUILD
@@ -583,6 +583,7 @@ tf_cc_test(
         ":shape_util",
         ":test",
         ":xla_data_proto",
+        "//tensorflow/core:test",
         "//tensorflow/core:test_main",
     ],
 )
17 changes: 17 additions & 0 deletions tensorflow/compiler/xla/service/BUILD
@@ -761,6 +761,23 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "shaped_buffer_test",
+    srcs = ["shaped_buffer_test.cc"],
+    deps = [
+        ":cpu_plugin",
+        ":device_memory_allocator",
+        ":platform_util",
+        ":shaped_buffer",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:test_helpers",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:ptr_util",
+        "//tensorflow/core:test",
+    ],
+)
+
 cc_library(
     name = "executable",
     srcs = ["executable.cc"],
8 changes: 8 additions & 0 deletions tensorflow/compiler/xla/service/cpu/cpu_options.cc
@@ -22,6 +22,8 @@ namespace {
 const char* const kXlaOptimizeForSizeCpuOption = "xla_cpu_optimize_for_size";
 const char* const kXlaDisableVectorizedReduce = "xla_disable_vectorized_reduce";
 const char* const kLlvmIrDotTilingFactor = "xla_llvm_dot_tiling_factor";
+const char* const kXlaEnableExperimentalLlvmIrGemm =
+    "xla_enable_experimental_llvm_ir_gemm";
 
 }  // namespace
 
@@ -54,6 +56,12 @@ tensorflow::gtl::optional<int64> LlvmIrGemvTilingFactor(
   return tensorflow::gtl::nullopt;
 }
 
+bool EnableExperimentalLlvmIrGemm(const HloModuleConfig& config) {
+  const auto& extra_options_map =
+      config.debug_options().xla_backend_extra_options();
+  return extra_options_map.count(kXlaEnableExperimentalLlvmIrGemm) > 0;
+}
+
 }  // namespace options
 }  // namespace cpu
 }  // namespace xla
1 change: 1 addition & 0 deletions tensorflow/compiler/xla/service/cpu/cpu_options.h
@@ -26,6 +26,7 @@ namespace options {
 
 bool OptimizeForSizeRequested(const HloModuleConfig& config);
 bool VectorizedReduceDisabled(const HloModuleConfig& config);
+bool EnableExperimentalLlvmIrGemm(const HloModuleConfig& config);
 tensorflow::gtl::optional<int64> LlvmIrGemvTilingFactor(
     const HloModuleConfig& config);
 
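The cpu_options diff above gates the experimental LLVM IR GEMM behind an entry in the backend-extra-options map: the flag is on when its key is present, regardless of value. A self-contained sketch of that lookup pattern, with a plain `std::map` standing in for the protobuf map returned by `xla_backend_extra_options()` (stand-in names; not the real XLA types):

```cpp
#include <map>
#include <string>

// Key mirroring the constant added in cpu_options.cc.
const char* const kEnableExperimentalGemm =
    "xla_enable_experimental_llvm_ir_gemm";

// Hypothetical stand-in for the DebugOptions extra-options map: boolean
// backend flags are tested by key presence alone; the mapped value is ignored.
bool EnableExperimentalGemm(
    const std::map<std::string, std::string>& extra_options) {
  return extra_options.count(kEnableExperimentalGemm) > 0;
}
```

Presence-based flags keep the option plumbing generic: new backend switches need no schema change, only an agreed-upon key string.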