
Branch 186777369 #17226

Merged · 30 commits · Feb 23, 2018

Changes from all commits

Commits (30)
83a35a8
[XLA] Enable F16 convolution test for CPU.
bixia1 Feb 22, 2018
30310e4
Enable int64 outfeed.
jpienaar Feb 22, 2018
963b9be
Adds support for identifying dilated convolution emulation.
tensorflower-gardener Feb 22, 2018
cb7e196
Internal change.
fdxmw Feb 22, 2018
dce9a49
Merge changes from github.
yifeif Feb 22, 2018
6006f46
[tf.data] Handle a function-raised OutOfRange error correctly in Para…
mrry Feb 22, 2018
848c53f
Implement the logic to parse TensorProto (the tensor value for input …
tensorflower-gardener Feb 22, 2018
c50e351
Add a regression test for virtual_scheduler.
yacoder Feb 23, 2018
810ddac
New Mutex operations for a distributed-happy and Function-less Critic…
ebrevdo Feb 23, 2018
9114005
Add integration tests for remote build execution.
tensorflower-gardener Feb 23, 2018
cdb0dd6
Add to disabled tests the date they were last run and failed.
bixia1 Feb 23, 2018
db0db83
[XLA] Enable most xla/tests for interpreter.
kayzhu Feb 23, 2018
d4bf5b2
Internal change.
tensorflower-gardener Feb 23, 2018
2985919
Support degenerate strided slicing under XLA.
iganichev Feb 23, 2018
1ae8533
Clarify ownership story of TfLiteIntArray* nodes_to_replace
aselle Feb 23, 2018
f68f5e4
Go: Update generated wrapper functions for TensorFlow ops.
tensorflower-gardener Feb 23, 2018
33b6cc7
LSTM support: Quantized types, quantization params for 16-bit unfused…
tensorflower-gardener Feb 23, 2018
8722aee
Moved LIBXSMM convolutions to a separate --define flag so that they a…
tensorflower-gardener Feb 23, 2018
57ee22d
Turn on strip_default_attrs by default during custom export.
tensorflower-gardener Feb 23, 2018
8852be3
Fix Defun logic when returning a value captured from an outer scope.
mrry Feb 23, 2018
491fb62
Add cost estimator tests for the BiasAdd, ReLU, and Conv2D operations.
tensorflower-gardener Feb 23, 2018
0f18c8e
[XLA] Add SliceInDim to the local Python XLA client.
tensorflower-gardener Feb 23, 2018
917a4ac
Update ops-related pbtxt files.
tensorflower-gardener Feb 23, 2018
1dcd464
Pass 'mode' argument to input_fn if appropriate in TPUEstimator.
tensorflower-gardener Feb 23, 2018
befd823
Enable constant propagation across Switch(x,x) by rewriting the two o…
tensorflower-gardener Feb 23, 2018
bff1648
Unify metropolis_hastings interface with HMC kernel.
dustinvtran Feb 23, 2018
1acc02f
Let variables initialized from checkpoints answer ".initialized_value…
tensorflower-gardener Feb 23, 2018
cef8364
Allow setting of OpMetadata for Send HLOs.
tensorflower-gardener Feb 23, 2018
0f6f85e
Merge commit for internal changes
yifeif Feb 23, 2018
db8c7e9
Remove repeated definition due to auto-merge.
yifeif Feb 23, 2018
5 changes: 3 additions & 2 deletions configure.py
@@ -1067,7 +1067,7 @@ def is_compatible(tensorrt_lib, cuda_ver, cudnn_ver):
break

# Reset and Retry
if len(possible_files):
if possible_files:
print('TensorRT libraries found in one the following directories',
'are not compatible with selected cuda and cudnn installations')
print(trt_install_path)
@@ -1076,7 +1076,8 @@ def is_compatible(tensorrt_lib, cuda_ver, cudnn_ver):
if search_result:
print(libnvinfer_path_from_ldconfig)
else:
print('Invalid path to TensorRT. None of the following files can be found:')
print(
'Invalid path to TensorRT. None of the following files can be found:')
print(trt_install_path)
print(os.path.join(trt_install_path, 'lib'))
print(os.path.join(trt_install_path, 'lib64'))
29 changes: 29 additions & 0 deletions tensorflow/compiler/tests/slice_ops_test.py
@@ -20,6 +20,7 @@

from tensorflow.compiler.tests.xla_test import XLATestCase
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import googletest

@@ -137,6 +138,34 @@ def test1DNegativeStride(self):

self.assertAllEqual([6, 4], result)

def test2DDegenerate(self):
for dtype in self.numeric_types:
with self.test_session():
i = array_ops.placeholder(dtype, shape=[2, 3])
with self.test_scope():
o = array_ops.strided_slice(i, [-1, 0], [0, 3])
params = {
i: [[0, 1, 2],
[3, 4, 5]]
}
result = o.eval(feed_dict=params)

self.assertEqual(tensor_shape.TensorShape((0, 3)), result.shape)

def test2DDegenerateNegativeStride(self):
for dtype in self.numeric_types:
with self.test_session():
i = array_ops.placeholder(dtype, shape=[2, 3])
with self.test_scope():
o = array_ops.strided_slice(i, [0, 0], [-1, 3], [-1, 1])
params = {
i: [[0, 1, 2],
[3, 4, 5]]
}
result = o.eval(feed_dict=params)

self.assertEqual(tensor_shape.TensorShape((0, 3)), result.shape)

def test3D(self):
for dtype in self.numeric_types:
with self.test_session():
5 changes: 3 additions & 2 deletions tensorflow/compiler/tf2xla/kernels/strided_slice_op.cc
@@ -77,13 +77,14 @@ class StridedSliceOp : public XlaOpKernel {
for (int i = 0; i < begin.size(); ++i) {
if (strides[i] > 0) {
slice_begin.push_back(begin[i]);
slice_end.push_back(end[i]);
slice_end.push_back(std::max(end[i], begin[i]));
slice_strides.push_back(strides[i]);
} else {
// Negative stride: swap begin and end, add 1 because the interval
// is semi-open, and mark the dimension to be reversed.
slice_begin.push_back(input_shape.dim_size(i) - begin[i] - 1);
slice_end.push_back(input_shape.dim_size(i) - end[i] - 1);
slice_end.push_back(std::max(input_shape.dim_size(i) - end[i] - 1,
input_shape.dim_size(i) - begin[i] - 1));
slice_strides.push_back(-strides[i]);
dimensions_to_reverse.push_back(i);
}
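The std::max clamp above is what lets the new degenerate-slice tests pass: when the requested interval is empty (end not past begin for a positive stride, and symmetrically for a negative one), the effective end index is clamped to the begin index, so a zero-extent slice is emitted instead of an ill-formed one. A minimal NumPy sketch of the positive-stride case, illustrative only and not the TF/XLA kernel:

import numpy as np

x = np.arange(6).reshape(2, 3)

# begin=-1 resolves to row 1 and end=0, so the half-open interval [1, 0) is
# empty; clamping end to max(end, begin) keeps the slice empty rather than
# ill-formed, and the result has shape (0, 3) -- the shape asserted by the
# new test2DDegenerate.
print(x[-1:0, 0:3].shape)  # (0, 3)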
5 changes: 2 additions & 3 deletions tensorflow/compiler/xla/client/computation_builder.h
@@ -198,9 +198,8 @@ class ComputationBuilder {
tensorflow::gtl::ArraySlice<int64> new_sizes);

// Enqueues an operation onto the computation that collapses the operand, from
// minor to major order, then reshapes it into the shape with the given
// dimension sizes, also from major to minor. Conceptually, this is a limited
// form of "shape casting".
// first to last dimension (C order), then reshapes it to the given dimension
// sizes. Conceptually, this is a limited form of "shape casting".
ComputationDataHandle Reshape(const ComputationDataHandle& operand,
tensorflow::gtl::ArraySlice<int64> new_sizes);

6 changes: 6 additions & 0 deletions tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -368,6 +368,12 @@ ComputationDataHandle LocalComputationBuilder::Slice(
return builder_.Slice(operand, start_indices, limit_indices, strides);
}

ComputationDataHandle LocalComputationBuilder::SliceInDim(
const ComputationDataHandle& operand, int64 start_index, int64 limit_index,
int64 stride, int64 dimno) {
return builder_.SliceInDim(operand, start_index, limit_index, stride, dimno);
}

ComputationDataHandle LocalComputationBuilder::DynamicSlice(
const ComputationDataHandle& operand,
const ComputationDataHandle& start_indices,
4 changes: 4 additions & 0 deletions tensorflow/compiler/xla/python/local_computation_builder.h
@@ -170,6 +170,10 @@ class LocalComputationBuilder {
tensorflow::gtl::ArraySlice<int64> limit_indices,
tensorflow::gtl::ArraySlice<int64> strides);

ComputationDataHandle SliceInDim(const ComputationDataHandle& operand,
int64 start_index, int64 limit_index,
int64 stride, int64 dimno);

ComputationDataHandle DynamicSlice(
const ComputationDataHandle& operand,
const ComputationDataHandle& start_indices,
1 change: 1 addition & 0 deletions tensorflow/compiler/xla/python/local_computation_builder.i
@@ -886,6 +886,7 @@ tensorflow::ImportNumpy();
%unignore xla::swig::LocalComputationBuilder::Collapse;
%unignore xla::swig::LocalComputationBuilder::CrossReplicaSum;
%unignore xla::swig::LocalComputationBuilder::Slice;
%unignore xla::swig::LocalComputationBuilder::SliceInDim;
%unignore xla::swig::LocalComputationBuilder::DynamicSlice;
%unignore xla::swig::LocalComputationBuilder::DynamicUpdateSlice;
%unignore xla::swig::LocalComputationBuilder::ConcatInDim;
39 changes: 34 additions & 5 deletions tensorflow/compiler/xla/python/xla_client.py
@@ -656,7 +656,7 @@ def Pad(self, operand, padding_value, padding_config):
representing the configuration of the padding operation.

Returns:
A ComputationDataHandle representing the added pad op.
A ComputationDataHandle representing the added Pad op.
"""
if not isinstance(padding_config, xla_data_pb2.PaddingConfig):
padding_config = GetPaddingConfigFromTriples(padding_config)
@@ -666,7 +666,20 @@ def Pad(self, operand, padding_value, padding_config):
padding_config))

def Reshape(self, operand, dimensions, new_sizes):
"""Reshape op."""
"""Enqueues a reshape op onto the computation.

Args:
operand: ComputationDataHandle representing the array to be reshaped.
dimensions: sequence of integers encoding the order in which dimensions
are collapsed or None, in which case dimensions are flattened in order.
new_sizes: sequence of integers encoding the new dimension sizes (shape).

Returns:
A ComputationDataHandle representing the added Reshape op.
"""
if dimensions is None:
ndim = len(self.GetShape(operand).dimensions())
dimensions = tuple(range(ndim))
return _wrap_data_handle(
self._client.Reshape(
_unwrap_data_handle(operand), dimensions, new_sizes))
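A hypothetical usage sketch of the new dimensions=None default; only the Reshape(operand, dimensions, new_sizes) signature and the range(ndim) fallback come from the change above, while the builder and constant below are assumptions for illustration. Passing None collapses the operand in its existing first-to-last (C) order before new_sizes is applied, exactly as if dimensions=(0, ..., ndim-1) had been given:

import numpy as np

# Hypothetical: `builder` is an xla_client ComputationBuilder instance.
operand = builder.Constant(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32))

# Flattens in C order to (1, 2, 3, 4, 5, 6) and reshapes to 3x2; equivalent
# to builder.Reshape(operand, dimensions=(0, 1), new_sizes=[3, 2]).
reshaped = builder.Reshape(operand, dimensions=None, new_sizes=[3, 2])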
@@ -772,11 +785,27 @@ def Slice(self, operand, start_indices, limit_indices, strides=None):
strides = [1] * len(start_indices)
return _wrap_data_handle(
self._client.Slice(
_unwrap_data_handle(operand),
start_indices,
limit_indices,
_unwrap_data_handle(operand), start_indices, limit_indices,
strides))

def SliceInDim(self, operand, start_index, limit_index, stride, dimno):
"""Enqueues a slice-in-dimension operation onto the computation.

Args:
operand: ComputationDataHandle for the N dimensional array to be sliced.
start_index: an integer containing the start index of the slice.
limit_index: an integer containing the end index of the slice.
stride: an integer containing the stride size for the slice.
dimno: an integer indicating the dimension along which to slice.

Returns:
A ComputationDataHandle representing the added Slice op.
"""
return _wrap_data_handle(
self._client.SliceInDim(
_unwrap_data_handle(operand), start_index, limit_index, stride,
dimno))

def DynamicSlice(self, operand, start_indices, slice_sizes):
"""Enqueues a slice op with dynamic start indices onto the computation.

17 changes: 17 additions & 0 deletions tensorflow/compiler/xla/python/xla_client_test.py
@@ -762,6 +762,23 @@ def testSlice(self):
[3, 2])
self._ExecuteAndCompareExact(c, expected=[[4, 5], [7, 8]])

def testSliceInDim(self):
c = self._NewComputation()
c.SliceInDim(
c.Constant(NumpyArrayS32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
start_index=1,
limit_index=2,
stride=1,
dimno=1)
self._ExecuteAndCompareExact(c, expected=[[2], [5], [8]])
c.SliceInDim(
c.Constant(NumpyArrayS32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
start_index=0,
limit_index=3,
stride=2,
dimno=0)
self._ExecuteAndCompareExact(c, expected=[[1, 2, 3], [7, 8, 9]])

def testDynamicSlice(self):
c = self._NewComputation()
c.DynamicSlice(
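For intuition, a rough NumPy analogue of what the new SliceInDim wrapper computes (an assumed sketch for illustration, not the XLA implementation): it slices the single axis dimno with [start_index:limit_index:stride] and leaves every other axis untouched.

import numpy as np

def slice_in_dim_np(x, start_index, limit_index, stride, dimno):
    # Index that is slice(None) on every axis except the chosen one.
    index = [slice(None)] * x.ndim
    index[dimno] = slice(start_index, limit_index, stride)
    return x[tuple(index)]

a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(slice_in_dim_np(a, 1, 2, 1, 1))  # [[2] [5] [8]], as in testSliceInDim
print(slice_in_dim_np(a, 0, 3, 2, 0))  # [[1 2 3] [7 8 9]]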
6 changes: 6 additions & 0 deletions tensorflow/compiler/xla/service/hlo_instruction.h
@@ -824,6 +824,12 @@ class HloInstruction {
// Precondition: opcode() == HloOpcode::kSend or HloOpcode::kRecv
int64 channel_id() const { return channel_id_; }

// Returns the channel name associated with the instruction. The name is
// used to identify host Send/Recv operations.
//
// Precondition: opcode() == HloOpcode::kHostCompute
string channel_name() const { return channel_name_; }

// Returns feature_index field associated with the instruction. The index
// represents the index of the feature dimension.
//
12 changes: 12 additions & 0 deletions tensorflow/compiler/xla/service/hlo_module_config.h
@@ -67,6 +67,15 @@ class HloModuleConfig {
bool hlo_profiling_enabled() const { return hlo_profiling_enabled_; }
void enable_hlo_profiling(bool enabled) { hlo_profiling_enabled_ = enabled; }

// Sets/returns whether this is a "host module". Host modules are used to
// record the data- and control-flow dependencies of host side computation
// that communicates with compiled code. They are used for analysis and
// scheduling purposes, but no code is generated.
bool is_host_module() const { return is_host_module_; }
void set_is_host_module(bool is_host_module) {
is_host_module_ = is_host_module;
}

// Sets/returns the module seed set during execution.
void set_seed(uint64 seed) { seed_ = seed; }
uint64 seed() const { return seed_; }
@@ -104,6 +113,9 @@ class HloModuleConfig {
// Whether to enable HLO-level profiling.
bool hlo_profiling_enabled_ = false;

// Whether this is a 'host module'.
bool is_host_module_ = false;

// Module/graph-level seed handle.
uint64 seed_ = 0;

6 changes: 5 additions & 1 deletion tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -183,6 +183,10 @@ Status HloSharding::ValidateTuple(const Shape& shape, int64 num_devices) const {
// shape tree.
ShapeTree<HloSharding> shape_tree = GetAsShapeTree(shape);
for (const auto& index_to_sharding : shape_tree.leaves()) {
if (index_to_sharding.first.empty()) {
// An empty tuple has a ShapeTree with a single leaf at the empty index.
continue;
}
Status status = index_to_sharding.second.ValidateNonTuple(
ShapeUtil::GetSubshape(shape, index_to_sharding.first), num_devices);
if (!status.ok()) {
@@ -222,7 +226,7 @@ Status HloSharding::ValidateNonTuple(const Shape& shape,
Status status = Status::OK();
std::set<int64> seen_cores;
tile_assignment_.Each(
[&](tensorflow::gtl::ArraySlice<int64> indices, uint32 core) {
[&](tensorflow::gtl::ArraySlice<int64> indices, int32 core) {
// Don't overwrite a bad status, so we report the first error.
if (status.ok()) {
if (core >= num_devices) {
6 changes: 4 additions & 2 deletions tensorflow/compiler/xla/service/service.cc
@@ -1556,8 +1556,10 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) {
case OpRequest::kSendRequest: {
TF_RETURN_IF_ERROR(
channel_tracker_.RegisterSend(arg->send_request().channel_handle()));
TF_RETURN_IF_ERROR(computation->AddSendInstruction(arg->send_request()));
return tensorflow::Status::OK();
// Send does not return a value, but we need a handle to be able to
// set OpMetadata and OpSharding (device assignment).
handle_status = computation->AddSendInstruction(arg->send_request());
break;
}
case OpRequest::kRecvRequest: {
TF_RETURN_IF_ERROR(
5 changes: 3 additions & 2 deletions tensorflow/compiler/xla/service/user_computation.cc
@@ -226,7 +226,8 @@ StatusOr<ComputationDataHandle> UserComputation::AddParameterInstruction(
return handle;
}

Status UserComputation::AddSendInstruction(const SendRequest& send_request) {
StatusOr<ComputationDataHandle> UserComputation::AddSendInstruction(
const SendRequest& send_request) {
tensorflow::mutex_lock lock(mutex_);

// Check if the operand of the instruction is valid.
@@ -244,7 +245,7 @@ Status UserComputation::AddSendInstruction(const SendRequest& send_request) {
VLOG(1) << "AddSendInstruction (" << GetVersionedHandleInternal()
<< "), data handle " << handle.handle() << ": "
<< send_request.ShortDebugString();
return Status::OK();
return handle;
}

StatusOr<ComputationDataHandle> UserComputation::AddRecvInstruction(
Expand Down
3 changes: 2 additions & 1 deletion tensorflow/compiler/xla/service/user_computation.h
@@ -236,7 +236,8 @@ class UserComputation {
const UserComputation& false_computation);

// Enqueues a Send instruction onto this user computation.
Status AddSendInstruction(const SendRequest& send_request);
StatusOr<ComputationDataHandle> AddSendInstruction(
const SendRequest& send_request);

// Enqueues a Recv instruction onto this user computation.
StatusOr<ComputationDataHandle> AddRecvInstruction(