diff --git a/setup.py b/setup.py index 8dddda73a4e7..b20a85758e85 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ base_dir = os.path.dirname(os.path.abspath(__file__)) third_party_path = os.path.join(base_dir, 'third_party') -_libtpu_version = '0.1.dev20230202' +_libtpu_version = '0.1.dev20230213' _libtpu_storage_path = f'https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/wheels/libtpu-nightly/libtpu_nightly-{_libtpu_version}-py3-none-any.whl' diff --git a/tf_patches/cudnn_int8x32.diff b/tf_patches/cudnn_int8x32.diff new file mode 100644 index 000000000000..2aa51c4196b9 --- /dev/null +++ b/tf_patches/cudnn_int8x32.diff @@ -0,0 +1,564 @@ +# TODO: Try removing with the next pin update. See https://github.com/pytorch/xla/pull/4615#issuecomment-1428883781 +diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc +index fac64573c54..dec0f1823c1 100644 +--- a/tensorflow/compiler/xla/debug_options_flags.cc ++++ b/tensorflow/compiler/xla/debug_options_flags.cc +@@ -110,7 +110,6 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { + DebugOptions::PARTITIONING_ALGORITHM_NOOP); + + opts.set_xla_gpu_enable_triton_gemm(false); +- opts.set_xla_gpu_enable_cudnn_int8x32_convolution_reordering(false); + return opts; + } + +@@ -882,13 +881,6 @@ void MakeDebugOptionsFlags(std::vector* flag_list, + bool_setter_for(&DebugOptions::set_xla_gpu_enable_triton_gemm), + debug_options->xla_gpu_enable_triton_gemm(), + "Use Triton-based matrix multiplication.")); +- flag_list->push_back(tsl::Flag( +- "xla_gpu_enable_cudnn_int8x32_convolution_reordering", +- bool_setter_for( +- &DebugOptions:: +- set_xla_gpu_enable_cudnn_int8x32_convolution_reordering), +- debug_options->xla_gpu_enable_cudnn_int8x32_convolution_reordering(), +- "Enable cuDNN frontend for int8x32 convolutions with reordered filter.")); + } // NOLINT(readability/fn_size) + + // Allocates flag_values and flag_objects; this function must not be called more +diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD +index 7fff6bdad13..b21105a42d7 100644 +--- a/tensorflow/compiler/xla/service/gpu/BUILD ++++ b/tensorflow/compiler/xla/service/gpu/BUILD +@@ -1658,11 +1658,8 @@ cc_library( + srcs = ["cudnn_vectorize_convolutions.cc"], + hdrs = ["cudnn_vectorize_convolutions.h"], + deps = [ +- ":backend_configs_cc", +- ":cublas_cudnn", + ":cudnn_support_utils", + ":stream_executor_util", +- "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/hlo/ir:hlo", +diff --git a/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.cc b/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.cc +index 7254e02f513..c2ab437b48e 100644 +--- a/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.cc ++++ b/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.cc +@@ -16,23 +16,22 @@ limitations under the License. 
+ #include "tensorflow/compiler/xla/service/gpu/conv_layout_normalization.h" + + #include +-#include + #include + + #include "tensorflow/compiler/xla/hlo/ir/hlo_casting_utils.h" +-#include "tensorflow/compiler/xla/hlo/ir/hlo_instructions.h" + #include "tensorflow/compiler/xla/hlo/ir/hlo_module.h" +-#include "tensorflow/compiler/xla/layout_util.h" + #include "tensorflow/compiler/xla/service/gpu/cublas_cudnn.h" + #include "tensorflow/compiler/xla/service/hlo_creation_utils.h" +-#include "tensorflow/compiler/xla/shape_util.h" + + namespace xla { + namespace gpu { +-namespace { + +-StatusOr UpdateLayoutForCudnnConvolution( +- HloCustomCallInstruction* hlo) { ++StatusOr> ++NormalizeLayoutForCustomCallConvolution(HloCustomCallInstruction* hlo) { ++ if (!IsCustomCallToDnnConvolution(*hlo)) { ++ return {std::nullopt}; ++ } ++ + HloInstruction* lhs = hlo->mutable_operand(0); + HloInstruction* rhs = hlo->mutable_operand(1); + const ConvolutionDimensionNumbers& dim_numbers = +@@ -162,64 +161,8 @@ StatusOr UpdateLayoutForCudnnConvolution( + } else { + bc_to_orig = MakeBitcastHlo(normalized_conv, hlo->shape()); + } +- return bc_to_orig; +-} +- +-// Normalize the layout of cuDNN int8x32 filter reordering custom call +-// (implemented by calling `cudnnReorderFilterAndBias`), which should be +-// followed by a convolution. +-// Both the input and the output shape for the filter operand must have the +-// NCHW_VECT_C layout. +-HloInstruction* UpdateLayoutForCudnnConvolutionReordering( +- HloCustomCallInstruction* hlo) { +- // The custom call may have either one (filter) or two (filter and bias) +- // operands. The number of outputs matches the number of inputs. +- Shape const* filter_shape; +- Shape const* bias_shape; +- std::tie(filter_shape, bias_shape) = +- hlo->shape().IsTuple() ? std::make_tuple(&hlo->shape().tuple_shapes(0), +- &hlo->shape().tuple_shapes(1)) +- : std::make_tuple(&hlo->shape(), nullptr); +- +- // Transpose the filter to match the expected layout (NCHW_VECT_C). +- // This bias is 1D, so the shape doesn't need to be updated. +- auto new_filter_shape = +- ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( +- *filter_shape); +- auto dimensions = LayoutUtil::MakeLayoutFromMajorToMinor( +- filter_shape->layout().minor_to_major()); +- HloInstruction* transpose = hlo->AddInstruction( +- HloInstruction::CreateTranspose(new_filter_shape, hlo->mutable_operand(0), +- dimensions.minor_to_major())); +- +- // Create a replacement custom-call with layout-normalized inputs. 
+- HloInstruction* custom_call; +- if (bias_shape != nullptr) { +- custom_call = +- hlo->parent()->AddInstruction(HloInstruction::CreateCustomCall( +- ShapeUtil::MakeTupleShape({new_filter_shape, *bias_shape}), +- {transpose, hlo->mutable_operand(1)}, hlo->custom_call_target())); +- } else { +- custom_call = +- hlo->parent()->AddInstruction(HloInstruction::CreateCustomCall( +- new_filter_shape, {transpose}, hlo->custom_call_target())); +- } +- return MakeBitcastHlo(custom_call, hlo->shape()); +-} +- +-} // namespace + +-StatusOr> NormalizeLayoutForGpuCustomCalls( +- HloCustomCallInstruction* hlo) { +- if (IsCustomCallToDnnConvolution(*hlo)) { +- TF_ASSIGN_OR_RETURN(HloInstruction * bc_to_orig, +- UpdateLayoutForCudnnConvolution(hlo)); +- return std::make_optional(bc_to_orig); +- } +- if (IsCudnnConvolutionReorder(*hlo)) { +- return std::make_optional(UpdateLayoutForCudnnConvolutionReordering(hlo)); +- } +- return {std::nullopt}; ++ return std::make_optional(bc_to_orig); + } + + } // end namespace gpu +diff --git a/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.h b/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.h +index bb38a299435..c6305784f94 100644 +--- a/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.h ++++ b/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.h +@@ -28,8 +28,8 @@ limitations under the License. + namespace xla { + namespace gpu { + +-StatusOr> NormalizeLayoutForGpuCustomCalls( +- HloCustomCallInstruction*); ++StatusOr> ++NormalizeLayoutForCustomCallConvolution(HloCustomCallInstruction*); + + } // end namespace gpu + } // end namespace xla +diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_vectorize_convolutions.cc b/tensorflow/compiler/xla/service/gpu/cudnn_vectorize_convolutions.cc +index f9af729cf36..7511e81a423 100644 +--- a/tensorflow/compiler/xla/service/gpu/cudnn_vectorize_convolutions.cc ++++ b/tensorflow/compiler/xla/service/gpu/cudnn_vectorize_convolutions.cc +@@ -16,21 +16,16 @@ limitations under the License. + #include "tensorflow/compiler/xla/service/gpu/cudnn_vectorize_convolutions.h" + + #include +-#include + #include + + #include "tensorflow/compiler/xla/client/xla_builder.h" + #include "tensorflow/compiler/xla/hlo/ir/hlo_casting_utils.h" + #include "tensorflow/compiler/xla/hlo/ir/hlo_instructions.h" +-#include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h" +-#include "tensorflow/compiler/xla/service/gpu/cublas_cudnn.h" + #include "tensorflow/compiler/xla/service/gpu/cudnn_support_utils.h" + #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" +-#include "tensorflow/compiler/xla/shape_util.h" + + namespace xla { + namespace gpu { +-namespace { + + // Finds convolutions that this pass may be able to transform, namely int8_t + // cudnn forward or forward-bias-activation convolutions +@@ -254,37 +249,6 @@ static ConvolutionDimensionNumbers VectorizeDnums( + return dnums; + } + +-// Reorders the convolution's filter and bias (if present) according to +-// cudnnReorderFilterAndBias. Also marks that the filter + bias are reordered +-// in the conv's backend-config. +-Status ReorderInt8NchwVect(HloCustomCallInstruction* conv, XlaOp* operands) { +- // Update convolution backend config. 
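#
# Background for the helper being deleted here: int8x32 ("NCHW_VECT_C") packs
# channels in groups of 32, so a logical s8[64,64,3,3] OIHW filter travels as
# s8[64,2,3,3,32] (O, I/32, H, W, 32); the element count is unchanged, as a
# worked check shows: 64*64*3*3 = 64*2*3*3*32 = 36864. cuDNN additionally
# wants that packed filter permuted by cudnnReorderFilterAndBias, which is
# what the code below arranged and recorded in the conv's backend config.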
+- TF_ASSIGN_OR_RETURN(auto config, +- conv->backend_config()); +- config.set_reordered_int8_nchw_vect(true); +- TF_RETURN_IF_ERROR(conv->set_backend_config(config)); +- +- XlaBuilder& builder = *operands->builder(); +- Shape filter_shape = builder.GetShape(operands[1]).value(); +- +- if (conv->operand_count() > 2) { +- // Reorder filter and bias. +- Shape bias_shape = builder.GetShape(operands[2]).value(); +- XlaOp reorder = CustomCall( +- &builder, std::string(kCudnnConvReorderFilterAndBiasCallTarget), +- {operands[1], operands[2]}, +- ShapeUtil::MakeTupleShape({filter_shape, bias_shape})); +- operands[1] = GetTupleElement(reorder, 0); +- operands[2] = GetTupleElement(reorder, 1); +- } else { +- // Reorder just the filter. +- operands[1] = +- CustomCall(&builder, std::string(kCudnnConvReorderFilterCallTarget), +- {operands[1]}, filter_shape); +- } +- return OkStatus(); +-} +- + // Tries to vectorize an already-vectorized convolution. + // + // That is, given a convolution of shape [N, C/k, H, W, k], changes it to have +@@ -371,13 +335,6 @@ static StatusOr TryRevectorizeConv( + conv->ToString()); + } + +- // Reorder filter and bias for the int8x32 convolutions. +- const auto& debug_options = conv->GetModule()->config().debug_options(); +- if (input_shape.element_type() == xla::S8 && vect_size == 32 && +- debug_options.xla_gpu_enable_cudnn_int8x32_convolution_reordering()) { +- TF_RETURN_IF_ERROR(ReorderInt8NchwVect(conv, new_operands.data())); +- } +- + // The custom-call returns a tuple (new_output_shape, u8[0]), where the second + // value in the tuple represents the convolution's scratch memory. + DimensionVector new_output_dims(output_shape.dimensions().begin(), +@@ -502,13 +459,6 @@ static StatusOr TryVectorizeConv( + conv->ToString()); + } + +- // Reorder filter and bias for the int8x32 convolutions. +- const auto& debug_options = conv->GetModule()->config().debug_options(); +- if (input_shape.element_type() == xla::S8 && vect_size == 32 && +- debug_options.xla_gpu_enable_cudnn_int8x32_convolution_reordering()) { +- TF_RETURN_IF_ERROR(ReorderInt8NchwVect(conv, new_operands.data())); +- } +- + // The custom-call returns a tuple (new_output_shape, u8[0]), where the second + // value in the tuple represents the convolution's scratch memory. 
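#
# The tuple convention in the comment above, spelled out with shapes borrowed
# from the test HLO removed later in this patch: a cuDNN conv custom call
# yields (conv_result, scratch), e.g.
#
#   (s8[4,2,48,48,32]{4,3,2,1,0}, u8[51328]{0}) custom-call(...),
#       custom_call_target="__cudnn$convBiasActivationForward"
#
# u8[0] stands for "scratch size not yet known"; autotuning typically rewrites
# it to the real workspace size (51328 bytes in that test module).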
+ Shape new_output_shape = SplitShapeAtDim( +@@ -545,8 +495,6 @@ static StatusOr TryVectorizeConv( + return true; + } + +-} // namespace +- + StatusOr CudnnVectorizeConvolutions::Run( + HloModule* module, + const absl::flat_hash_set& execution_threads) { +diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +index 9e4d8c95bb0..9b242ce3b04 100644 +--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc ++++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +@@ -836,7 +836,8 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( + pipeline.AddPass(); + + if (debug_options.xla_gpu_normalize_layouts()) { +- pipeline.AddPass(&NormalizeLayoutForGpuCustomCalls); ++ pipeline.AddPass( ++ &NormalizeLayoutForCustomCallConvolution); + pipeline.AddPass>(options); + } + pipeline.AddPass(); +diff --git a/tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc b/tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc +index cb2aa3f2ac1..4c995b4f142 100644 +--- a/tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc ++++ b/tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc +@@ -3485,7 +3485,7 @@ std::tuple GetTensorVectorSizeAndDim( + tsl::StatusOr CreateCudnnTensor( + absl::Span dims, absl::Span strides, + int64_t uid, dnn::DataType dtype, int64_t vec_count, int64_t vec_dim, +- bool is_virtual = false, bool is_reordered_nchw_vect = false) { ++ bool is_virtual = false) { + auto tensor = cudnn_frontend::TensorBuilder() + .setDim(dims.size(), dims.data()) + .setStride(strides.size(), strides.data()) +@@ -3494,9 +3494,6 @@ tsl::StatusOr CreateCudnnTensor( + .setDataType(ToCudnnDataType(dtype)) + .setVectorCountAndDimension(vec_count, vec_dim) + .setVirtual(is_virtual) +- .setReorderType(is_reordered_nchw_vect +- ? CUDNN_TENSOR_REORDERING_INT8x32 +- : CUDNN_TENSOR_REORDERING_NONE) + .build(); + RETURN_MSG_IF_CUDNN_ERROR(tensor); + return tensor; +@@ -3523,6 +3520,11 @@ GetCudnnOperationGraph(dnn::ConvolutionKind kind, dnn::DataType input_type, + std::vector input_strides = input_descriptor.vectorized_strides( + dnn::DataLayout::kBatchDepthYX, vector_size, vector_dim); + ++ if (vector_size == 32) { ++ return tsl::errors::Internal( ++ "cuDNN frontend doesn't support Tx32 at the moment."); ++ } ++ + TF_ASSIGN_OR_RETURN(auto tensor_x, + CreateCudnnTensor(input_dims, input_strides, 'x', + input_type, vector_size, vector_dim)); +@@ -3547,13 +3549,9 @@ GetCudnnOperationGraph(dnn::ConvolutionKind kind, dnn::DataType input_type, + std::vector filter_strides = filter_descriptor.vectorized_strides( + dnn::FilterLayout::kOutputInputYX, vector_size, vector_dim); + +- TF_ASSIGN_OR_RETURN( +- auto tensor_w, +- CreateCudnnTensor( +- filter_dims, filter_strides, 'w', input_type, vector_size, vector_dim, +- /*is_virtual=*/false, +- /*is_reordered_nchw_vect=*/filter_descriptor.layout() == +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered)); ++ TF_ASSIGN_OR_RETURN(auto tensor_w, ++ CreateCudnnTensor(filter_dims, filter_strides, 'w', ++ input_type, vector_size, vector_dim)); + + // conv_desc. 
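#
# For reference on the vectorized dims/strides built above, worked for the
# NCHW_VECT_C input s8[4,2,48,48,32] (logically s8[4,64,48,48]):
#
#   dims    = {4, 2, 48, 48, 32}
#   strides = {2*48*48*32, 48*48*32, 48*32, 32, 1}
#           = {147456, 73728, 1536, 32, 1}
#
# The early return added above rejects exactly this vector_size == 32 case,
# since without filter reordering the frontend path no longer handles it.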
+ auto mode = convolution_descriptor.convolution_not_crosscorr() +@@ -3657,6 +3655,11 @@ GetCudnnFusedOperationGraph( + std::vector input_strides = input_descriptor.vectorized_strides( + dnn::DataLayout::kBatchDepthYX, vector_size, vector_dim); + ++ if (vector_size == 32) { ++ return tsl::errors::Internal( ++ "cuDNN frontend doesn't support Tx32 at the moment."); ++ } ++ + TF_ASSIGN_OR_RETURN(auto tensor_x, + CreateCudnnTensor(input_dims, input_strides, 'x', + input_type, vector_size, vector_dim)); +@@ -3681,13 +3684,9 @@ GetCudnnFusedOperationGraph( + dnn::FilterLayout::kOutputInputYX, vector_size, vector_dim); + std::vector filter_strides = filter_descriptor.vectorized_strides( + dnn::FilterLayout::kOutputInputYX, vector_size, vector_dim); +- TF_ASSIGN_OR_RETURN( +- auto tensor_w, +- CreateCudnnTensor( +- filter_dims, filter_strides, 'w', input_type, vector_size, vector_dim, +- /*is_virtual=*/false, +- /*is_reordered_nchw_vect=*/filter_descriptor.layout() == +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered)); ++ TF_ASSIGN_OR_RETURN(auto tensor_w, ++ CreateCudnnTensor(filter_dims, filter_strides, 'w', ++ input_type, vector_size, vector_dim)); + + // For the purposes of the cudnn graph, say that the bias tensor has the same + // layout as the output tensor. It doesn't actually matter, because bias is a +@@ -4822,20 +4821,17 @@ tsl::Status CudnnSupport::GetConvolveRunners( + const dnn::ConvolutionDescriptor& convolution_descriptor, bool use_fallback, + ScratchAllocator* /*scratch_allocator*/, + std::vector>* out_exec_plans) { ++ // All current versions of the frontend API lack support for Tx32 ++ // convolutions. ++ const bool is_unsupported_x32 = ++ input_descriptor.layout() == dnn::kBatchDepthYX32; ++ + // cuDNN frontend support became sufficiently stable to use in 8.1. + // TODO(awpr): remove this condition once support for cuDNN 8.0 is dropped. + const bool is_pre_frontend_cudnn = CUDNN_VERSION < 8100; + +- // cuDNN frontend support for Tx32 convolutions added in 8.3. +- // If the filter is not reordered, do not use frontend (it is slow). +- const bool is_disabled_x32 = +- input_descriptor.layout() == dnn::kBatchDepthYX32 && +- (CUDNN_VERSION < 8300 || +- filter_descriptor.layout() != +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered); +- + const bool actually_use_cudnn_frontend = +- use_cudnn_frontend && !is_pre_frontend_cudnn && !is_disabled_x32; ++ use_cudnn_frontend && !is_pre_frontend_cudnn && !is_unsupported_x32; + + if (use_cudnn_frontend && !actually_use_cudnn_frontend) { + // This will happen once per unique conv configuration/shape that gets +@@ -4847,8 +4843,8 @@ tsl::Status CudnnSupport::GetConvolveRunners( + << " filter: " << filter_descriptor.ToString() << "\n" + << " " << convolution_descriptor.ToString() << "\n" + << " ... because " +- << (is_disabled_x32 +- ? "Tx32 convolutions are disabled." ++ << (is_unsupported_x32 ++ ? "Tx32 convolutions are unsupported." 
+ : "the current cuDNN version does not support it."); + } + +@@ -4933,12 +4929,6 @@ CudnnSupport::ConvolveRunnerFromDesc( + ToCudnnDataType(GetConvAccumulatorType(input_type))); + conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled()); + +- if (filter_descriptor.layout() == +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered) { +- CHECK_CUDNN_OK( +- cudnnSetConvolutionReorderType(conv.handle(), CUDNN_NO_REORDER)); +- } +- + TF_ASSIGN_OR_RETURN( + auto runner, + CudnnLegacyConvRunner::Create( +@@ -5200,12 +5190,6 @@ CudnnSupport::FusedConvolveRunnerFromDesc( + ToCudnnDataType(GetConvAccumulatorType(input_type))); + conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled()); + +- if (filter_descriptor.layout() == +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered) { +- CHECK_CUDNN_OK( +- cudnnSetConvolutionReorderType(conv.handle(), CUDNN_NO_REORDER)); +- } +- + // CUDNN v6 only supports CUDNN_NOT_PROPAGATE_NAN as the reluNanOpt for + // activation descriptor. Note that this will change the nan propagation + // behavior from separate conv, bias, and relu (which by default is +@@ -5275,26 +5259,23 @@ tsl::Status CudnnSupport::GetFusedConvolveRunners( + false; + #endif + ++ // All current versions of the frontend API lack support for Tx32 ++ // convolutions. ++ const bool is_unsupported_x32 = ++ input_descriptor.layout() == dnn::kBatchDepthYX32; ++ + // cuDNN frontend support became sufficiently stable to use in 8.1. + // TODO(awpr): remove this condition once support for cuDNN 8.0 is dropped. + const bool is_pre_frontend_cudnn = CUDNN_VERSION < 8100; + +- // cuDNN frontend support for Tx32 convolutions added in 8.3. +- // If the filter is not reordered, do not use frontend (it is slow). +- const bool is_disabled_x32 = +- input_descriptor.layout() == dnn::kBatchDepthYX32 && +- (CUDNN_VERSION < 8300 || +- filter_descriptor.layout() != +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered); +- + const bool actually_use_cudnn_frontend = + use_cudnn_frontend && !is_pre_frontend_cudnn && +- !is_broken_identity_fused_conv && !is_disabled_x32; ++ !is_broken_identity_fused_conv && !is_unsupported_x32; + + if (use_cudnn_frontend && !actually_use_cudnn_frontend) { + const char* reason = "the current cuDNN version does not support it."; +- if (is_disabled_x32) { +- reason = "Tx32 convolutions are disabled."; ++ if (is_unsupported_x32) { ++ reason = "Tx32 convolutions are unsupported."; + } else if (is_broken_identity_fused_conv) { + reason = "it uses an identity activation."; + } +diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD +index a7ece361f17..efa6e41c4a0 100644 +--- a/tensorflow/compiler/xla/tests/BUILD ++++ b/tensorflow/compiler/xla/tests/BUILD +@@ -1298,7 +1298,7 @@ xla_test( + srcs = ["convolution_cudnn_test.cc"], + backend_tags = {"gpu": [ + "gpu", +- "requires-gpu-sm80", ++ "requires-gpu-sm70", + ]}, + backends = ["gpu"], + deps = [ +diff --git a/tensorflow/compiler/xla/tests/convolution_cudnn_test.cc b/tensorflow/compiler/xla/tests/convolution_cudnn_test.cc +index 04b9126302d..596f2be8f77 100644 +--- a/tensorflow/compiler/xla/tests/convolution_cudnn_test.cc ++++ b/tensorflow/compiler/xla/tests/convolution_cudnn_test.cc +@@ -61,60 +61,5 @@ ENTRY TestComputation { + EXPECT_TRUE(RunAndCompare(kHlo, ErrorSpec{0, 0})); + } + +-XLA_TEST_F(ConvolutionHloTest, TestCudnnConvInt8x32BiasNonConst) { +- // Test two GPU compiled HLOs, first version with vectorization disabled, +- // second with vectorization enabled. 
The reference implementation +- // (Interpreter) does not support the fused conv-add-relu-clamp operation, +- // thus cannot be used. +- if (!backend() +- .default_stream_executor() +- ->GetDeviceDescription() +- .cuda_compute_capability() +- .IsAtLeast(8)) { +- return; +- } +- constexpr char kHloBase[] = R"( +-HloModule TestModule, entry_computation_layout={(s8[4,48,48,64]{3,2,1,0},s8[64,3,3,64]{3,2,1,0},s8[64]{0})->s8[4,48,48,64]{3,2,1,0}} +- +-ENTRY TestComputation { +- input = s8[4,48,48,64]{3,2,1,0} parameter(0) +- filter = s8[64,3,3,64]{3,2,1,0} parameter(1) +- bias = s8[64]{0} parameter(2) +- convert.1 = f32[64]{0} convert(bias) +- cudnn-conv-bias-activation.3 = (s8[4,48,48,64]{3,2,1,0}, u8[0]{0}) custom-call(input, filter, convert.1), +- window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_o01i->b01f, custom_call_target="__cudnn$convBiasActivationForward", +- backend_config="{\"activation_mode\":\"2\",\"conv_result_scale\":1,\"side_input_scale\":0,\"algorithm\":{ +- \"algo_id\":\"38\",\"math_type\":\"DEFAULT_MATH\",\"tuning_knobs\":{\"14\":\"5\",\"13\":\"1\",\"23\":\"0\",\"2\":\"1\"}, +- \"is_cudnn_frontend\":true,\"workspace_size\":\"0\"}}" +- ROOT get-tuple-element.1 = s8[4,48,48,64]{3,2,1,0} get-tuple-element(cudnn-conv-bias-activation.3), index=0 +-})"; +- constexpr char kHloVectorized[] = R"( +-HloModule TestModule, entry_computation_layout={(s8[4,48,48,64]{3,2,1,0},s8[64,3,3,64]{3,2,1,0},s8[64]{0})->s8[4,48,48,64]{3,2,1,0}} +- +-ENTRY TestComputation { +- input = s8[4,48,48,64]{3,2,1,0} parameter(0) +- bitcast.36 = s8[4,48,48,2,32]{4,3,2,1,0} bitcast(input) +- transpose = s8[4,2,48,48,32]{4,3,2,1,0} transpose(bitcast.36), dimensions={0,3,1,2,4} +- filter = s8[64,3,3,64]{3,2,1,0} parameter(1) +- bitcast.18 = s8[64,3,3,2,32]{4,3,2,1,0} bitcast(filter) +- transpose.3 = s8[64,2,3,3,32]{4,3,2,1,0} transpose(bitcast.18), dimensions={0,3,1,2,4} +- bias = s8[64]{0} parameter(2) +- convert.2 = f32[64]{0} convert(bias) +- custom-call.3 = (s8[64,2,3,3,32]{4,3,2,1,0}, f32[64]{0}) custom-call(transpose.3, convert.2), custom_call_target="__cudnn$convReorderFilterAndBias" +- get-tuple-element.2 = s8[64,2,3,3,32]{4,3,2,1,0} get-tuple-element(custom-call.3), index=0 +- get-tuple-element.3 = f32[64]{0} get-tuple-element(custom-call.3), index=1 +- cudnn-conv-bias-activation.4 = (s8[4,2,48,48,32]{4,3,2,1,0}, u8[51328]{0}) custom-call(transpose, get-tuple-element.2, get-tuple-element.3), +- window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", +- backend_config="{\"activation_mode\":\"2\",\"conv_result_scale\":1,\"side_input_scale\":0,\"algorithm\":{ +- \"algo_id\":\"7\",\"math_type\":\"DEFAULT_MATH\",\"tuning_knobs\":{\"7\":\"3\",\"2\":\"0\",\"5\":\"4\",\"6\":\"4\",\"4\":\"2\",\"21\":\"0\"}, +- \"is_cudnn_frontend\":true,\"workspace_size\":\"51328\"},\"reordered_int8_nchw_vect\":true}" +- get-tuple-element.6 = s8[4,2,48,48,32]{4,3,2,1,0} get-tuple-element(cudnn-conv-bias-activation.4), index=0 +- transpose.4 = s8[4,48,48,2,32]{4,3,2,1,0} transpose(get-tuple-element.6), dimensions={0,2,3,1,4} +- ROOT bitcast.1 = s8[4,48,48,64]{3,2,1,0} bitcast(transpose.4) +-})"; +- EXPECT_TRUE(RunAndCompareTwoModules(kHloBase, kHloVectorized, ErrorSpec{0, 0}, +- /*run_hlo_passes=*/false)); +-} +- + } // namespace + } // namespace xla +diff --git a/tensorflow/compiler/xla/translate/mhlo_to_lhlo_with_xla/mhlo_to_lhlo_with_xla.cc b/tensorflow/compiler/xla/translate/mhlo_to_lhlo_with_xla/mhlo_to_lhlo_with_xla.cc +index a25931e6fce..33de736599c 100644 +--- 
a/tensorflow/compiler/xla/translate/mhlo_to_lhlo_with_xla/mhlo_to_lhlo_with_xla.cc ++++ b/tensorflow/compiler/xla/translate/mhlo_to_lhlo_with_xla/mhlo_to_lhlo_with_xla.cc +@@ -1233,10 +1233,10 @@ LhloDialectEmitter::EmitDnnConvolutionReorderVectorized( + } + + CHECK_EQ(shape.rank(), 5); +- CHECK_EQ(shape.dimensions(4), 32); ++ CHECK_EQ(shape.dimensions_minor(0), 32); + llvm::SmallVector nchw = { +- shape.dimensions(0), shape.dimensions(1) * 32, shape.dimensions(2), +- shape.dimensions(3)}; ++ shape.dimensions_minor(4), shape.dimensions_minor(3) * 32, ++ shape.dimensions_minor(2), shape.dimensions_minor(1)}; + op->setAttr("filter_dims", GetI64DenseElementsAttr(nchw)); + + return op.getOperation(); +diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto +index 78e4706edfe..9f5b65a5858 100644 +--- a/tensorflow/compiler/xla/xla.proto ++++ b/tensorflow/compiler/xla/xla.proto +@@ -462,9 +462,7 @@ message DebugOptions { + + bool xla_gpu_enable_triton_gemm = 188; + +- bool xla_gpu_enable_cudnn_int8x32_convolution_reordering = 189; +- +- // Next id: 190 ++ // Next id: 189 + + // Extra options to pass to the compilation backend (e.g. LLVM); specific + // interpretation of these values is left to the backend. diff --git a/tf_patches/ffp_gpu.diff b/tf_patches/ffp_gpu.diff deleted file mode 100644 index 7f71d6f28ceb..000000000000 --- a/tf_patches/ffp_gpu.diff +++ /dev/null @@ -1,40 +0,0 @@ -# enable gpu build, remove after next pin update -diff --git a/tensorflow/core/kernels/fft_ops.cc b/tensorflow/core/kernels/fft_ops.cc -index 5e0464e89f1..2d9cd438eb2 100644 ---- a/tensorflow/core/kernels/fft_ops.cc -+++ b/tensorflow/core/kernels/fft_ops.cc -@@ -455,7 +455,7 @@ class FftPlanCache { - cache_.erase(it); - } - --size_; -- return value; -+ return std::optional(std::move(value)); - } - - // Inserts a plan into the cache as long as there is still capacity. 
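#
# The deleted patch above (continued in the two runtime files that follow)
# worked around an implicit-move gap in older toolchains: when a returned
# local's type differs from the declared return type (std::optional<...>,
# absl::StatusOr<...>), `return value;` needs a converting constructor, and
# compilers without the newer implicit-move rules (CWG1579/P1825) resolve it
# against the copy overload, which fails for move-only payloads. A minimal
# illustration (sketch, not from the patch):
#
#   absl::StatusOr<std::unique_ptr<int>> Make() {
#     auto p = std::make_unique<int>(42);
#     return std::move(p);  // plain `return p;` trips pre-P1825 compilers
#   }
#
# Dropping the patch implies the pinned TF now builds without the workaround.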
-diff --git a/tensorflow/compiler/xla/runtime/ffi.cc b/tensorflow/compiler/xla/runtime/ffi.cc -index 584d2008b11..231abca9c92 100644 ---- a/tensorflow/compiler/xla/runtime/ffi.cc -+++ b/tensorflow/compiler/xla/runtime/ffi.cc -@@ -380,7 +380,7 @@ absl::StatusOr FfiModulesState::state_vector() const { - - return absl::InternalError("Unsupported FFI module state"); - } -- return state_vector; -+ return std::move(state_vector); - } - - //===----------------------------------------------------------------------===// - diff --git a/tensorflow/compiler/xla/runtime/executable.cc b/tensorflow/compiler/xla/runtime/executable.cc -index 23c8fa7421e..9d715d5c61d 100644 ---- a/tensorflow/compiler/xla/runtime/executable.cc -+++ b/tensorflow/compiler/xla/runtime/executable.cc -@@ -332,7 +332,7 @@ absl::StatusOr Executable::Execute( - if (auto st = ReturnResults(ordinal, results, &call_frame); !st.ok()) - return st; - -- return exec_ref; -+ return std::move(exec_ref); - } - - ExecutionReference Executable::Execute(unsigned ordinal, CallFrame& call_frame, diff --git a/tf_patches/grpc_version.diff b/tf_patches/grpc_version.diff index 627c31d374d2..261efd356b6f 100644 --- a/tf_patches/grpc_version.diff +++ b/tf_patches/grpc_version.diff @@ -5,7 +5,7 @@ index a2ff5848fcf..930313e8c25 100644 +++ b/.bazelrc @@ -557,8 +557,8 @@ build:rbe_linux_py3_base --python_path="/usr/local/bin/python3.9" build:rbe_linux_py3_base --repo_env=TF_PYTHON_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_python3.9" - + build:rbe_win --config=rbe -build:rbe_win --crosstool_top="//tensorflow/tools/toolchains/win/tf_win_01232023:toolchain" -build:rbe_win --extra_toolchains="//tensorflow/tools/toolchains/win/tf_win_01232023:cc-toolchain-x64_windows" @@ -14,11 +14,23 @@ index a2ff5848fcf..930313e8c25 100644 build:rbe_win --extra_execution_platforms="//tensorflow/tools/toolchains/win:rbe_windows_ltsc2019" build:rbe_win --host_platform="//tensorflow/tools/toolchains/win:rbe_windows_ltsc2019" build:rbe_win --platforms="//tensorflow/tools/toolchains/win:rbe_windows_ltsc2019" +diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl +index 10b30f63755..306512e21b8 100644 +--- a/tensorflow/compiler/xla/tests/build_defs.bzl ++++ b/tensorflow/compiler/xla/tests/build_defs.bzl +@@ -112,6 +112,7 @@ def xla_test( + native.cc_library( + name = "%s_lib" % name, + srcs = srcs, ++ tags = tags, + copts = copts, + testonly = True, + deps = deps, diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files -index 3d16e3b862a..1a2b9e015eb 100644 +index 79c176026f7..e878daf41db 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files -@@ -169,7 +169,6 @@ tensorflow/tools/toolchains/win/bazel_211/BUILD: +@@ -171,7 +171,6 @@ tensorflow/tools/toolchains/win/bazel_211/BUILD: tensorflow/tools/toolchains/win/tf_win_01072022/BUILD: tensorflow/tools/toolchains/win/tf_win_01112023/BUILD: tensorflow/tools/toolchains/win/tf_win_01122022/BUILD: @@ -35,7 +47,7 @@ index 3d1c95c22a9..fb99462a900 100644 load("//tensorflow/core/platform:build_config.bzl", "tf_protos_grappler") -load("//tensorflow:tensorflow.bzl", "if_not_windows") +load("//tensorflow:tensorflow.bzl", "VERSION", "if_not_windows", "tf_python_pybind_static_deps") - + package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], @@ -249,13 +249,12 @@ tf_python_pybind_extension( @@ -59,19 +71,19 @@ index 3d1c95c22a9..fb99462a900 100644 
"//tensorflow/core:framework_headers_lib", "//tensorflow/core:lib_headers_for_pybind", diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD -index 6dba97c839d..7e8e0b9c2bd 100644 +index 57bf7a8a3b1..d0b51d17c81 100644 --- a/tensorflow/python/saved_model/BUILD +++ b/tensorflow/python/saved_model/BUILD @@ -3,7 +3,7 @@ - + load("//tensorflow:strict.default.bzl", "py_strict_library") load("//tensorflow:tensorflow.default.bzl", "cuda_py_test", "tf_py_test", "tf_pybind_cc_library_wrapper", "tf_python_pybind_extension") -load("//tensorflow:tensorflow.bzl", "if_google") +load("//tensorflow:tensorflow.bzl", "VERSION", "if_google", "tf_python_pybind_static_deps") - + package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], -@@ -737,14 +737,13 @@ tf_python_pybind_extension( +@@ -727,14 +727,13 @@ tf_python_pybind_extension( "pywrap_saved_model_fingerprinting.h", "pywrap_saved_model_metrics.h", ], @@ -93,7 +105,7 @@ index 6dba97c839d..7e8e0b9c2bd 100644 deps = [ ":pywrap_saved_model_headers", diff --git a/tensorflow/tools/ci_build/release/requirements_common.txt b/tensorflow/tools/ci_build/release/requirements_common.txt -index e2328493a61..b4b785fc53d 100644 +index 051387bf64e..be3f16a4edb 100644 --- a/tensorflow/tools/ci_build/release/requirements_common.txt +++ b/tensorflow/tools/ci_build/release/requirements_common.txt @@ -11,7 +11,7 @@ h5py ~= 3.8.0 # Earliest version for Python 3.11 @@ -131,11 +143,11 @@ index 444c2d151ab..8e97ea56f9f 100644 - def_fp.write("\t ??1CoordinatedTask@tensorflow@@UEAA@XZ\n") # for _pywrap_tfe - def_fp.write("\t ?CopyFrom@CoordinatedTask@tensorflow@@QEAAXAEBV12@@Z\n") # for _pywrap_tfe - def_fp.write("\t ??0CoordinatedTask@tensorflow@@IEAA@PEAVArena@protobuf@google@@_N@Z\n") # for _pywrap_tfe - + # Each symbols returned by undname matches the same position in candidates. # We compare on undname but use the decorated name from candidates. diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py -index 915ad896567..00d85d32d79 100644 +index cc17d57800e..329588662e9 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -104,7 +104,7 @@ REQUIRED_PACKAGES = [ @@ -2310,7 +2322,7 @@ index c6b49ba7ca4..827e51a4a8d 100644 @@ -401,13 +401,6 @@ def py_proto_library( # is not explicitly listed in py_libs. Instead, host system is assumed to # have grpc installed. 
- + - genproto_deps = [] - for dep in deps: - if dep != "@com_google_protobuf//:protobuf_python": @@ -2328,10 +2340,10 @@ index c6b49ba7ca4..827e51a4a8d 100644 - deps = genproto_deps, + deps = [s + "_genproto" for s in deps], ) - + if default_runtime and not default_runtime in py_libs + deps: diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl -index 80c4442433b..852a0fbb91f 100644 +index 80072dec290..3506f7d22c0 100644 --- a/tensorflow/workspace2.bzl +++ b/tensorflow/workspace2.bzl @@ -457,14 +457,14 @@ def _tf_repositories(): @@ -2350,29 +2362,56 @@ index 80c4442433b..852a0fbb91f 100644 - urls = tf_mirror_urls("https://github.com/protocolbuffers/protobuf/archive/v3.21.9.zip"), + urls = tf_mirror_urls("https://github.com/protocolbuffers/protobuf/archive/v3.9.2.zip"), ) - + + tf_http_archive( +@@ -485,9 +485,9 @@ def _tf_repositories(): + + tf_http_archive( + name = "com_google_fuzztest", +- sha256 = "c75f224b34c3c62ee901381fb743f6326f7b91caae0ceb8fe62f3fd36f187627", +- strip_prefix = "fuzztest-58b4e7065924f1a284952b84ea827ce35a87e4dc", +- urls = tf_mirror_urls("https://github.com/google/fuzztest/archive/58b4e7065924f1a284952b84ea827ce35a87e4dc.zip"), ++ sha256 = "3fe79ede8e860ba7331987b2c1f84d3eeaf5bea00fd76398d6ff0006635586c6", ++ strip_prefix = "fuzztest-6d79ceb1dc2398e02a39efc23ce40d68baa16a42", ++ urls = tf_mirror_urls("https://github.com/google/fuzztest/archive/6d79ceb1dc2398e02a39efc23ce40d68baa16a42.zip"), + ) + tf_http_archive( @@ -567,10 +567,10 @@ def _tf_repositories(): - + tf_http_archive( name = "boringssl", -- sha256 = "b460f8673f3393e58ce506e9cdde7f2c3b2575b075f214cb819fb57d809f052b", -- strip_prefix = "boringssl-bb41bc007079982da419c0ec3186e510cbcf09d0", +- sha256 = "534fa658bd845fd974b50b10f444d392dfd0d93768c4a51b61263fd37d851c40", +- strip_prefix = "boringssl-b9232f9e27e5668bc0414879dcdedb2a59ea75f2", + sha256 = "fd0e06a8a57dcba1132f91fef1c1327191e913b6c50a84633f7175090972196c", + strip_prefix = "boringssl-f9eff21461cf79556a0fb8ca9b1bf60c3b283ce8", system_build_file = "//third_party/systemlibs:boringssl.BUILD", -- urls = tf_mirror_urls("https://github.com/google/boringssl/archive/bb41bc007079982da419c0ec3186e510cbcf09d0.zip"), +- urls = tf_mirror_urls("https://github.com/google/boringssl/archive/b9232f9e27e5668bc0414879dcdedb2a59ea75f2.tar.gz"), + urls = tf_mirror_urls("https://github.com/google/boringssl/archive/f9eff21461cf79556a0fb8ca9b1bf60c3b283ce8.tar.gz"), ) - + # Note: if you update this, you have to update libpng too. See cl/437813808 +@@ -873,13 +873,6 @@ def _tf_repositories(): + system_build_file = "//third_party/systemlibs:pybind11.BUILD", + ) + +- tf_http_archive( +- name = "pybind11_protobuf", +- urls = tf_mirror_urls("https://github.com/pybind/pybind11_protobuf/archive/80f3440cd8fee124e077e2e47a8a17b78b451363.zip"), +- sha256 = "", +- strip_prefix = "pybind11_protobuf-80f3440cd8fee124e077e2e47a8a17b78b451363", +- ) +- + tf_http_archive( + name = "wrapt", + build_file = "//third_party:wrapt.BUILD", diff --git a/tensorflow/workspace3.bzl b/tensorflow/workspace3.bzl index 91871db22c8..5cfa0553579 100644 --- a/tensorflow/workspace3.bzl +++ b/tensorflow/workspace3.bzl @@ -37,11 +37,11 @@ def workspace(): ) - + # Maven dependencies. 
- RULES_JVM_EXTERNAL_TAG = "4.3" + RULES_JVM_EXTERNAL_TAG = "3.2" @@ -2383,20 +2422,20 @@ index 91871db22c8..5cfa0553579 100644 + sha256 = "82262ff4223c5fda6fb7ff8bd63db8131b51b413d26eb49e3131037e79e324af", url = "https://github.com/bazelbuild/rules_jvm_external/archive/%s.zip" % RULES_JVM_EXTERNAL_TAG, ) - + diff --git a/third_party/pprof.BUILD b/third_party/pprof.BUILD index b7d33118665..dd38a09cd0c 100644 --- a/third_party/pprof.BUILD +++ b/third_party/pprof.BUILD @@ -4,7 +4,7 @@ package( - + licenses(["notice"]) # MIT - + -load("@org_tensorflow//tensorflow/tsl/platform/default:build_config.bzl", "py_proto_library") +load("@com_google_protobuf//:protobuf.bzl", "py_proto_library") - + exports_files(["pprof/LICENSE"]) - + diff --git a/third_party/protobuf/protobuf.patch b/third_party/protobuf/protobuf.patch index 9d928ba175f..5ff9f425b76 100644 --- a/third_party/protobuf/protobuf.patch @@ -2413,14 +2452,14 @@ index 9d928ba175f..5ff9f425b76 100644 +@@ -23,7 +23,7 @@ config_setting( + # ZLIB configuration + ################################################################################ -+ ++ +-ZLIB_DEPS = ["@zlib//:zlib"] ++ZLIB_DEPS = ["@zlib"] -+ ++ + ################################################################################ + # Protobuf Runtime Library +@@ -100,6 +100,7 @@ LINK_OPTS = select({ -+ ++ + load( + ":protobuf.bzl", ++ "adapt_proto_library", @@ -2436,7 +2475,7 @@ index 9d928ba175f..5ff9f425b76 100644 ) - -@@ -135,6 +136,7 @@ -+ ++ +@@ -213,6 +215,7 @@ cc_library( copts = COPTS, includes = ["src/"], @@ -2450,7 +2489,7 @@ index 9d928ba175f..5ff9f425b76 100644 +@@ -255,13 +258,15 @@ filegroup( + visibility = ["//visibility:public"], + ) -+ ++ +-cc_proto_library( ++adapt_proto_library( ++ name = "cc_wkt_protos_genproto", @@ -2468,9 +2507,9 @@ index 9d928ba175f..5ff9f425b76 100644 ++ deprecation = "Only for backward compatibility. Do not use.", + visibility = ["//visibility:public"], + ) -+ ++ +@@ -978,10 +983,10 @@ cc_library( -+ ++ + proto_lang_toolchain( + name = "cc_toolchain", ++ blacklisted_protos = [proto + "_proto" for proto in WELL_KNOWN_PROTO_MAP.keys()], @@ -2479,7 +2518,7 @@ index 9d928ba175f..5ff9f425b76 100644 + visibility = ["//visibility:public"], +- blacklisted_protos = [":_internal_wkt_protos_genrule"], + ) -+ ++ + proto_lang_toolchain( +diff --git a/protobuf.bzl b/protobuf.bzl +index e0653321f..4156a1275 100644 @@ -2488,7 +2527,7 @@ index 9d928ba175f..5ff9f425b76 100644 +@@ -1,4 +1,5 @@ + load("@bazel_skylib//lib:versions.bzl", "versions") ++load("@rules_proto//proto:defs.bzl", "ProtoInfo") -+ ++ + def _GetPath(ctx, path): + if ctx.label.workspace_root: +@@ -85,6 +86,8 @@ def _proto_gen_impl(ctx): @@ -2497,13 +2536,13 @@ index 9d928ba175f..5ff9f425b76 100644 + deps += dep.proto.deps ++ import_flags = depset(import_flags).to_list() ++ deps = depset(deps).to_list() -+ ++ + if not ctx.attr.gen_cc and not ctx.attr.gen_py and not ctx.executable.plugin: + return struct( +@@ -222,6 +225,29 @@ Args: + outs: a list of labels of the expected outputs from the protocol compiler. + """ -+ ++ ++def _adapt_proto_library_impl(ctx): ++ deps = [dep[ProtoInfo] for dep in ctx.attr.deps] ++ @@ -2553,11 +2592,11 @@ index 9d928ba175f..5ff9f425b76 100644 + **kargs: other keyword arguments that are passed to cc_library. +- + """ -+ ++ + includes = [] + if include != None: + includes = [include] -+ ++ +- if internal_bootstrap_hack: +- # For pre-checked-in generated files, we add the internal_bootstrap_hack +- # which will skip the codegen action. 
@@ -2588,7 +2627,7 @@ index 9d928ba175f..5ff9f425b76 100644 -@@ -58,6 +58,37 @@ - : 0) \ - : PyBytes_AsStringAndSize(ob, (charpp), (sizep))) -- +- +@@ -63,6 +63,37 @@ + : PyBytes_AsStringAndSize(ob, (charpp), (sizep))) + #endif @@ -2624,7 +2663,7 @@ index 9d928ba175f..5ff9f425b76 100644 + goto exit; } } -- +- -- if (frame->f_code->co_filename == nullptr) { +- +- if (frame->f_code->co_filename == NULL) { @@ -2639,7 +2678,7 @@ index 9d928ba175f..5ff9f425b76 100644 - return false; + goto exit; } -- +- + - if (frame->f_globals != frame->f_locals) { + frame_globals = PyFrame_GetGlobals(frame); @@ -2657,7 +2696,7 @@ index 9d928ba175f..5ff9f425b76 100644 +@@ -2991,8 +2991,12 @@ bool InitProto2MessageModule(PyObject *m) { + reinterpret_cast( + &RepeatedCompositeContainer_Type)); - + - // If the calling code is not a _pb2.py file, raise AttributeError. \ No newline at end of file +- // Register them as collections.Sequence @@ -2684,3 +2723,12 @@ index 9d928ba175f..5ff9f425b76 100644 + PyErr_Format(PyExc_ValueError, + "UnknownField does not exist. " \ No newline at end of file +diff --git a/third_party/pybind11_protobuf/BUILD b/third_party/pybind11_protobuf/BUILD +deleted file mode 100644 +index 3b946e563d4..00000000000 +--- a/third_party/pybind11_protobuf/BUILD ++++ /dev/null +@@ -1,3 +0,0 @@ +-# Necessary for bazel to recognize this as a package. +- +-# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) diff --git a/third_party/tensorflow b/third_party/tensorflow index a81addf25f8c..f7759359f842 160000 --- a/third_party/tensorflow +++ b/third_party/tensorflow @@ -1 +1 @@ -Subproject commit a81addf25f8c7420fe22674205e4fa410980bb5e +Subproject commit f7759359f8420d3ca7b9fd19493f2a01bd47b4ef diff --git a/torch_xla/csrc/xla_sharding_util.cpp b/torch_xla/csrc/xla_sharding_util.cpp index 499f03b6d478..4a182ee78eeb 100644 --- a/torch_xla/csrc/xla_sharding_util.cpp +++ b/torch_xla/csrc/xla_sharding_util.cpp @@ -158,7 +158,8 @@ xla::HloModuleProto ShardingUtil::SpmdPartitioningPass( // TODO(yeounoh) side-effecting ops gets assigned replicated sharding. pass.AddPass( /*is_spmd=*/true, /*propagate_metadata=*/false, - /*allow_spmd_sharding_propagation_to_output=*/true); + /*allow_spmd_sharding_propagation_to_output=*/ + absl::MakeConstSpan({true})); pass.AddPass( /*num_partitions=*/num_partitions, /*num_replicas=*/num_replicas, options,