diff --git a/setup.py b/setup.py index 8dddda73a4e7..b20a85758e85 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ base_dir = os.path.dirname(os.path.abspath(__file__)) third_party_path = os.path.join(base_dir, 'third_party') -_libtpu_version = '0.1.dev20230202' +_libtpu_version = '0.1.dev20230213' _libtpu_storage_path = f'https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/wheels/libtpu-nightly/libtpu_nightly-{_libtpu_version}-py3-none-any.whl' diff --git a/tf_patches/cudnn_int8x32.diff b/tf_patches/cudnn_int8x32.diff new file mode 100644 index 000000000000..2aa51c4196b9 --- /dev/null +++ b/tf_patches/cudnn_int8x32.diff @@ -0,0 +1,564 @@ +# TODO: Try removing with the next pin update. See https://github.com/pytorch/xla/pull/4615#issuecomment-1428883781 +diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc +index fac64573c54..dec0f1823c1 100644 +--- a/tensorflow/compiler/xla/debug_options_flags.cc ++++ b/tensorflow/compiler/xla/debug_options_flags.cc +@@ -110,7 +110,6 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { + DebugOptions::PARTITIONING_ALGORITHM_NOOP); + + opts.set_xla_gpu_enable_triton_gemm(false); +- opts.set_xla_gpu_enable_cudnn_int8x32_convolution_reordering(false); + return opts; + } + +@@ -882,13 +881,6 @@ void MakeDebugOptionsFlags(std::vector* flag_list, + bool_setter_for(&DebugOptions::set_xla_gpu_enable_triton_gemm), + debug_options->xla_gpu_enable_triton_gemm(), + "Use Triton-based matrix multiplication.")); +- flag_list->push_back(tsl::Flag( +- "xla_gpu_enable_cudnn_int8x32_convolution_reordering", +- bool_setter_for( +- &DebugOptions:: +- set_xla_gpu_enable_cudnn_int8x32_convolution_reordering), +- debug_options->xla_gpu_enable_cudnn_int8x32_convolution_reordering(), +- "Enable cuDNN frontend for int8x32 convolutions with reordered filter.")); + } // NOLINT(readability/fn_size) + + // Allocates flag_values and flag_objects; this function must not be called more +diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD +index 7fff6bdad13..b21105a42d7 100644 +--- a/tensorflow/compiler/xla/service/gpu/BUILD ++++ b/tensorflow/compiler/xla/service/gpu/BUILD +@@ -1658,11 +1658,8 @@ cc_library( + srcs = ["cudnn_vectorize_convolutions.cc"], + hdrs = ["cudnn_vectorize_convolutions.h"], + deps = [ +- ":backend_configs_cc", +- ":cublas_cudnn", + ":cudnn_support_utils", + ":stream_executor_util", +- "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/hlo/ir:hlo", +diff --git a/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.cc b/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.cc +index 7254e02f513..c2ab437b48e 100644 +--- a/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.cc ++++ b/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.cc +@@ -16,23 +16,22 @@ limitations under the License. 
+ #include "tensorflow/compiler/xla/service/gpu/conv_layout_normalization.h" + + #include +-#include + #include + + #include "tensorflow/compiler/xla/hlo/ir/hlo_casting_utils.h" +-#include "tensorflow/compiler/xla/hlo/ir/hlo_instructions.h" + #include "tensorflow/compiler/xla/hlo/ir/hlo_module.h" +-#include "tensorflow/compiler/xla/layout_util.h" + #include "tensorflow/compiler/xla/service/gpu/cublas_cudnn.h" + #include "tensorflow/compiler/xla/service/hlo_creation_utils.h" +-#include "tensorflow/compiler/xla/shape_util.h" + + namespace xla { + namespace gpu { +-namespace { + +-StatusOr UpdateLayoutForCudnnConvolution( +- HloCustomCallInstruction* hlo) { ++StatusOr> ++NormalizeLayoutForCustomCallConvolution(HloCustomCallInstruction* hlo) { ++ if (!IsCustomCallToDnnConvolution(*hlo)) { ++ return {std::nullopt}; ++ } ++ + HloInstruction* lhs = hlo->mutable_operand(0); + HloInstruction* rhs = hlo->mutable_operand(1); + const ConvolutionDimensionNumbers& dim_numbers = +@@ -162,64 +161,8 @@ StatusOr UpdateLayoutForCudnnConvolution( + } else { + bc_to_orig = MakeBitcastHlo(normalized_conv, hlo->shape()); + } +- return bc_to_orig; +-} +- +-// Normalize the layout of cuDNN int8x32 filter reordering custom call +-// (implemented by calling `cudnnReorderFilterAndBias`), which should be +-// followed by a convolution. +-// Both the input and the output shape for the filter operand must have the +-// NCHW_VECT_C layout. +-HloInstruction* UpdateLayoutForCudnnConvolutionReordering( +- HloCustomCallInstruction* hlo) { +- // The custom call may have either one (filter) or two (filter and bias) +- // operands. The number of outputs matches the number of inputs. +- Shape const* filter_shape; +- Shape const* bias_shape; +- std::tie(filter_shape, bias_shape) = +- hlo->shape().IsTuple() ? std::make_tuple(&hlo->shape().tuple_shapes(0), +- &hlo->shape().tuple_shapes(1)) +- : std::make_tuple(&hlo->shape(), nullptr); +- +- // Transpose the filter to match the expected layout (NCHW_VECT_C). +- // This bias is 1D, so the shape doesn't need to be updated. +- auto new_filter_shape = +- ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( +- *filter_shape); +- auto dimensions = LayoutUtil::MakeLayoutFromMajorToMinor( +- filter_shape->layout().minor_to_major()); +- HloInstruction* transpose = hlo->AddInstruction( +- HloInstruction::CreateTranspose(new_filter_shape, hlo->mutable_operand(0), +- dimensions.minor_to_major())); +- +- // Create a replacement custom-call with layout-normalized inputs. 
+- HloInstruction* custom_call; +- if (bias_shape != nullptr) { +- custom_call = +- hlo->parent()->AddInstruction(HloInstruction::CreateCustomCall( +- ShapeUtil::MakeTupleShape({new_filter_shape, *bias_shape}), +- {transpose, hlo->mutable_operand(1)}, hlo->custom_call_target())); +- } else { +- custom_call = +- hlo->parent()->AddInstruction(HloInstruction::CreateCustomCall( +- new_filter_shape, {transpose}, hlo->custom_call_target())); +- } +- return MakeBitcastHlo(custom_call, hlo->shape()); +-} +- +-} // namespace + +-StatusOr> NormalizeLayoutForGpuCustomCalls( +- HloCustomCallInstruction* hlo) { +- if (IsCustomCallToDnnConvolution(*hlo)) { +- TF_ASSIGN_OR_RETURN(HloInstruction * bc_to_orig, +- UpdateLayoutForCudnnConvolution(hlo)); +- return std::make_optional(bc_to_orig); +- } +- if (IsCudnnConvolutionReorder(*hlo)) { +- return std::make_optional(UpdateLayoutForCudnnConvolutionReordering(hlo)); +- } +- return {std::nullopt}; ++ return std::make_optional(bc_to_orig); + } + + } // end namespace gpu +diff --git a/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.h b/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.h +index bb38a299435..c6305784f94 100644 +--- a/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.h ++++ b/tensorflow/compiler/xla/service/gpu/conv_layout_normalization.h +@@ -28,8 +28,8 @@ limitations under the License. + namespace xla { + namespace gpu { + +-StatusOr> NormalizeLayoutForGpuCustomCalls( +- HloCustomCallInstruction*); ++StatusOr> ++NormalizeLayoutForCustomCallConvolution(HloCustomCallInstruction*); + + } // end namespace gpu + } // end namespace xla +diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_vectorize_convolutions.cc b/tensorflow/compiler/xla/service/gpu/cudnn_vectorize_convolutions.cc +index f9af729cf36..7511e81a423 100644 +--- a/tensorflow/compiler/xla/service/gpu/cudnn_vectorize_convolutions.cc ++++ b/tensorflow/compiler/xla/service/gpu/cudnn_vectorize_convolutions.cc +@@ -16,21 +16,16 @@ limitations under the License. + #include "tensorflow/compiler/xla/service/gpu/cudnn_vectorize_convolutions.h" + + #include +-#include + #include + + #include "tensorflow/compiler/xla/client/xla_builder.h" + #include "tensorflow/compiler/xla/hlo/ir/hlo_casting_utils.h" + #include "tensorflow/compiler/xla/hlo/ir/hlo_instructions.h" +-#include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h" +-#include "tensorflow/compiler/xla/service/gpu/cublas_cudnn.h" + #include "tensorflow/compiler/xla/service/gpu/cudnn_support_utils.h" + #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" +-#include "tensorflow/compiler/xla/shape_util.h" + + namespace xla { + namespace gpu { +-namespace { + + // Finds convolutions that this pass may be able to transform, namely int8_t + // cudnn forward or forward-bias-activation convolutions +@@ -254,37 +249,6 @@ static ConvolutionDimensionNumbers VectorizeDnums( + return dnums; + } + +-// Reorders the convolution's filter and bias (if present) according to +-// cudnnReorderFilterAndBias. Also marks that the filter + bias are reordered +-// in the conv's backend-config. +-Status ReorderInt8NchwVect(HloCustomCallInstruction* conv, XlaOp* operands) { +- // Update convolution backend config. 
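#
# Background for the helper being deleted here: int8x32 ("NCHW_VECT_C") packs
# channels in groups of 32, so a logical s8[64,64,3,3] OIHW filter travels as
# s8[64,2,3,3,32] (O, I/32, H, W, 32); the element count is unchanged, as a
# worked check shows: 64*64*3*3 = 64*2*3*3*32 = 36864. cuDNN additionally
# wants that packed filter permuted by cudnnReorderFilterAndBias, which is
# what the code below arranged and recorded in the conv's backend config.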
+- TF_ASSIGN_OR_RETURN(auto config, +- conv->backend_config()); +- config.set_reordered_int8_nchw_vect(true); +- TF_RETURN_IF_ERROR(conv->set_backend_config(config)); +- +- XlaBuilder& builder = *operands->builder(); +- Shape filter_shape = builder.GetShape(operands[1]).value(); +- +- if (conv->operand_count() > 2) { +- // Reorder filter and bias. +- Shape bias_shape = builder.GetShape(operands[2]).value(); +- XlaOp reorder = CustomCall( +- &builder, std::string(kCudnnConvReorderFilterAndBiasCallTarget), +- {operands[1], operands[2]}, +- ShapeUtil::MakeTupleShape({filter_shape, bias_shape})); +- operands[1] = GetTupleElement(reorder, 0); +- operands[2] = GetTupleElement(reorder, 1); +- } else { +- // Reorder just the filter. +- operands[1] = +- CustomCall(&builder, std::string(kCudnnConvReorderFilterCallTarget), +- {operands[1]}, filter_shape); +- } +- return OkStatus(); +-} +- + // Tries to vectorize an already-vectorized convolution. + // + // That is, given a convolution of shape [N, C/k, H, W, k], changes it to have +@@ -371,13 +335,6 @@ static StatusOr TryRevectorizeConv( + conv->ToString()); + } + +- // Reorder filter and bias for the int8x32 convolutions. +- const auto& debug_options = conv->GetModule()->config().debug_options(); +- if (input_shape.element_type() == xla::S8 && vect_size == 32 && +- debug_options.xla_gpu_enable_cudnn_int8x32_convolution_reordering()) { +- TF_RETURN_IF_ERROR(ReorderInt8NchwVect(conv, new_operands.data())); +- } +- + // The custom-call returns a tuple (new_output_shape, u8[0]), where the second + // value in the tuple represents the convolution's scratch memory. + DimensionVector new_output_dims(output_shape.dimensions().begin(), +@@ -502,13 +459,6 @@ static StatusOr TryVectorizeConv( + conv->ToString()); + } + +- // Reorder filter and bias for the int8x32 convolutions. +- const auto& debug_options = conv->GetModule()->config().debug_options(); +- if (input_shape.element_type() == xla::S8 && vect_size == 32 && +- debug_options.xla_gpu_enable_cudnn_int8x32_convolution_reordering()) { +- TF_RETURN_IF_ERROR(ReorderInt8NchwVect(conv, new_operands.data())); +- } +- + // The custom-call returns a tuple (new_output_shape, u8[0]), where the second + // value in the tuple represents the convolution's scratch memory. 
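#
# The tuple convention in the comment above, spelled out with shapes borrowed
# from the test HLO removed later in this patch: a cuDNN conv custom call
# yields (conv_result, scratch), e.g.
#
#   (s8[4,2,48,48,32]{4,3,2,1,0}, u8[51328]{0}) custom-call(...),
#       custom_call_target="__cudnn$convBiasActivationForward"
#
# u8[0] stands for "scratch size not yet known"; autotuning typically rewrites
# it to the real workspace size (51328 bytes in that test module).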
+ Shape new_output_shape = SplitShapeAtDim( +@@ -545,8 +495,6 @@ static StatusOr TryVectorizeConv( + return true; + } + +-} // namespace +- + StatusOr CudnnVectorizeConvolutions::Run( + HloModule* module, + const absl::flat_hash_set& execution_threads) { +diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +index 9e4d8c95bb0..9b242ce3b04 100644 +--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc ++++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +@@ -836,7 +836,8 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( + pipeline.AddPass(); + + if (debug_options.xla_gpu_normalize_layouts()) { +- pipeline.AddPass(&NormalizeLayoutForGpuCustomCalls); ++ pipeline.AddPass( ++ &NormalizeLayoutForCustomCallConvolution); + pipeline.AddPass>(options); + } + pipeline.AddPass(); +diff --git a/tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc b/tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc +index cb2aa3f2ac1..4c995b4f142 100644 +--- a/tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc ++++ b/tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc +@@ -3485,7 +3485,7 @@ std::tuple GetTensorVectorSizeAndDim( + tsl::StatusOr CreateCudnnTensor( + absl::Span dims, absl::Span strides, + int64_t uid, dnn::DataType dtype, int64_t vec_count, int64_t vec_dim, +- bool is_virtual = false, bool is_reordered_nchw_vect = false) { ++ bool is_virtual = false) { + auto tensor = cudnn_frontend::TensorBuilder() + .setDim(dims.size(), dims.data()) + .setStride(strides.size(), strides.data()) +@@ -3494,9 +3494,6 @@ tsl::StatusOr CreateCudnnTensor( + .setDataType(ToCudnnDataType(dtype)) + .setVectorCountAndDimension(vec_count, vec_dim) + .setVirtual(is_virtual) +- .setReorderType(is_reordered_nchw_vect +- ? CUDNN_TENSOR_REORDERING_INT8x32 +- : CUDNN_TENSOR_REORDERING_NONE) + .build(); + RETURN_MSG_IF_CUDNN_ERROR(tensor); + return tensor; +@@ -3523,6 +3520,11 @@ GetCudnnOperationGraph(dnn::ConvolutionKind kind, dnn::DataType input_type, + std::vector input_strides = input_descriptor.vectorized_strides( + dnn::DataLayout::kBatchDepthYX, vector_size, vector_dim); + ++ if (vector_size == 32) { ++ return tsl::errors::Internal( ++ "cuDNN frontend doesn't support Tx32 at the moment."); ++ } ++ + TF_ASSIGN_OR_RETURN(auto tensor_x, + CreateCudnnTensor(input_dims, input_strides, 'x', + input_type, vector_size, vector_dim)); +@@ -3547,13 +3549,9 @@ GetCudnnOperationGraph(dnn::ConvolutionKind kind, dnn::DataType input_type, + std::vector filter_strides = filter_descriptor.vectorized_strides( + dnn::FilterLayout::kOutputInputYX, vector_size, vector_dim); + +- TF_ASSIGN_OR_RETURN( +- auto tensor_w, +- CreateCudnnTensor( +- filter_dims, filter_strides, 'w', input_type, vector_size, vector_dim, +- /*is_virtual=*/false, +- /*is_reordered_nchw_vect=*/filter_descriptor.layout() == +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered)); ++ TF_ASSIGN_OR_RETURN(auto tensor_w, ++ CreateCudnnTensor(filter_dims, filter_strides, 'w', ++ input_type, vector_size, vector_dim)); + + // conv_desc. 
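#
# For reference on the vectorized dims/strides built above, worked for the
# NCHW_VECT_C input s8[4,2,48,48,32] (logically s8[4,64,48,48]):
#
#   dims    = {4, 2, 48, 48, 32}
#   strides = {2*48*48*32, 48*48*32, 48*32, 32, 1}
#           = {147456, 73728, 1536, 32, 1}
#
# The early return added above rejects exactly this vector_size == 32 case,
# since without filter reordering the frontend path no longer handles it.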
+ auto mode = convolution_descriptor.convolution_not_crosscorr() +@@ -3657,6 +3655,11 @@ GetCudnnFusedOperationGraph( + std::vector input_strides = input_descriptor.vectorized_strides( + dnn::DataLayout::kBatchDepthYX, vector_size, vector_dim); + ++ if (vector_size == 32) { ++ return tsl::errors::Internal( ++ "cuDNN frontend doesn't support Tx32 at the moment."); ++ } ++ + TF_ASSIGN_OR_RETURN(auto tensor_x, + CreateCudnnTensor(input_dims, input_strides, 'x', + input_type, vector_size, vector_dim)); +@@ -3681,13 +3684,9 @@ GetCudnnFusedOperationGraph( + dnn::FilterLayout::kOutputInputYX, vector_size, vector_dim); + std::vector filter_strides = filter_descriptor.vectorized_strides( + dnn::FilterLayout::kOutputInputYX, vector_size, vector_dim); +- TF_ASSIGN_OR_RETURN( +- auto tensor_w, +- CreateCudnnTensor( +- filter_dims, filter_strides, 'w', input_type, vector_size, vector_dim, +- /*is_virtual=*/false, +- /*is_reordered_nchw_vect=*/filter_descriptor.layout() == +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered)); ++ TF_ASSIGN_OR_RETURN(auto tensor_w, ++ CreateCudnnTensor(filter_dims, filter_strides, 'w', ++ input_type, vector_size, vector_dim)); + + // For the purposes of the cudnn graph, say that the bias tensor has the same + // layout as the output tensor. It doesn't actually matter, because bias is a +@@ -4822,20 +4821,17 @@ tsl::Status CudnnSupport::GetConvolveRunners( + const dnn::ConvolutionDescriptor& convolution_descriptor, bool use_fallback, + ScratchAllocator* /*scratch_allocator*/, + std::vector>* out_exec_plans) { ++ // All current versions of the frontend API lack support for Tx32 ++ // convolutions. ++ const bool is_unsupported_x32 = ++ input_descriptor.layout() == dnn::kBatchDepthYX32; ++ + // cuDNN frontend support became sufficiently stable to use in 8.1. + // TODO(awpr): remove this condition once support for cuDNN 8.0 is dropped. + const bool is_pre_frontend_cudnn = CUDNN_VERSION < 8100; + +- // cuDNN frontend support for Tx32 convolutions added in 8.3. +- // If the filter is not reordered, do not use frontend (it is slow). +- const bool is_disabled_x32 = +- input_descriptor.layout() == dnn::kBatchDepthYX32 && +- (CUDNN_VERSION < 8300 || +- filter_descriptor.layout() != +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered); +- + const bool actually_use_cudnn_frontend = +- use_cudnn_frontend && !is_pre_frontend_cudnn && !is_disabled_x32; ++ use_cudnn_frontend && !is_pre_frontend_cudnn && !is_unsupported_x32; + + if (use_cudnn_frontend && !actually_use_cudnn_frontend) { + // This will happen once per unique conv configuration/shape that gets +@@ -4847,8 +4843,8 @@ tsl::Status CudnnSupport::GetConvolveRunners( + << " filter: " << filter_descriptor.ToString() << "\n" + << " " << convolution_descriptor.ToString() << "\n" + << " ... because " +- << (is_disabled_x32 +- ? "Tx32 convolutions are disabled." ++ << (is_unsupported_x32 ++ ? "Tx32 convolutions are unsupported." 
+ : "the current cuDNN version does not support it."); + } + +@@ -4933,12 +4929,6 @@ CudnnSupport::ConvolveRunnerFromDesc( + ToCudnnDataType(GetConvAccumulatorType(input_type))); + conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled()); + +- if (filter_descriptor.layout() == +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered) { +- CHECK_CUDNN_OK( +- cudnnSetConvolutionReorderType(conv.handle(), CUDNN_NO_REORDER)); +- } +- + TF_ASSIGN_OR_RETURN( + auto runner, + CudnnLegacyConvRunner::Create( +@@ -5200,12 +5190,6 @@ CudnnSupport::FusedConvolveRunnerFromDesc( + ToCudnnDataType(GetConvAccumulatorType(input_type))); + conv.set_use_tensor_op_math(algorithm_desc.tensor_ops_enabled()); + +- if (filter_descriptor.layout() == +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered) { +- CHECK_CUDNN_OK( +- cudnnSetConvolutionReorderType(conv.handle(), CUDNN_NO_REORDER)); +- } +- + // CUDNN v6 only supports CUDNN_NOT_PROPAGATE_NAN as the reluNanOpt for + // activation descriptor. Note that this will change the nan propagation + // behavior from separate conv, bias, and relu (which by default is +@@ -5275,26 +5259,23 @@ tsl::Status CudnnSupport::GetFusedConvolveRunners( + false; + #endif + ++ // All current versions of the frontend API lack support for Tx32 ++ // convolutions. ++ const bool is_unsupported_x32 = ++ input_descriptor.layout() == dnn::kBatchDepthYX32; ++ + // cuDNN frontend support became sufficiently stable to use in 8.1. + // TODO(awpr): remove this condition once support for cuDNN 8.0 is dropped. + const bool is_pre_frontend_cudnn = CUDNN_VERSION < 8100; + +- // cuDNN frontend support for Tx32 convolutions added in 8.3. +- // If the filter is not reordered, do not use frontend (it is slow). +- const bool is_disabled_x32 = +- input_descriptor.layout() == dnn::kBatchDepthYX32 && +- (CUDNN_VERSION < 8300 || +- filter_descriptor.layout() != +- dnn::FilterLayout::kOutputInputYX32_CudnnReordered); +- + const bool actually_use_cudnn_frontend = + use_cudnn_frontend && !is_pre_frontend_cudnn && +- !is_broken_identity_fused_conv && !is_disabled_x32; ++ !is_broken_identity_fused_conv && !is_unsupported_x32; + + if (use_cudnn_frontend && !actually_use_cudnn_frontend) { + const char* reason = "the current cuDNN version does not support it."; +- if (is_disabled_x32) { +- reason = "Tx32 convolutions are disabled."; ++ if (is_unsupported_x32) { ++ reason = "Tx32 convolutions are unsupported."; + } else if (is_broken_identity_fused_conv) { + reason = "it uses an identity activation."; + } +diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD +index a7ece361f17..efa6e41c4a0 100644 +--- a/tensorflow/compiler/xla/tests/BUILD ++++ b/tensorflow/compiler/xla/tests/BUILD +@@ -1298,7 +1298,7 @@ xla_test( + srcs = ["convolution_cudnn_test.cc"], + backend_tags = {"gpu": [ + "gpu", +- "requires-gpu-sm80", ++ "requires-gpu-sm70", + ]}, + backends = ["gpu"], + deps = [ +diff --git a/tensorflow/compiler/xla/tests/convolution_cudnn_test.cc b/tensorflow/compiler/xla/tests/convolution_cudnn_test.cc +index 04b9126302d..596f2be8f77 100644 +--- a/tensorflow/compiler/xla/tests/convolution_cudnn_test.cc ++++ b/tensorflow/compiler/xla/tests/convolution_cudnn_test.cc +@@ -61,60 +61,5 @@ ENTRY TestComputation { + EXPECT_TRUE(RunAndCompare(kHlo, ErrorSpec{0, 0})); + } + +-XLA_TEST_F(ConvolutionHloTest, TestCudnnConvInt8x32BiasNonConst) { +- // Test two GPU compiled HLOs, first version with vectorization disabled, +- // second with vectorization enabled. 
The reference implementation +- // (Interpreter) does not support the fused conv-add-relu-clamp operation, +- // thus cannot be used. +- if (!backend() +- .default_stream_executor() +- ->GetDeviceDescription() +- .cuda_compute_capability() +- .IsAtLeast(8)) { +- return; +- } +- constexpr char kHloBase[] = R"( +-HloModule TestModule, entry_computation_layout={(s8[4,48,48,64]{3,2,1,0},s8[64,3,3,64]{3,2,1,0},s8[64]{0})->s8[4,48,48,64]{3,2,1,0}} +- +-ENTRY TestComputation { +- input = s8[4,48,48,64]{3,2,1,0} parameter(0) +- filter = s8[64,3,3,64]{3,2,1,0} parameter(1) +- bias = s8[64]{0} parameter(2) +- convert.1 = f32[64]{0} convert(bias) +- cudnn-conv-bias-activation.3 = (s8[4,48,48,64]{3,2,1,0}, u8[0]{0}) custom-call(input, filter, convert.1), +- window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_o01i->b01f, custom_call_target="__cudnn$convBiasActivationForward", +- backend_config="{\"activation_mode\":\"2\",\"conv_result_scale\":1,\"side_input_scale\":0,\"algorithm\":{ +- \"algo_id\":\"38\",\"math_type\":\"DEFAULT_MATH\",\"tuning_knobs\":{\"14\":\"5\",\"13\":\"1\",\"23\":\"0\",\"2\":\"1\"}, +- \"is_cudnn_frontend\":true,\"workspace_size\":\"0\"}}" +- ROOT get-tuple-element.1 = s8[4,48,48,64]{3,2,1,0} get-tuple-element(cudnn-conv-bias-activation.3), index=0 +-})"; +- constexpr char kHloVectorized[] = R"( +-HloModule TestModule, entry_computation_layout={(s8[4,48,48,64]{3,2,1,0},s8[64,3,3,64]{3,2,1,0},s8[64]{0})->s8[4,48,48,64]{3,2,1,0}} +- +-ENTRY TestComputation { +- input = s8[4,48,48,64]{3,2,1,0} parameter(0) +- bitcast.36 = s8[4,48,48,2,32]{4,3,2,1,0} bitcast(input) +- transpose = s8[4,2,48,48,32]{4,3,2,1,0} transpose(bitcast.36), dimensions={0,3,1,2,4} +- filter = s8[64,3,3,64]{3,2,1,0} parameter(1) +- bitcast.18 = s8[64,3,3,2,32]{4,3,2,1,0} bitcast(filter) +- transpose.3 = s8[64,2,3,3,32]{4,3,2,1,0} transpose(bitcast.18), dimensions={0,3,1,2,4} +- bias = s8[64]{0} parameter(2) +- convert.2 = f32[64]{0} convert(bias) +- custom-call.3 = (s8[64,2,3,3,32]{4,3,2,1,0}, f32[64]{0}) custom-call(transpose.3, convert.2), custom_call_target="__cudnn$convReorderFilterAndBias" +- get-tuple-element.2 = s8[64,2,3,3,32]{4,3,2,1,0} get-tuple-element(custom-call.3), index=0 +- get-tuple-element.3 = f32[64]{0} get-tuple-element(custom-call.3), index=1 +- cudnn-conv-bias-activation.4 = (s8[4,2,48,48,32]{4,3,2,1,0}, u8[51328]{0}) custom-call(transpose, get-tuple-element.2, get-tuple-element.3), +- window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", +- backend_config="{\"activation_mode\":\"2\",\"conv_result_scale\":1,\"side_input_scale\":0,\"algorithm\":{ +- \"algo_id\":\"7\",\"math_type\":\"DEFAULT_MATH\",\"tuning_knobs\":{\"7\":\"3\",\"2\":\"0\",\"5\":\"4\",\"6\":\"4\",\"4\":\"2\",\"21\":\"0\"}, +- \"is_cudnn_frontend\":true,\"workspace_size\":\"51328\"},\"reordered_int8_nchw_vect\":true}" +- get-tuple-element.6 = s8[4,2,48,48,32]{4,3,2,1,0} get-tuple-element(cudnn-conv-bias-activation.4), index=0 +- transpose.4 = s8[4,48,48,2,32]{4,3,2,1,0} transpose(get-tuple-element.6), dimensions={0,2,3,1,4} +- ROOT bitcast.1 = s8[4,48,48,64]{3,2,1,0} bitcast(transpose.4) +-})"; +- EXPECT_TRUE(RunAndCompareTwoModules(kHloBase, kHloVectorized, ErrorSpec{0, 0}, +- /*run_hlo_passes=*/false)); +-} +- + } // namespace + } // namespace xla +diff --git a/tensorflow/compiler/xla/translate/mhlo_to_lhlo_with_xla/mhlo_to_lhlo_with_xla.cc b/tensorflow/compiler/xla/translate/mhlo_to_lhlo_with_xla/mhlo_to_lhlo_with_xla.cc +index a25931e6fce..33de736599c 100644 +--- 
a/tensorflow/compiler/xla/translate/mhlo_to_lhlo_with_xla/mhlo_to_lhlo_with_xla.cc ++++ b/tensorflow/compiler/xla/translate/mhlo_to_lhlo_with_xla/mhlo_to_lhlo_with_xla.cc +@@ -1233,10 +1233,10 @@ LhloDialectEmitter::EmitDnnConvolutionReorderVectorized( + } + + CHECK_EQ(shape.rank(), 5); +- CHECK_EQ(shape.dimensions(4), 32); ++ CHECK_EQ(shape.dimensions_minor(0), 32); + llvm::SmallVector nchw = { +- shape.dimensions(0), shape.dimensions(1) * 32, shape.dimensions(2), +- shape.dimensions(3)}; ++ shape.dimensions_minor(4), shape.dimensions_minor(3) * 32, ++ shape.dimensions_minor(2), shape.dimensions_minor(1)}; + op->setAttr("filter_dims", GetI64DenseElementsAttr(nchw)); + + return op.getOperation(); +diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto +index 78e4706edfe..9f5b65a5858 100644 +--- a/tensorflow/compiler/xla/xla.proto ++++ b/tensorflow/compiler/xla/xla.proto +@@ -462,9 +462,7 @@ message DebugOptions { + + bool xla_gpu_enable_triton_gemm = 188; + +- bool xla_gpu_enable_cudnn_int8x32_convolution_reordering = 189; +- +- // Next id: 190 ++ // Next id: 189 + + // Extra options to pass to the compilation backend (e.g. LLVM); specific + // interpretation of these values is left to the backend. diff --git a/tf_patches/ffp_gpu.diff b/tf_patches/ffp_gpu.diff deleted file mode 100644 index 7f71d6f28ceb..000000000000 --- a/tf_patches/ffp_gpu.diff +++ /dev/null @@ -1,40 +0,0 @@ -# enable gpu build, remove after next pin update -diff --git a/tensorflow/core/kernels/fft_ops.cc b/tensorflow/core/kernels/fft_ops.cc -index 5e0464e89f1..2d9cd438eb2 100644 ---- a/tensorflow/core/kernels/fft_ops.cc -+++ b/tensorflow/core/kernels/fft_ops.cc -@@ -455,7 +455,7 @@ class FftPlanCache { - cache_.erase(it); - } - --size_; -- return value; -+ return std::optional(std::move(value)); - } - - // Inserts a plan into the cache as long as there is still capacity. 
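#
# The deleted patch above (continued in the two runtime files that follow)
# worked around an implicit-move gap in older toolchains: when a returned
# local's type differs from the declared return type (std::optional<...>,
# absl::StatusOr<...>), `return value;` needs a converting constructor, and
# compilers without the newer implicit-move rules (CWG1579/P1825) resolve it
# against the copy overload, which fails for move-only payloads. A minimal
# illustration (sketch, not from the patch):
#
#   absl::StatusOr<std::unique_ptr<int>> Make() {
#     auto p = std::make_unique<int>(42);
#     return std::move(p);  // plain `return p;` trips pre-P1825 compilers
#   }
#
# Dropping the patch implies the pinned TF now builds without the workaround.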
-diff --git a/tensorflow/compiler/xla/runtime/ffi.cc b/tensorflow/compiler/xla/runtime/ffi.cc -index 584d2008b11..231abca9c92 100644 ---- a/tensorflow/compiler/xla/runtime/ffi.cc -+++ b/tensorflow/compiler/xla/runtime/ffi.cc -@@ -380,7 +380,7 @@ absl::StatusOr FfiModulesState::state_vector() const { - - return absl::InternalError("Unsupported FFI module state"); - } -- return state_vector; -+ return std::move(state_vector); - } - - //===----------------------------------------------------------------------===// - diff --git a/tensorflow/compiler/xla/runtime/executable.cc b/tensorflow/compiler/xla/runtime/executable.cc -index 23c8fa7421e..9d715d5c61d 100644 ---- a/tensorflow/compiler/xla/runtime/executable.cc -+++ b/tensorflow/compiler/xla/runtime/executable.cc -@@ -332,7 +332,7 @@ absl::StatusOr Executable::Execute( - if (auto st = ReturnResults(ordinal, results, &call_frame); !st.ok()) - return st; - -- return exec_ref; -+ return std::move(exec_ref); - } - - ExecutionReference Executable::Execute(unsigned ordinal, CallFrame& call_frame, diff --git a/tf_patches/grpc_version.diff b/tf_patches/grpc_version.diff index 627c31d374d2..261efd356b6f 100644 --- a/tf_patches/grpc_version.diff +++ b/tf_patches/grpc_version.diff @@ -5,7 +5,7 @@ index a2ff5848fcf..930313e8c25 100644 +++ b/.bazelrc @@ -557,8 +557,8 @@ build:rbe_linux_py3_base --python_path="/usr/local/bin/python3.9" build:rbe_linux_py3_base --repo_env=TF_PYTHON_CONFIG_REPO="@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_python3.9" - + build:rbe_win --config=rbe -build:rbe_win --crosstool_top="//tensorflow/tools/toolchains/win/tf_win_01232023:toolchain" -build:rbe_win --extra_toolchains="//tensorflow/tools/toolchains/win/tf_win_01232023:cc-toolchain-x64_windows" @@ -14,11 +14,23 @@ index a2ff5848fcf..930313e8c25 100644 build:rbe_win --extra_execution_platforms="//tensorflow/tools/toolchains/win:rbe_windows_ltsc2019" build:rbe_win --host_platform="//tensorflow/tools/toolchains/win:rbe_windows_ltsc2019" build:rbe_win --platforms="//tensorflow/tools/toolchains/win:rbe_windows_ltsc2019" +diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl +index 10b30f63755..306512e21b8 100644 +--- a/tensorflow/compiler/xla/tests/build_defs.bzl ++++ b/tensorflow/compiler/xla/tests/build_defs.bzl +@@ -112,6 +112,7 @@ def xla_test( + native.cc_library( + name = "%s_lib" % name, + srcs = srcs, ++ tags = tags, + copts = copts, + testonly = True, + deps = deps, diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files -index 3d16e3b862a..1a2b9e015eb 100644 +index 79c176026f7..e878daf41db 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files -@@ -169,7 +169,6 @@ tensorflow/tools/toolchains/win/bazel_211/BUILD: +@@ -171,7 +171,6 @@ tensorflow/tools/toolchains/win/bazel_211/BUILD: tensorflow/tools/toolchains/win/tf_win_01072022/BUILD: tensorflow/tools/toolchains/win/tf_win_01112023/BUILD: tensorflow/tools/toolchains/win/tf_win_01122022/BUILD: @@ -35,7 +47,7 @@ index 3d1c95c22a9..fb99462a900 100644 load("//tensorflow/core/platform:build_config.bzl", "tf_protos_grappler") -load("//tensorflow:tensorflow.bzl", "if_not_windows") +load("//tensorflow:tensorflow.bzl", "VERSION", "if_not_windows", "tf_python_pybind_static_deps") - + package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], @@ -249,13 +249,12 @@ tf_python_pybind_extension( @@ -59,19 +71,19 @@ index 3d1c95c22a9..fb99462a900 100644 
"//tensorflow/core:framework_headers_lib", "//tensorflow/core:lib_headers_for_pybind", diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD -index 6dba97c839d..7e8e0b9c2bd 100644 +index 57bf7a8a3b1..d0b51d17c81 100644 --- a/tensorflow/python/saved_model/BUILD +++ b/tensorflow/python/saved_model/BUILD @@ -3,7 +3,7 @@ - + load("//tensorflow:strict.default.bzl", "py_strict_library") load("//tensorflow:tensorflow.default.bzl", "cuda_py_test", "tf_py_test", "tf_pybind_cc_library_wrapper", "tf_python_pybind_extension") -load("//tensorflow:tensorflow.bzl", "if_google") +load("//tensorflow:tensorflow.bzl", "VERSION", "if_google", "tf_python_pybind_static_deps") - + package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], -@@ -737,14 +737,13 @@ tf_python_pybind_extension( +@@ -727,14 +727,13 @@ tf_python_pybind_extension( "pywrap_saved_model_fingerprinting.h", "pywrap_saved_model_metrics.h", ], @@ -93,7 +105,7 @@ index 6dba97c839d..7e8e0b9c2bd 100644 deps = [ ":pywrap_saved_model_headers", diff --git a/tensorflow/tools/ci_build/release/requirements_common.txt b/tensorflow/tools/ci_build/release/requirements_common.txt -index e2328493a61..b4b785fc53d 100644 +index 051387bf64e..be3f16a4edb 100644 --- a/tensorflow/tools/ci_build/release/requirements_common.txt +++ b/tensorflow/tools/ci_build/release/requirements_common.txt @@ -11,7 +11,7 @@ h5py ~= 3.8.0 # Earliest version for Python 3.11 @@ -131,11 +143,11 @@ index 444c2d151ab..8e97ea56f9f 100644 - def_fp.write("\t ??1CoordinatedTask@tensorflow@@UEAA@XZ\n") # for _pywrap_tfe - def_fp.write("\t ?CopyFrom@CoordinatedTask@tensorflow@@QEAAXAEBV12@@Z\n") # for _pywrap_tfe - def_fp.write("\t ??0CoordinatedTask@tensorflow@@IEAA@PEAVArena@protobuf@google@@_N@Z\n") # for _pywrap_tfe - + # Each symbols returned by undname matches the same position in candidates. # We compare on undname but use the decorated name from candidates. diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py -index 915ad896567..00d85d32d79 100644 +index cc17d57800e..329588662e9 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -104,7 +104,7 @@ REQUIRED_PACKAGES = [ @@ -2310,7 +2322,7 @@ index c6b49ba7ca4..827e51a4a8d 100644 @@ -401,13 +401,6 @@ def py_proto_library( # is not explicitly listed in py_libs. Instead, host system is assumed to # have grpc installed. 
- + - genproto_deps = [] - for dep in deps: - if dep != "@com_google_protobuf//:protobuf_python": @@ -2328,10 +2340,10 @@ index c6b49ba7ca4..827e51a4a8d 100644 - deps = genproto_deps, + deps = [s + "_genproto" for s in deps], ) - + if default_runtime and not default_runtime in py_libs + deps: diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl -index 80c4442433b..852a0fbb91f 100644 +index 80072dec290..3506f7d22c0 100644 --- a/tensorflow/workspace2.bzl +++ b/tensorflow/workspace2.bzl @@ -457,14 +457,14 @@ def _tf_repositories(): @@ -2350,29 +2362,56 @@ index 80c4442433b..852a0fbb91f 100644 - urls = tf_mirror_urls("https://github.com/protocolbuffers/protobuf/archive/v3.21.9.zip"), + urls = tf_mirror_urls("https://github.com/protocolbuffers/protobuf/archive/v3.9.2.zip"), ) - + + tf_http_archive( +@@ -485,9 +485,9 @@ def _tf_repositories(): + + tf_http_archive( + name = "com_google_fuzztest", +- sha256 = "c75f224b34c3c62ee901381fb743f6326f7b91caae0ceb8fe62f3fd36f187627", +- strip_prefix = "fuzztest-58b4e7065924f1a284952b84ea827ce35a87e4dc", +- urls = tf_mirror_urls("https://github.com/google/fuzztest/archive/58b4e7065924f1a284952b84ea827ce35a87e4dc.zip"), ++ sha256 = "3fe79ede8e860ba7331987b2c1f84d3eeaf5bea00fd76398d6ff0006635586c6", ++ strip_prefix = "fuzztest-6d79ceb1dc2398e02a39efc23ce40d68baa16a42", ++ urls = tf_mirror_urls("https://github.com/google/fuzztest/archive/6d79ceb1dc2398e02a39efc23ce40d68baa16a42.zip"), + ) + tf_http_archive( @@ -567,10 +567,10 @@ def _tf_repositories(): - + tf_http_archive( name = "boringssl", -- sha256 = "b460f8673f3393e58ce506e9cdde7f2c3b2575b075f214cb819fb57d809f052b", -- strip_prefix = "boringssl-bb41bc007079982da419c0ec3186e510cbcf09d0", +- sha256 = "534fa658bd845fd974b50b10f444d392dfd0d93768c4a51b61263fd37d851c40", +- strip_prefix = "boringssl-b9232f9e27e5668bc0414879dcdedb2a59ea75f2", + sha256 = "fd0e06a8a57dcba1132f91fef1c1327191e913b6c50a84633f7175090972196c", + strip_prefix = "boringssl-f9eff21461cf79556a0fb8ca9b1bf60c3b283ce8", system_build_file = "//third_party/systemlibs:boringssl.BUILD", -- urls = tf_mirror_urls("https://github.com/google/boringssl/archive/bb41bc007079982da419c0ec3186e510cbcf09d0.zip"), +- urls = tf_mirror_urls("https://github.com/google/boringssl/archive/b9232f9e27e5668bc0414879dcdedb2a59ea75f2.tar.gz"), + urls = tf_mirror_urls("https://github.com/google/boringssl/archive/f9eff21461cf79556a0fb8ca9b1bf60c3b283ce8.tar.gz"), ) - + # Note: if you update this, you have to update libpng too. See cl/437813808 +@@ -873,13 +873,6 @@ def _tf_repositories(): + system_build_file = "//third_party/systemlibs:pybind11.BUILD", + ) + +- tf_http_archive( +- name = "pybind11_protobuf", +- urls = tf_mirror_urls("https://github.com/pybind/pybind11_protobuf/archive/80f3440cd8fee124e077e2e47a8a17b78b451363.zip"), +- sha256 = "", +- strip_prefix = "pybind11_protobuf-80f3440cd8fee124e077e2e47a8a17b78b451363", +- ) +- + tf_http_archive( + name = "wrapt", + build_file = "//third_party:wrapt.BUILD", diff --git a/tensorflow/workspace3.bzl b/tensorflow/workspace3.bzl index 91871db22c8..5cfa0553579 100644 --- a/tensorflow/workspace3.bzl +++ b/tensorflow/workspace3.bzl @@ -37,11 +37,11 @@ def workspace(): ) - + # Maven dependencies. 
- RULES_JVM_EXTERNAL_TAG = "4.3" + RULES_JVM_EXTERNAL_TAG = "3.2" @@ -2383,20 +2422,20 @@ index 91871db22c8..5cfa0553579 100644 + sha256 = "82262ff4223c5fda6fb7ff8bd63db8131b51b413d26eb49e3131037e79e324af", url = "https://github.com/bazelbuild/rules_jvm_external/archive/%s.zip" % RULES_JVM_EXTERNAL_TAG, ) - + diff --git a/third_party/pprof.BUILD b/third_party/pprof.BUILD index b7d33118665..dd38a09cd0c 100644 --- a/third_party/pprof.BUILD +++ b/third_party/pprof.BUILD @@ -4,7 +4,7 @@ package( - + licenses(["notice"]) # MIT - + -load("@org_tensorflow//tensorflow/tsl/platform/default:build_config.bzl", "py_proto_library") +load("@com_google_protobuf//:protobuf.bzl", "py_proto_library") - + exports_files(["pprof/LICENSE"]) - + diff --git a/third_party/protobuf/protobuf.patch b/third_party/protobuf/protobuf.patch index 9d928ba175f..5ff9f425b76 100644 --- a/third_party/protobuf/protobuf.patch @@ -2413,14 +2452,14 @@ index 9d928ba175f..5ff9f425b76 100644 +@@ -23,7 +23,7 @@ config_setting( + # ZLIB configuration + ################################################################################ -+ ++ +-ZLIB_DEPS = ["@zlib//:zlib"] ++ZLIB_DEPS = ["@zlib"] -+ ++ + ################################################################################ + # Protobuf Runtime Library +@@ -100,6 +100,7 @@ LINK_OPTS = select({ -+ ++ + load( + ":protobuf.bzl", ++ "adapt_proto_library", @@ -2436,7 +2475,7 @@ index 9d928ba175f..5ff9f425b76 100644 ) - -@@ -135,6 +136,7 @@ -+ ++ +@@ -213,6 +215,7 @@ cc_library( copts = COPTS, includes = ["src/"], @@ -2450,7 +2489,7 @@ index 9d928ba175f..5ff9f425b76 100644 +@@ -255,13 +258,15 @@ filegroup( + visibility = ["//visibility:public"], + ) -+ ++ +-cc_proto_library( ++adapt_proto_library( ++ name = "cc_wkt_protos_genproto", @@ -2468,9 +2507,9 @@ index 9d928ba175f..5ff9f425b76 100644 ++ deprecation = "Only for backward compatibility. Do not use.", + visibility = ["//visibility:public"], + ) -+ ++ +@@ -978,10 +983,10 @@ cc_library( -+ ++ + proto_lang_toolchain( + name = "cc_toolchain", ++ blacklisted_protos = [proto + "_proto" for proto in WELL_KNOWN_PROTO_MAP.keys()], @@ -2479,7 +2518,7 @@ index 9d928ba175f..5ff9f425b76 100644 + visibility = ["//visibility:public"], +- blacklisted_protos = [":_internal_wkt_protos_genrule"], + ) -+ ++ + proto_lang_toolchain( +diff --git a/protobuf.bzl b/protobuf.bzl +index e0653321f..4156a1275 100644 @@ -2488,7 +2527,7 @@ index 9d928ba175f..5ff9f425b76 100644 +@@ -1,4 +1,5 @@ + load("@bazel_skylib//lib:versions.bzl", "versions") ++load("@rules_proto//proto:defs.bzl", "ProtoInfo") -+ ++ + def _GetPath(ctx, path): + if ctx.label.workspace_root: +@@ -85,6 +86,8 @@ def _proto_gen_impl(ctx): @@ -2497,13 +2536,13 @@ index 9d928ba175f..5ff9f425b76 100644 + deps += dep.proto.deps ++ import_flags = depset(import_flags).to_list() ++ deps = depset(deps).to_list() -+ ++ + if not ctx.attr.gen_cc and not ctx.attr.gen_py and not ctx.executable.plugin: + return struct( +@@ -222,6 +225,29 @@ Args: + outs: a list of labels of the expected outputs from the protocol compiler. + """ -+ ++ ++def _adapt_proto_library_impl(ctx): ++ deps = [dep[ProtoInfo] for dep in ctx.attr.deps] ++ @@ -2553,11 +2592,11 @@ index 9d928ba175f..5ff9f425b76 100644 + **kargs: other keyword arguments that are passed to cc_library. +- + """ -+ ++ + includes = [] + if include != None: + includes = [include] -+ ++ +- if internal_bootstrap_hack: +- # For pre-checked-in generated files, we add the internal_bootstrap_hack +- # which will skip the codegen action. 
@@ -2588,7 +2627,7 @@ index 9d928ba175f..5ff9f425b76 100644 -@@ -58,6 +58,37 @@ - : 0) \ - : PyBytes_AsStringAndSize(ob, (charpp), (sizep))) -- +- +@@ -63,6 +63,37 @@ + : PyBytes_AsStringAndSize(ob, (charpp), (sizep))) + #endif @@ -2624,7 +2663,7 @@ index 9d928ba175f..5ff9f425b76 100644 + goto exit; } } -- +- -- if (frame->f_code->co_filename == nullptr) { +- +- if (frame->f_code->co_filename == NULL) { @@ -2639,7 +2678,7 @@ index 9d928ba175f..5ff9f425b76 100644 - return false; + goto exit; } -- +- + - if (frame->f_globals != frame->f_locals) { + frame_globals = PyFrame_GetGlobals(frame); @@ -2657,7 +2696,7 @@ index 9d928ba175f..5ff9f425b76 100644 +@@ -2991,8 +2991,12 @@ bool InitProto2MessageModule(PyObject *m) { + reinterpret_cast( + &RepeatedCompositeContainer_Type)); - + - // If the calling code is not a _pb2.py file, raise AttributeError. \ No newline at end of file +- // Register them as collections.Sequence @@ -2684,3 +2723,12 @@ index 9d928ba175f..5ff9f425b76 100644 + PyErr_Format(PyExc_ValueError, + "UnknownField does not exist. " \ No newline at end of file +diff --git a/third_party/pybind11_protobuf/BUILD b/third_party/pybind11_protobuf/BUILD +deleted file mode 100644 +index 3b946e563d4..00000000000 +--- a/third_party/pybind11_protobuf/BUILD ++++ /dev/null +@@ -1,3 +0,0 @@ +-# Necessary for bazel to recognize this as a package. +- +-# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) diff --git a/third_party/tensorflow b/third_party/tensorflow index a81addf25f8c..f7759359f842 160000 --- a/third_party/tensorflow +++ b/third_party/tensorflow @@ -1 +1 @@ -Subproject commit a81addf25f8c7420fe22674205e4fa410980bb5e +Subproject commit f7759359f8420d3ca7b9fd19493f2a01bd47b4ef diff --git a/torch_xla/csrc/xla_sharding_util.cpp b/torch_xla/csrc/xla_sharding_util.cpp index 499f03b6d478..4a182ee78eeb 100644 --- a/torch_xla/csrc/xla_sharding_util.cpp +++ b/torch_xla/csrc/xla_sharding_util.cpp @@ -158,7 +158,8 @@ xla::HloModuleProto ShardingUtil::SpmdPartitioningPass( // TODO(yeounoh) side-effecting ops gets assigned replicated sharding. pass.AddPass( /*is_spmd=*/true, /*propagate_metadata=*/false, - /*allow_spmd_sharding_propagation_to_output=*/true); + /*allow_spmd_sharding_propagation_to_output=*/ + absl::MakeConstSpan({true})); pass.AddPass( /*num_partitions=*/num_partitions, /*num_replicas=*/num_replicas, options,