Skip to content

Commit 9034115

Browse files
JackCaoG, root, and alanwaketan
authored
TF/libtpu Pin Update (#4036)
Summary: This commit is to bump TF to c60aa9632b0bbb75fa26f723207657d2b998d424 and libtpu to 0.1.dev20220930. Test Plan: CI and local resnet runs. Co-authored-by: root <root@t1v-n-7513beeb-w-0.us-central1-a.c.tpu-pytorch.internal> Co-authored-by: Jiewen Tan <jwtan@google.com>
1 parent 1874859 commit 9034115

File tree

9 files changed

+48
-139
lines changed

9 files changed

+48
-139
lines changed

build_torch_xla_libs.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,11 @@ else
8484
cp -r -u -p $THIRD_PARTY_DIR/xla_client $THIRD_PARTY_DIR/tensorflow/tensorflow/compiler/xla/
8585

8686
pushd $THIRD_PARTY_DIR/tensorflow
87-
bazel build $MAX_JOBS $VERBOSE $TPUVM_FLAG --spawn_strategy=$BUILD_STRATEGY --show_progress_rate_limit=20 \
87+
# TensorFlow and its dependencies may introduce warning flags from newer compilers
88+
# that PyTorch and PyTorch/XLA's default compilers don't recognize. They become error
89+
# while '-Werror' is used. Therefore, suppress the warnings.
90+
TF_EXTRA_FLAGS="--copt=-Wno-unknown-warning-option"
91+
bazel build $MAX_JOBS $VERBOSE $TPUVM_FLAG $TF_EXTRA_FLAGS --spawn_strategy=$BUILD_STRATEGY --show_progress_rate_limit=20 \
8892
--define framework_shared_object=false -c "$MODE" "${OPTS[@]}" \
8993
$XLA_CUDA_CFG //tensorflow/compiler/xla/xla_client:libxla_computation_client.so
9094

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
base_dir = os.path.dirname(os.path.abspath(__file__))
5454
third_party_path = os.path.join(base_dir, 'third_party')
5555

56-
_libtpu_version = '0.1.dev20220816'
56+
_libtpu_version = '0.1.dev20220930'
5757
_libtpu_storage_path = f'https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/wheels/libtpu-nightly/libtpu_nightly-{_libtpu_version}-py3-none-any.whl'
5858

5959

tf_patches/patch.diff

Lines changed: 18 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -1,118 +1,5 @@
1-
diff --git a/tensorflow/core/profiler/convert/xplane_to_tools_data.cc b/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
2-
index 6a3c045db5c..fbb8a84786e 100644
3-
--- a/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
4-
+++ b/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
5-
@@ -20,9 +20,13 @@ limitations under the License.
6-
7-
#include "absl/strings/str_format.h"
8-
#include "absl/strings/string_view.h"
9-
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
10-
+#include "tensorflow/core/lib/gtl/map_util.h"
11-
#include "tensorflow/core/platform/env.h"
12-
#include "tensorflow/core/platform/logging.h"
13-
+#include "tensorflow/core/platform/path.h"
14-
#include "tensorflow/core/platform/protobuf.h"
15-
+#include "tensorflow/core/profiler/convert/hlo_proto_to_memory_visualization_utils.h"
16-
#include "tensorflow/core/profiler/convert/hlo_to_tools_data.h"
17-
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
18-
#include "tensorflow/core/profiler/convert/op_stats_to_op_profile.h"
19-
@@ -232,7 +236,77 @@ std::pair<std::string, bool> ConvertMultiXSpacesToOpProfileViewer(
20-
21-
return std::make_pair(profile.SerializeAsString(), true);
22-
}
23-
-} // namespace
24-
+
25-
+std::pair<std::string, bool> ConvertHloProtoToMemoryViewer(
26-
+ const xla::HloProto& hlo_proto) {
27-
+ static constexpr int kSmallBufferSize = 16 * 1024; // 16KB
28-
+ static constexpr int kMemorySpaceColor = 0; // HBM
29-
+
30-
+ auto result_or = ConvertHloProtoToPreprocessResult(
31-
+ hlo_proto, kSmallBufferSize,
32-
+ GetHeapSimulatorTraceId(hlo_proto, kMemorySpaceColor), kMemorySpaceColor);
33-
+ if (!result_or.ok()) {
34-
+ LOG(ERROR) << "Failed to convert HLO proto to memory viewer result: "
35-
+ << result_or.status().message();
36-
+ return std::make_pair("", false);
37-
+ }
38-
+
39-
+ std::string json_output;
40-
+ tensorflow::protobuf::util::JsonPrintOptions options;
41-
+ options.always_print_primitive_fields = true;
42-
+ auto encoded_status = tensorflow::protobuf::util::MessageToJsonString(
43-
+ result_or.value(), &json_output, options);
44-
+ if (!encoded_status.ok()) {
45-
+ LOG(ERROR) << "Failed to convert memory viewer result to JSON format: "
46-
+ << encoded_status.message();
47-
+ return std::make_pair("", false);
48-
+ }
49-
+
50-
+ return std::make_pair(json_output, true);
51-
+}
52-
+
53-
+std::pair<std::string, bool> ConvertHloProtoToToolData(
54-
+ const std::vector<std::string>& xspace_paths,
55-
+ const absl::string_view tool_name,
56-
+ const absl::flat_hash_map<std::string, std::variant<int, std::string>>&
57-
+ options) {
58-
+ if (xspace_paths.empty()) {
59-
+ return std::make_pair("", false);
60-
+ }
61-
+
62-
+ // <options> must provide a hlo_module_name field to identify the HLO module.
63-
+ auto* result = gtl::FindOrNull(options, "hlo_module_name");
64-
+ if (!result) {
65-
+ LOG(ERROR) << "Can not find HLO module name from options.";
66-
+ return std::make_pair("", false);
67-
+ }
68-
+ const std::string* hlo_module_name = std::get_if<std::string>(result);
69-
+ if (!hlo_module_name || hlo_module_name->empty()) {
70-
+ LOG(ERROR) << "Can not find HLO module name from options.";
71-
+ return std::make_pair("", false);
72-
+ }
73-
+
74-
+ // Load HLO module from file.
75-
+ absl::string_view base_dir = tensorflow::io::Dirname(xspace_paths[0]);
76-
+ std::string hlo_proto_file_name =
77-
+ GetHloProtoFileName(base_dir, *hlo_module_name);
78-
+ xla::HloProto hlo_proto;
79-
+ tensorflow::Status status = tensorflow::ReadBinaryProto(
80-
+ tensorflow::Env::Default(), hlo_proto_file_name, &hlo_proto);
81-
+ if (!status.ok()) {
82-
+ LOG(ERROR) << "Failed to read HLO proto: " << status.error_message();
83-
+ return std::make_pair("", false);
84-
+ }
85-
+
86-
+ // Convert from HLO proto to tools data.
87-
+ if (tool_name == "memory_viewer") {
88-
+ return ConvertHloProtoToMemoryViewer(hlo_proto);
89-
+ } else {
90-
+ LOG(ERROR) << "Can not find tool: " << tool_name
91-
+ << ". Please update to the latest version of Tensorflow.";
92-
+ return std::make_pair("", false);
93-
+ }
94-
+}
95-
96-
std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
97-
const std::vector<XSpace>& xspaces,
98-
@@ -278,5 +352,6 @@ std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
99-
}
100-
}
101-
102-
+} // namespace
103-
} // namespace profiler
104-
} // namespace tensorflow
105-
--- a/tensorflow/core/platform/macros.h
106-
+++ b/tensorflow/core/platform/macros.h
107-
@@ -21,7 +21,7 @@ limitations under the License.
108-
// Compiler supports GCC-style attributes
109-
#define TF_ATTRIBUTE_NORETURN __attribute__((noreturn))
110-
#define TF_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
111-
-#define TF_ATTRIBUTE_NOINLINE __attribute__((noinline))
112-
+#define TF_ATTRIBUTE_NOINLINE [[noinline]]
113-
#define TF_ATTRIBUTE_UNUSED __attribute__((unused))
114-
#define TF_ATTRIBUTE_COLD __attribute__((cold))
115-
#define TF_ATTRIBUTE_WEAK __attribute__((weak))
1+
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fp16.h b/tensorflow/compiler/xla/service/cpu/runtime_fp16.h
2+
index 9fe020d5937..32774c2f3c0 100644
1163
--- a/tensorflow/compiler/xla/service/cpu/runtime_fp16.h
1174
+++ b/tensorflow/compiler/xla/service/cpu/runtime_fp16.h
1185
@@ -18,12 +18,7 @@ limitations under the License.
@@ -129,4 +16,19 @@ index 6a3c045db5c..fbb8a84786e 100644
12916
// Older versions of Clang don't have _Float16. Since both float and _Float16
13017
// are passed in the same register we can use the wider type and careful casting
13118
// to conform to x86_64 psABI. This only works with the assumption that we're
132-
19+
diff --git a/tensorflow/core/profiler/convert/xplane_to_tools_data.cc b/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
20+
index b70ea8af5df..b54a895e306 100644
21+
--- a/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
22+
+++ b/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
23+
@@ -230,8 +230,9 @@ StatusOr<std::string> ConvertMultiXSpacesToToolData(
24+
return ConvertMultiXSpacesToTfDataBottleneckAnalysis(session_snapshot);
25+
} else if (tool_name == "op_profile") {
26+
return ConvertMultiXSpacesToOpProfileViewer(session_snapshot);
27+
- } else if (tool_name == "memory_viewer" || tool_name == "graph_viewer") {
28+
- return ConvertHloProtoToToolData(session_snapshot, tool_name, options);
29+
+ // this function is not being used by xla
30+
+ // } else if (tool_name == "memory_viewer" || tool_name == "graph_viewer") {
31+
+ // return ConvertHloProtoToToolData(session_snapshot, tool_name, options);
32+
} else if (tool_name == "tool_names") {
33+
return GetAvailableToolNames(session_snapshot);
34+
} else {

third_party/tensorflow

Submodule tensorflow updated 5049 files

third_party/xla_client/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ cc_library(
179179
] + if_cuda_is_configured([
180180
"@local_config_nccl//:nccl",
181181
"//tensorflow/compiler/jit:xla_gpu_device",
182+
"//tensorflow/compiler/xla/stream_executor:cuda_platform",
182183
]) + if_with_tpu_support([
183184
"//tensorflow/compiler/jit:xla_tpu_device",
184185
"//tensorflow/compiler/jit:xla_tpu_jit",

third_party/xla_client/pjrt_computation_client.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ std::vector<ComputationClient::DataPtr> PjRtComputationClient::TransferToServer(
112112
auto literal = std::make_shared<xla::Literal>(tensor.shape);
113113
tensor.populate_fn(tensor, literal->untyped_data(), literal->size_bytes());
114114
std::vector<int64_t> byte_strides(literal->shape().dimensions_size());
115-
ShapeUtil::ByteStrides(literal->shape(), absl::MakeSpan(byte_strides));
115+
XLA_CHECK_OK(ShapeUtil::ByteStrides(literal->shape(), absl::MakeSpan(byte_strides)));
116116
total_size += literal->size_bytes();
117117

118118
// Avoid use-after-free on `literal` due to unsequenced move and use.

third_party/xla_client/tf_logging.h

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
#include <sstream>
55

66
#include "tensorflow/compiler/xla/status.h"
7-
#include "tensorflow/core/platform/logging.h"
7+
#include "tensorflow/tsl/platform/logging.h"
8+
#include "tensorflow/tsl/platform/status.h"
89

910
namespace xla {
1011
namespace internal {
@@ -15,19 +16,18 @@ namespace internal {
1516
// implementation of them.
1617
#define TF_LOG(severity) _TF_LOG_##severity
1718

18-
#define TF_VLOG_IS_ON(lvl) \
19-
(([](int level, const char* fname) { \
20-
static const bool vmodule_activated = \
21-
::tensorflow::internal::LogMessage::VmoduleActivated(fname, level); \
22-
return vmodule_activated; \
19+
#define TF_VLOG_IS_ON(lvl) \
20+
(([](int level, const char* fname) { \
21+
static const bool vmodule_activated = \
22+
::tsl::internal::LogMessage::VmoduleActivated(fname, level); \
23+
return vmodule_activated; \
2324
})(lvl, __FILE__))
2425

25-
#define TF_VLOG(level) \
26-
TF_PREDICT_TRUE(!TF_VLOG_IS_ON(level)) \
27-
? (void)0 \
28-
: ::tensorflow::internal::Voidifier() & \
29-
::tensorflow::internal::LogMessage(__FILE__, __LINE__, \
30-
tensorflow::INFO)
26+
#define TF_VLOG(level) \
27+
TF_PREDICT_TRUE(!TF_VLOG_IS_ON(level)) \
28+
? (void)0 \
29+
: ::tsl::internal::Voidifier() & \
30+
::tsl::internal::LogMessage(__FILE__, __LINE__, ::tsl::INFO)
3131

3232
struct ErrorSink : public std::basic_ostringstream<char> {};
3333

@@ -54,12 +54,10 @@ class ErrorGenerator {
5454
while (TF_PREDICT_FALSE(!(condition))) \
5555
TF_ERROR_STREAM() << "Check failed: " #condition " "
5656

57-
#define TF_CHECK_OP_LOG(name, op, val1, val2) \
58-
while (::tensorflow::internal::CheckOpString _result{ \
59-
::tensorflow::internal::name##Impl( \
60-
::tensorflow::internal::GetReferenceableValue(val1), \
61-
::tensorflow::internal::GetReferenceableValue(val2), \
62-
#val1 " " #op " " #val2)}) \
57+
#define TF_CHECK_OP_LOG(name, op, val1, val2) \
58+
while (::tsl::internal::CheckOpString _result{::tsl::internal::name##Impl( \
59+
::tsl::internal::GetReferenceableValue(val1), \
60+
::tsl::internal::GetReferenceableValue(val2), #val1 " " #op " " #val2)}) \
6361
TF_ERROR_STREAM() << *(_result.str_)
6462

6563
#define TF_CHECK_OP(name, op, val1, val2) TF_CHECK_OP_LOG(name, op, val1, val2)
@@ -73,7 +71,7 @@ class ErrorGenerator {
7371
#define TF_CHECK_GT(val1, val2) TF_CHECK_OP(Check_GT, >, val1, val2)
7472

7573
#undef TF_CHECK_OK
76-
#define TF_CHECK_OK(val) TF_CHECK_EQ(val, ::tensorflow::Status::OK())
74+
#define TF_CHECK_OK(val) TF_DO_CHECK_OK(val, FATAL)
7775
#define TF_CHECK_NOTNULL(val) TF_CHECK(val != nullptr)
7876

7977
} // namespace internal

third_party/xla_client/tf_version_script.lds

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
tensorflow {
22
global:
33
*tensorflow*;
4+
*tsl*;
45
*absl*;
56
*protobuf*;
67
*perftools*gputools*;

torch_xla/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ def _set_missing_flags(flags, sets):
4646
def _setup_xla_flags():
4747
flags = os.environ.get('XLA_FLAGS', '').split(' ')
4848
flags = _set_missing_flags(flags, (('xla_cpu_enable_fast_math', 'false'),))
49+
flags = _set_missing_flags(
50+
flags, (('xla_gpu_simplify_all_fp_conversions', 'false'),))
4951
os.environ['XLA_FLAGS'] = ' '.join(flags)
5052

5153

@@ -110,6 +112,7 @@ def _tpu_vm_init():
110112
# _tpu_vm_init() will update TPU_LIBRARY_PATH to Python package, if available
111113
os.environ['TPU_LIBRARY_PATH'] = '/dev/null'
112114
import _XLAC
115+
del os.environ['TPU_LIBRARY_PATH']
113116

114117
_tpu_vm_init()
115118

0 commit comments

Comments
 (0)