Skip to content

Commit 9034115

Browse files
JackCaoG, root, and alanwaketan
authored
TF/libtpu Pin Update (#4036)
Summary: This commit is to bump TF to c60aa9632b0bbb75fa26f723207657d2b998d424 and libtpu to 0.1.dev20220930. Test Plan: CI and local resnet runs. Co-authored-by: root <root@t1v-n-7513beeb-w-0.us-central1-a.c.tpu-pytorch.internal> Co-authored-by: Jiewen Tan <jwtan@google.com>
1 parent 1874859 commit 9034115

File tree

9 files changed

+48
-139
lines changed

9 files changed

+48
-139
lines changed

build_torch_xla_libs.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,11 @@ else
8484
cp -r -u -p $THIRD_PARTY_DIR/xla_client $THIRD_PARTY_DIR/tensorflow/tensorflow/compiler/xla/
8585

8686
pushd $THIRD_PARTY_DIR/tensorflow
87-
bazel build $MAX_JOBS $VERBOSE $TPUVM_FLAG --spawn_strategy=$BUILD_STRATEGY --show_progress_rate_limit=20 \
87+
# TensorFlow and its dependencies may introduce warning flags from newer compilers
88+
# that PyTorch and PyTorch/XLA's default compilers don't recognize. They become error
89+
# while '-Werror' is used. Therefore, suppress the warnings.
90+
TF_EXTRA_FLAGS="--copt=-Wno-unknown-warning-option"
91+
bazel build $MAX_JOBS $VERBOSE $TPUVM_FLAG $TF_EXTRA_FLAGS --spawn_strategy=$BUILD_STRATEGY --show_progress_rate_limit=20 \
8892
--define framework_shared_object=false -c "$MODE" "${OPTS[@]}" \
8993
$XLA_CUDA_CFG //tensorflow/compiler/xla/xla_client:libxla_computation_client.so
9094

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
base_dir = os.path.dirname(os.path.abspath(__file__))
5454
third_party_path = os.path.join(base_dir, 'third_party')
5555

56-
_libtpu_version = '0.1.dev20220816'
56+
_libtpu_version = '0.1.dev20220930'
5757
_libtpu_storage_path = f'https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/wheels/libtpu-nightly/libtpu_nightly-{_libtpu_version}-py3-none-any.whl'
5858

5959

tf_patches/patch.diff

Lines changed: 18 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -1,118 +1,5 @@
1-
diff --git a/tensorflow/core/profiler/convert/xplane_to_tools_data.cc b/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
2-
index 6a3c045db5c..fbb8a84786e 100644
3-
--- a/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
4-
+++ b/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
5-
@@ -20,9 +20,13 @@ limitations under the License.
6-
7-
#include "absl/strings/str_format.h"
8-
#include "absl/strings/string_view.h"
9-
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
10-
+#include "tensorflow/core/lib/gtl/map_util.h"
11-
#include "tensorflow/core/platform/env.h"
12-
#include "tensorflow/core/platform/logging.h"
13-
+#include "tensorflow/core/platform/path.h"
14-
#include "tensorflow/core/platform/protobuf.h"
15-
+#include "tensorflow/core/profiler/convert/hlo_proto_to_memory_visualization_utils.h"
16-
#include "tensorflow/core/profiler/convert/hlo_to_tools_data.h"
17-
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
18-
#include "tensorflow/core/profiler/convert/op_stats_to_op_profile.h"
19-
@@ -232,7 +236,77 @@ std::pair<std::string, bool> ConvertMultiXSpacesToOpProfileViewer(
20-
21-
return std::make_pair(profile.SerializeAsString(), true);
22-
}
23-
-} // namespace
24-
+
25-
+std::pair<std::string, bool> ConvertHloProtoToMemoryViewer(
26-
+ const xla::HloProto& hlo_proto) {
27-
+ static constexpr int kSmallBufferSize = 16 * 1024; // 16KB
28-
+ static constexpr int kMemorySpaceColor = 0; // HBM
29-
+
30-
+ auto result_or = ConvertHloProtoToPreprocessResult(
31-
+ hlo_proto, kSmallBufferSize,
32-
+ GetHeapSimulatorTraceId(hlo_proto, kMemorySpaceColor), kMemorySpaceColor);
33-
+ if (!result_or.ok()) {
34-
+ LOG(ERROR) << "Failed to convert HLO proto to memory viewer result: "
35-
+ << result_or.status().message();
36-
+ return std::make_pair("", false);
37-
+ }
38-
+
39-
+ std::string json_output;
40-
+ tensorflow::protobuf::util::JsonPrintOptions options;
41-
+ options.always_print_primitive_fields = true;
42-
+ auto encoded_status = tensorflow::protobuf::util::MessageToJsonString(
43-
+ result_or.value(), &json_output, options);
44-
+ if (!encoded_status.ok()) {
45-
+ LOG(ERROR) << "Failed to convert memory viewer result to JSON format: "
46-
+ << encoded_status.message();
47-
+ return std::make_pair("", false);
48-
+ }
49-
+
50-
+ return std::make_pair(json_output, true);
51-
+}
52-
+
53-
+std::pair<std::string, bool> ConvertHloProtoToToolData(
54-
+ const std::vector<std::string>& xspace_paths,
55-
+ const absl::string_view tool_name,
56-
+ const absl::flat_hash_map<std::string, std::variant<int, std::string>>&
57-
+ options) {
58-
+ if (xspace_paths.empty()) {
59-
+ return std::make_pair("", false);
60-
+ }
61-
+
62-
+ // <options> must provide a hlo_module_name field to identify the HLO module.
63-
+ auto* result = gtl::FindOrNull(options, "hlo_module_name");
64-
+ if (!result) {
65-
+ LOG(ERROR) << "Can not find HLO module name from options.";
66-
+ return std::make_pair("", false);
67-
+ }
68-
+ const std::string* hlo_module_name = std::get_if<std::string>(result);
69-
+ if (!hlo_module_name || hlo_module_name->empty()) {
70-
+ LOG(ERROR) << "Can not find HLO module name from options.";
71-
+ return std::make_pair("", false);
72-
+ }
73-
+
74-
+ // Load HLO module from file.
75-
+ absl::string_view base_dir = tensorflow::io::Dirname(xspace_paths[0]);
76-
+ std::string hlo_proto_file_name =
77-
+ GetHloProtoFileName(base_dir, *hlo_module_name);
78-
+ xla::HloProto hlo_proto;
79-
+ tensorflow::Status status = tensorflow::ReadBinaryProto(
80-
+ tensorflow::Env::Default(), hlo_proto_file_name, &hlo_proto);
81-
+ if (!status.ok()) {
82-
+ LOG(ERROR) << "Failed to read HLO proto: " << status.error_message();
83-
+ return std::make_pair("", false);
84-
+ }
85-
+
86-
+ // Convert from HLO proto to tools data.
87-
+ if (tool_name == "memory_viewer") {
88-
+ return ConvertHloProtoToMemoryViewer(hlo_proto);
89-
+ } else {
90-
+ LOG(ERROR) << "Can not find tool: " << tool_name
91-
+ << ". Please update to the latest version of Tensorflow.";
92-
+ return std::make_pair("", false);
93-
+ }
94-
+}
95-
96-
std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
97-
const std::vector<XSpace>& xspaces,
98-
@@ -278,5 +352,6 @@ std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
99-
}
100-
}
101-
102-
+} // namespace
103-
} // namespace profiler
104-
} // namespace tensorflow
105-
--- a/tensorflow/core/platform/macros.h
106-
+++ b/tensorflow/core/platform/macros.h
107-
@@ -21,7 +21,7 @@ limitations under the License.
108-
// Compiler supports GCC-style attributes
109-
#define TF_ATTRIBUTE_NORETURN __attribute__((noreturn))
110-
#define TF_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
111-
-#define TF_ATTRIBUTE_NOINLINE __attribute__((noinline))
112-
+#define TF_ATTRIBUTE_NOINLINE [[noinline]]
113-
#define TF_ATTRIBUTE_UNUSED __attribute__((unused))
114-
#define TF_ATTRIBUTE_COLD __attribute__((cold))
115-
#define TF_ATTRIBUTE_WEAK __attribute__((weak))
1+
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fp16.h b/tensorflow/compiler/xla/service/cpu/runtime_fp16.h
2+
index 9fe020d5937..32774c2f3c0 100644
1163
--- a/tensorflow/compiler/xla/service/cpu/runtime_fp16.h
1174
+++ b/tensorflow/compiler/xla/service/cpu/runtime_fp16.h
1185
@@ -18,12 +18,7 @@ limitations under the License.
@@ -129,4 +16,19 @@ index 6a3c045db5c..fbb8a84786e 100644
12916
// Older versions of Clang don't have _Float16. Since both float and _Float16
13017
// are passed in the same register we can use the wider type and careful casting
13118
// to conform to x86_64 psABI. This only works with the assumption that we're
132-
19+
diff --git a/tensorflow/core/profiler/convert/xplane_to_tools_data.cc b/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
20+
index b70ea8af5df..b54a895e306 100644
21+
--- a/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
22+
+++ b/tensorflow/core/profiler/convert/xplane_to_tools_data.cc
23+
@@ -230,8 +230,9 @@ StatusOr<std::string> ConvertMultiXSpacesToToolData(
24+
return ConvertMultiXSpacesToTfDataBottleneckAnalysis(session_snapshot);
25+
} else if (tool_name == "op_profile") {
26+
return ConvertMultiXSpacesToOpProfileViewer(session_snapshot);
27+
- } else if (tool_name == "memory_viewer" || tool_name == "graph_viewer") {
28+
- return ConvertHloProtoToToolData(session_snapshot, tool_name, options);
29+
+ // this function is not being used by xla
30+
+ // } else if (tool_name == "memory_viewer" || tool_name == "graph_viewer") {
31+
+ // return ConvertHloProtoToToolData(session_snapshot, tool_name, options);
32+
} else if (tool_name == "tool_names") {
33+
return GetAvailableToolNames(session_snapshot);
34+
} else {

third_party/tensorflow

Submodule tensorflow updated 5049 files

third_party/xla_client/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ cc_library(
179179
] + if_cuda_is_configured([
180180
"@local_config_nccl//:nccl",
181181
"//tensorflow/compiler/jit:xla_gpu_device",
182+
"//tensorflow/compiler/xla/stream_executor:cuda_platform",
182183
]) + if_with_tpu_support([
183184
"//tensorflow/compiler/jit:xla_tpu_device",
184185
"//tensorflow/compiler/jit:xla_tpu_jit",

third_party/xla_client/pjrt_computation_client.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ std::vector<ComputationClient::DataPtr> PjRtComputationClient::TransferToServer(
112112
auto literal = std::make_shared<xla::Literal>(tensor.shape);
113113
tensor.populate_fn(tensor, literal->untyped_data(), literal->size_bytes());
114114
std::vector<int64_t> byte_strides(literal->shape().dimensions_size());
115-
ShapeUtil::ByteStrides(literal->shape(), absl::MakeSpan(byte_strides));
115+
XLA_CHECK_OK(ShapeUtil::ByteStrides(literal->shape(), absl::MakeSpan(byte_strides)));
116116
total_size += literal->size_bytes();
117117

118118
// Avoid use-after-free on `literal` due to unsequenced move and use.

third_party/xla_client/tf_logging.h

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
#include <sstream>
55

66
#include "tensorflow/compiler/xla/status.h"
7-
#include "tensorflow/core/platform/logging.h"
7+
#include "tensorflow/tsl/platform/logging.h"
8+
#include "tensorflow/tsl/platform/status.h"
89

910
namespace xla {
1011
namespace internal {
@@ -15,19 +16,18 @@ namespace internal {
1516
// implementation of them.
1617
#define TF_LOG(severity) _TF_LOG_##severity
1718

18-
#define TF_VLOG_IS_ON(lvl) \
19-
(([](int level, const char* fname) { \
20-
static const bool vmodule_activated = \
21-
::tensorflow::internal::LogMessage::VmoduleActivated(fname, level); \
22-
return vmodule_activated; \
19+
#define TF_VLOG_IS_ON(lvl) \
20+
(([](int level, const char* fname) { \
21+
static const bool vmodule_activated = \
22+
::tsl::internal::LogMessage::VmoduleActivated(fname, level); \
23+
return vmodule_activated; \
2324
})(lvl, __FILE__))
2425

25-
#define TF_VLOG(level) \
26-
TF_PREDICT_TRUE(!TF_VLOG_IS_ON(level)) \
27-
? (void)0 \
28-
: ::tensorflow::internal::Voidifier() & \
29-
::tensorflow::internal::LogMessage(__FILE__, __LINE__, \
30-
tensorflow::INFO)
26+
#define TF_VLOG(level) \
27+
TF_PREDICT_TRUE(!TF_VLOG_IS_ON(level)) \
28+
? (void)0 \
29+
: ::tsl::internal::Voidifier() & \
30+
::tsl::internal::LogMessage(__FILE__, __LINE__, ::tsl::INFO)
3131

3232
struct ErrorSink : public std::basic_ostringstream<char> {};
3333

@@ -54,12 +54,10 @@ class ErrorGenerator {
5454
while (TF_PREDICT_FALSE(!(condition))) \
5555
TF_ERROR_STREAM() << "Check failed: " #condition " "
5656

57-
#define TF_CHECK_OP_LOG(name, op, val1, val2) \
58-
while (::tensorflow::internal::CheckOpString _result{ \
59-
::tensorflow::internal::name##Impl( \
60-
::tensorflow::internal::GetReferenceableValue(val1), \
61-
::tensorflow::internal::GetReferenceableValue(val2), \
62-
#val1 " " #op " " #val2)}) \
57+
#define TF_CHECK_OP_LOG(name, op, val1, val2) \
58+
while (::tsl::internal::CheckOpString _result{::tsl::internal::name##Impl( \
59+
::tsl::internal::GetReferenceableValue(val1), \
60+
::tsl::internal::GetReferenceableValue(val2), #val1 " " #op " " #val2)}) \
6361
TF_ERROR_STREAM() << *(_result.str_)
6462

6563
#define TF_CHECK_OP(name, op, val1, val2) TF_CHECK_OP_LOG(name, op, val1, val2)
@@ -73,7 +71,7 @@ class ErrorGenerator {
7371
#define TF_CHECK_GT(val1, val2) TF_CHECK_OP(Check_GT, >, val1, val2)
7472

7573
#undef TF_CHECK_OK
76-
#define TF_CHECK_OK(val) TF_CHECK_EQ(val, ::tensorflow::Status::OK())
74+
#define TF_CHECK_OK(val) TF_DO_CHECK_OK(val, FATAL)
7775
#define TF_CHECK_NOTNULL(val) TF_CHECK(val != nullptr)
7876

7977
} // namespace internal

third_party/xla_client/tf_version_script.lds

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
tensorflow {
22
global:
33
*tensorflow*;
4+
*tsl*;
45
*absl*;
56
*protobuf*;
67
*perftools*gputools*;

torch_xla/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ def _set_missing_flags(flags, sets):
4646
def _setup_xla_flags():
4747
flags = os.environ.get('XLA_FLAGS', '').split(' ')
4848
flags = _set_missing_flags(flags, (('xla_cpu_enable_fast_math', 'false'),))
49+
flags = _set_missing_flags(
50+
flags, (('xla_gpu_simplify_all_fp_conversions', 'false'),))
4951
os.environ['XLA_FLAGS'] = ' '.join(flags)
5052

5153

@@ -110,6 +112,7 @@ def _tpu_vm_init():
110112
# _tpu_vm_init() will update TPU_LIBRARY_PATH to Python package, if available
111113
os.environ['TPU_LIBRARY_PATH'] = '/dev/null'
112114
import _XLAC
115+
del os.environ['TPU_LIBRARY_PATH']
113116

114117
_tpu_vm_init()
115118

0 commit comments

Comments
 (0)