
Branch 170752644 #13455

Merged
merged 40 commits into from
Oct 2, 2017
90dd85e
Internal change.
Sep 30, 2017
f5f24f9
Migrate GANEstimator to opensource.
tensorflower-gardener Sep 30, 2017
2bc4bc1
Internal change.
annarev Sep 30, 2017
da83494
fix the typo in docstring of dense_to_sparse_batch
Oct 1, 2017
ff18944
Move EagerTensor from python to C.
tensorflower-gardener Oct 1, 2017
418fac2
Add error message for CHECK failure.
jpienaar Oct 1, 2017
af8da61
Make DynamicStitch's shape function handle the case where all inputs are
tensorflower-gardener Oct 1, 2017
217e6a7
Avoid segfault in tensorflow::BundleReader::~BundleReader if some fil…
tensorflower-gardener Oct 2, 2017
09d0c5f
[tf-signal] Remove checks that frame_length <= fft_length in stft and…
rryan Oct 2, 2017
6d90ba9
Add some sort of synchronization to testBlockingEnqueueManyToClosedQu…
gunan Oct 2, 2017
a81069b
eager: Remove unnecessary "if in_graph_mode()" check in layers.
asimshankar Oct 2, 2017
0c00b61
Relax assumed alignment for small (<512 byte) buffers in XLA JIT.
ckennelly Oct 2, 2017
24ecc54
[XLA] Check for constant operands before using HloEvaluator in Algebr…
jpienaar Oct 2, 2017
fe0f278
[TF:XLA] Add missing dependency to randomized tests.
hawkinsp Oct 2, 2017
3982b7a
[XLA] Check for constant operands before using HloEvaluator in Algebr…
jpienaar Oct 2, 2017
ffa7700
[TF:XLA] Add missing dependency to randomized tests.
hawkinsp Oct 2, 2017
fd3882d
Add arg name to "op does not support eager execution" error.
tensorflower-gardener Oct 2, 2017
bad5311
Revised some documentation.
tensorflower-gardener Oct 2, 2017
3c00952
[tf.data] More actionable error message when passing a list to `Datas…
mrry Oct 2, 2017
c064479
Change bfloat constructor to accept a float to avoid truncation in im…
tensorflower-gardener Oct 2, 2017
9bfa436
Allowing for functions to run across processes using RPC's. Currently…
rohan100jain Oct 2, 2017
45bcc10
Automated g4 rollback of changelist 170525148
ispirmustafa Oct 2, 2017
07dbf31
Create training loss summary with name 'loss' if not already done by …
martinwicke Oct 2, 2017
5293d3f
DecisionTreeEnsembleResource provides accessor methods to the underly…
tensorflower-gardener Oct 2, 2017
ed68614
TFE: Fix tf.layers.Flatten
caisq Oct 2, 2017
684bb8e
Fix incorrect input Tensor name.
tensorflower-gardener Oct 2, 2017
ec187f6
SinhArcsinh (scalar) distribution added to contrib/distributions/
langmore Oct 2, 2017
0c65fa4
[tf.data] Remove `Iterator.dispose_op()`.
mrry Oct 2, 2017
10b9892
Implement NCHW support for tf.depth_to_space on GPU.
tensorflower-gardener Oct 2, 2017
d86104f
Go: Update generated wrapper functions for TensorFlow ops.
tensorflower-gardener Oct 2, 2017
7b098f6
Clarify expectations about the input_data parameter.
tensorflower-gardener Oct 2, 2017
a8444b7
[Windows] Improve import self-check with tests for GPU-related DLLs.
mrry Oct 2, 2017
f08c961
[Grappler] Fold multiply into the weights of a convolution.
Oct 2, 2017
75cac0a
Replace usage of math_ops.maximum with math_ops.reduce_max when getti…
tensorflower-gardener Oct 2, 2017
fe2c8d8
Ensure .tf_configure.bazelrc is written to root of TF repo.
Oct 2, 2017
b6d5ff4
Support --xla_dump_ir_to for the GPU backend
Oct 2, 2017
6a06be6
Change default image grid size.
tensorflower-gardener Oct 2, 2017
de86488
Correct 'vgg16' to 'vgg_16' in contrib/slim/README.md
tensorflower-gardener Oct 2, 2017
88cdf1f
PiperOrigin-RevId: 170752644
tensorflower-gardener Oct 2, 2017
8ed1e0d
Merge commit for internal changes
Oct 2, 2017
3 changes: 2 additions & 1 deletion configure.py
@@ -30,7 +30,8 @@
except ImportError:
from distutils.spawn import find_executable as which

_TF_BAZELRC = '.tf_configure.bazelrc'
_TF_BAZELRC = os.path.join(os.path.dirname(os.path.abspath(__file__)),
'.tf_configure.bazelrc')
_DEFAULT_CUDA_VERSION = '8.0'
_DEFAULT_CUDNN_VERSION = '6'
_DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2'
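The configure.py change above anchors `.tf_configure.bazelrc` to the script's own directory instead of the current working directory. A minimal Python sketch of that pattern (the helper name `bazelrc_path` is illustrative, not from the diff):

```python
import os

def bazelrc_path(script_file):
    """Resolve .tf_configure.bazelrc next to the given script, regardless
    of the directory the script is invoked from."""
    return os.path.join(os.path.dirname(os.path.abspath(script_file)),
                        '.tf_configure.bazelrc')
```

With this, running `python configure.py` from any working directory writes the bazelrc to the repo root, which is what the "Ensure .tf_configure.bazelrc is written to root of TF repo" commit is after.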
1 change: 1 addition & 0 deletions tensorflow/compiler/tests/BUILD
@@ -576,6 +576,7 @@ cc_library(
"//tensorflow/core:framework_internal",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:tensorflow_opensource",
"//tensorflow/core:test",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_util",
5 changes: 5 additions & 0 deletions tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -1805,6 +1805,11 @@ static optional<int64> GetLoopTripCount(const HloInstruction* while_op) {
HloEvaluator evaluator;
auto* while_init = while_op->operand(0);
auto* indvar_init = while_init->operand(*indvar_tuple_idx);
// TODO(b/67157142): This should not be redundant, remove this when the
// underlying issue has been addressed.
if (!hlo_query::AllOperandsAreConstants(*indvar_init)) {
return nullopt;
}
StatusOr<std::unique_ptr<Literal>> indvar_init_result =
evaluator.Evaluate(indvar_init->Clone().get());
if (!indvar_init_result.ok()) {
3 changes: 2 additions & 1 deletion tensorflow/compiler/xla/service/compiler.cc
@@ -58,7 +58,8 @@ Compiler::GetPlatformCompilers() {
LazyInitMutex();
tensorflow::mutex_lock lock(*platform_compiler_mutex_);
auto* factories = GetPlatformCompilerFactories();
CHECK(factories->find(platform_id) == factories->end());
CHECK(factories->find(platform_id) == factories->end())
<< "Compiler factory already registered for platform";
(*factories)[platform_id] = std::move(compiler_factory);
}

66 changes: 23 additions & 43 deletions tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -86,10 +86,8 @@ limitations under the License.
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/env.h"

namespace se = ::perftools::gputools;

@@ -367,68 +365,50 @@ llvm::CodeGenOpt::Level CodeGenOptLevel(const HloModuleConfig& module_config) {
}
}

Status AppendIRToFile(const string& file_name, const string& ir_module_string) {
std::unique_ptr<tensorflow::WritableFile> f;
TF_RETURN_IF_ERROR(
tensorflow::Env::Default()->NewWritableFile(file_name, &f));
TF_RETURN_IF_ERROR(f->Append(ir_module_string));
TF_RETURN_IF_ERROR(f->Close());
return Status::OK();
}

Status InitializeModuleHooks(
const HloModule& module,
const HloModule& hlo_module,
const LLVMCompiler::ModuleHook& user_pre_optimization_hook,
const LLVMCompiler::ModuleHook& user_post_optimization_hook,
LLVMCompiler::ModuleHook* pre_optimization_ir_hook,
LLVMCompiler::ModuleHook* post_optimization_ir_hook) {
const string& dump_ir_to = module.config().debug_options().xla_dump_ir_to();
if (dump_ir_to.empty()) {
const string& ir_dump_directory =
hlo_module.config().debug_options().xla_dump_ir_to();
if (ir_dump_directory.empty()) {
*pre_optimization_ir_hook = user_pre_optimization_hook;
*post_optimization_ir_hook = user_post_optimization_hook;
return Status::OK();
}

// Initialize the output directory and create the output file names.
TF_RETURN_IF_ERROR(
tensorflow::Env::Default()->RecursivelyCreateDir(dump_ir_to));
string safe_file_name_base = module.name();
std::replace_if(safe_file_name_base.begin(), safe_file_name_base.end(),
[](char c) { return c == '/' || c == '\\'; }, '_');

string unoptimized_ir_file_name = tensorflow::io::JoinPath(
dump_ir_to,
tensorflow::strings::StrCat("ir-", safe_file_name_base, "-no-opt.ll"));
string optimized_ir_file_name = tensorflow::io::JoinPath(
dump_ir_to,
tensorflow::strings::StrCat("ir-", safe_file_name_base, "-opt.ll"));
const string& hlo_module_name = hlo_module.name();

// Create the IR hooks. If applicable, each IR hook does the following:
// * Call the user supplied module hook.
// * Write to the output directory. Files will be appended to. We still want
// to append to avoid overwriting possibly important information due to
// operator error.
//
// * Calls the user supplied module hook.
// * Writes out the IR to a file in the output directory designated by
// --xla_dump_ir_to

*pre_optimization_ir_hook =
[user_pre_optimization_hook,
unoptimized_ir_file_name](const llvm::Module& module) {
[user_pre_optimization_hook, ir_dump_directory,
hlo_module_name](const llvm::Module& llvm_module) {
if (user_pre_optimization_hook) {
TF_RETURN_IF_ERROR(user_pre_optimization_hook(module));
TF_RETURN_IF_ERROR(user_pre_optimization_hook(llvm_module));
}
TF_RETURN_IF_ERROR(AppendIRToFile(unoptimized_ir_file_name,
llvm_ir::DumpModuleToString(module)));
return Status::OK();
return llvm_ir::DumpIRToDirectory(/*directory_name=*/ir_dump_directory,
/*hlo_module_name=*/hlo_module_name,
llvm_module,
/*optimized=*/false);
};

*post_optimization_ir_hook =
[user_post_optimization_hook,
optimized_ir_file_name](const llvm::Module& module) {
[user_post_optimization_hook, ir_dump_directory,
hlo_module_name](const llvm::Module& llvm_module) {
if (user_post_optimization_hook) {
TF_RETURN_IF_ERROR(user_post_optimization_hook(module));
TF_RETURN_IF_ERROR(user_post_optimization_hook(llvm_module));
}
TF_RETURN_IF_ERROR(AppendIRToFile(optimized_ir_file_name,
llvm_ir::DumpModuleToString(module)));
return Status::OK();
return llvm_ir::DumpIRToDirectory(/*directory_name=*/ir_dump_directory,
/*hlo_module_name=*/hlo_module_name,
llvm_module,
/*optimized=*/true);
};

return Status::OK();
14 changes: 10 additions & 4 deletions tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -304,17 +304,23 @@ Status IrEmitter::HandleCopy(HloInstruction* copy) {
int IrEmitter::MinimumAlignmentForBufferSize(int64 buffer_size) {
// GLibc returns a pointer with alignment 8 on 32-bit platforms and 16 on
// 64-bit platforms. TCMalloc returns a pointer with alignment 8 for
// allocations smaller than 16 bytes and at least alignment 16 for allocations
// greater than or equal to 16 bytes. N.B. We could improve on this lower
// bound by explicitly allocating the memory with posix_memalign. This is
// allocations smaller than kMallocAlignmentThreshold bytes and at least
// alignment 16 for allocations greater than or equal to
// kMallocAlignmentThreshold bytes. N.B. We could improve on this lower bound
// by explicitly allocating the memory with posix_memalign. This is
// complicated by our desire to allow parameter buffers created by clients to
// be consumed directly by the JIT.
if (buffer_size == 0) {
// No need to align empty buffers.
return 1;
}

const int64 kMallocAlignmentThreshold = 512;

int pointer_size = module_->getDataLayout().getPointerSize();
int buffer_alignment = buffer_size >= 16 ? 2 * pointer_size : 8;
int buffer_alignment = buffer_size >= kMallocAlignmentThreshold
? 2 * pointer_size
: pointer_size;
DCHECK_GT(buffer_alignment, 0);

return buffer_alignment;
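The ir_emitter.cc hunk above relaxes the assumed alignment for small buffers: below a 512-byte malloc threshold only pointer-size alignment is assumed, and twice the pointer size at or above it. A hedged Python sketch of that decision rule (function name is illustrative; the real logic is the C++ `MinimumAlignmentForBufferSize`):

```python
K_MALLOC_ALIGNMENT_THRESHOLD = 512  # mirrors kMallocAlignmentThreshold

def minimum_alignment_for_buffer_size(buffer_size, pointer_size=8):
    """Lower bound on alignment assumed for a heap buffer of buffer_size
    bytes, per the relaxed rule in the diff."""
    if buffer_size == 0:
        return 1  # no need to align empty buffers
    if buffer_size >= K_MALLOC_ALIGNMENT_THRESHOLD:
        return 2 * pointer_size
    return pointer_size
```

Before this change the threshold was 16 bytes, so a 100-byte buffer was assumed 16-byte aligned; after it, the same buffer is only assumed pointer-size (8-byte) aligned, which is the safer bound for small allocations.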
11 changes: 1 addition & 10 deletions tensorflow/compiler/xla/service/executable.cc
@@ -69,15 +69,6 @@ Status Executable::DumpSessionModule() {
*session_module_);
}

// Removes illegal characters from filenames.
static void SanitizeFilename(string* name) {
for (char& c : *name) {
if (c == '/' || c == '\\' || c == '[' || c == ']') {
c = '_';
}
}
}

/* static */ Status Executable::DumpToDirectory(
const string& directory_path, string filename,
const SessionModule& session_module) {
@@ -89,7 +80,7 @@ static void SanitizeFilename(string* name) {
// "directory already exists" error.
TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(directory_path));
}
SanitizeFilename(&filename);
filename = SanitizeFileName(std::move(filename));
string file_path = tensorflow::io::JoinPath(directory_path, filename);
return tensorflow::WriteBinaryProto(env, file_path, session_module);
}
17 changes: 17 additions & 0 deletions tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -341,6 +341,16 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile(
XLA_VLOG_LINES(2, ir_module_string_before_opt);
}

const string& ir_dump_directory =
module->config().debug_options().xla_dump_ir_to();

if (!ir_dump_directory.empty()) {
TF_RETURN_IF_ERROR(llvm_ir::DumpIRToDirectory(
/*directory_name=*/ir_dump_directory,
/*hlo_module_name=*/module->name(), llvm_module,
/*optimized=*/false));
}

// Reserve space for the PTX to be generated for this module.
string* ptx;
{
@@ -363,6 +373,13 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile(
TF_ASSIGN_OR_RETURN(*ptx, CompileToPtx(&llvm_module, {cc_major, cc_minor},
module->config(), libdevice_dir_));

if (!ir_dump_directory.empty()) {
TF_RETURN_IF_ERROR(llvm_ir::DumpIRToDirectory(
/*directory_name=*/ir_dump_directory,
/*hlo_module_name=*/module->name(), llvm_module,
/*optimized=*/true));
}

if (user_post_optimization_hook_) {
TF_CHECK_OK(user_post_optimization_hook_(llvm_module));
}
1 change: 1 addition & 0 deletions tensorflow/compiler/xla/service/llvm_ir/BUILD
@@ -45,6 +45,7 @@ cc_library(
"//tensorflow/compiler/xla:literal_util",
"//tensorflow/compiler/xla:shape_util",
"//tensorflow/compiler/xla:types",
"//tensorflow/compiler/xla:util",
"//tensorflow/compiler/xla:xla_data_proto",
"//tensorflow/compiler/xla/service:hlo",
"//tensorflow/core:lib",
22 changes: 22 additions & 0 deletions tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
@@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"

#include <algorithm>
#include <memory>
#include <vector>

#include "llvm/IR/MDBuilder.h"
@@ -25,9 +26,12 @@ limitations under the License.
#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/lib/core/casts.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"

@@ -582,5 +586,23 @@ std::map<int, llvm::MDNode*> MergeMetadata(
return result;
}

Status DumpIRToDirectory(const string& directory_name,
const string& hlo_module_name,
const llvm::Module& llvm_module, bool optimized) {
string safe_file_name_base = SanitizeFileName(hlo_module_name);
string ir_file_name = tensorflow::io::JoinPath(
directory_name,
tensorflow::strings::StrCat("ir-", safe_file_name_base, "-",
optimized ? "with" : "no", "-opt.ll"));

std::unique_ptr<tensorflow::WritableFile> f;
TF_RETURN_IF_ERROR(
tensorflow::Env::Default()->RecursivelyCreateDir(directory_name));
TF_RETURN_IF_ERROR(
tensorflow::Env::Default()->NewWritableFile(ir_file_name, &f));
TF_RETURN_IF_ERROR(f->Append(DumpModuleToString(llvm_module)));
return f->Close();
}

} // namespace llvm_ir
} // namespace xla
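`DumpIRToDirectory` above builds its output file name as `ir-<sanitized module name>-{no,with}-opt.ll` inside the dump directory. A small Python sketch of just that naming scheme, under the assumption of POSIX-style paths (helper name `ir_dump_path` is illustrative):

```python
import posixpath

def ir_dump_path(directory_name, hlo_module_name, optimized):
    """Construct the dump-file path the way DumpIRToDirectory does:
    sanitize the module name, then pick a -no-opt/-with-opt suffix."""
    safe = ''.join('_' if c in '/\\[]' else c for c in hlo_module_name)
    suffix = 'with' if optimized else 'no'
    return posixpath.join(directory_name,
                          'ir-%s-%s-opt.ll' % (safe, suffix))
```

This is why both the CPU hooks and the GPU compiler can dump into the same `--xla_dump_ir_to` directory without colliding: the pre-optimization and post-optimization dumps of a module get distinct `-no-opt.ll` and `-with-opt.ll` names.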
9 changes: 9 additions & 0 deletions tensorflow/compiler/xla/service/llvm_ir/llvm_util.h
@@ -273,6 +273,15 @@ std::map<int, llvm::MDNode*> MergeMetadata(
llvm::LLVMContext* context, const std::map<int, llvm::MDNode*>& a,
const std::map<int, llvm::MDNode*>& b);

// Dumps out `llvm_module` to a file in the directory named `directory_name`,
// creating the directory if necessary. A sanitized version of
// `hlo_module_name` is incorporated into the file name. If `optimized` is true
// then a suffix of "-with-opt.ll" is used, else a suffix of "-no-opt.ll" is
// used.
Status DumpIRToDirectory(const string& directory_name,
const string& hlo_module_name,
const llvm::Module& llvm_module, bool optimized);

} // namespace llvm_ir
} // namespace xla

9 changes: 9 additions & 0 deletions tensorflow/compiler/xla/util.cc
@@ -336,4 +336,13 @@ std::vector<std::pair<int64, int64>> CommonFactors(
return bounds;
}

string SanitizeFileName(string file_name) {
for (char& c : file_name) {
if (c == '/' || c == '\\' || c == '[' || c == ']') {
c = '_';
}
}
return file_name;
}

} // namespace xla
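The new `xla::SanitizeFileName` replaces the same four characters the old static helper in executable.cc handled, but by value so callers can write `filename = SanitizeFileName(std::move(filename))`. A one-line Python equivalent of the character mapping (illustrative only):

```python
def sanitize_file_name(file_name):
    """Replace the characters xla::SanitizeFileName treats as illegal
    in filenames ('/', '\\', '[', ']') with underscores."""
    return ''.join('_' if c in '/\\[]' else c for c in file_name)
```

The expected behavior matches the new `UtilTest.SanitizeFileName` cases at the end of this diff, e.g. `"/A\B[C]"` becomes `"_A_B_C_"`.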
3 changes: 3 additions & 0 deletions tensorflow/compiler/xla/util.h
@@ -361,6 +361,9 @@ int64 Product(tensorflow::gtl::ArraySlice<int64> xs);
std::vector<std::pair<int64, int64>> CommonFactors(
tensorflow::gtl::ArraySlice<int64> a, tensorflow::gtl::ArraySlice<int64> b);

// Removes illegal characters from filenames.
string SanitizeFileName(string file_name);

} // namespace xla

#define XLA_LOG_LINES(SEV, STRING) \
7 changes: 7 additions & 0 deletions tensorflow/compiler/xla/util_test.cc
@@ -122,5 +122,12 @@ TEST(UtilTest, CommonFactors) {
}
}

TEST(UtilTest, SanitizeFileName) {
EXPECT_EQ(SanitizeFileName(""), "");
EXPECT_EQ(SanitizeFileName("abc"), "abc");
EXPECT_EQ(SanitizeFileName("/\\[]"), "____");
EXPECT_EQ(SanitizeFileName("/A\\B[C]"), "_A_B_C_");
}

} // namespace
} // namespace xla