tensor,module: add a utility to allow for debugging generated kernels
This commit adds a function `debugCompileSource` to `Tensor` that
allows the `Tensor` to use a kernel from a provided source file
instead of generating a new one. This lets developers add
prints/assertions to TACO-generated code and debug it faster.

This was inspired by Amalee's PR (tensor-compiler#302); I would have found
a utility like this very useful for debugging generated code.
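
For illustration, a minimal sketch of the intended workflow (the tensor shape,
format, values, and the /tmp path are placeholders, not details taken from
this commit):

#include "taco.h"
using namespace taco;

int main() {
  Format dv({Dense});
  Tensor<double> b("b", {16}, dv);
  for (int j = 0; j < 16; j++) {
    b.insert({j}, (double) j);
  }
  b.pack();

  Tensor<double> a("a", {16}, dv);
  IndexVar i;
  a(i) = b(i);

  // An earlier run of a.compile() wrote the generated kernel to
  // <tmpdir>/<prefix>.{c, h}. After hand-editing that .c file (for example,
  // adding printf calls), point debugCompileSource at the same prefix.
  a.debugCompileSource("/tmp/taco_tmp_XXXX/kernel_prefix");

  a.evaluate();  // runs the hand-edited kernel
  return 0;
}

Because debugCompileSource marks compilation as done (see the tensor.cpp
change below), evaluate() will not regenerate or overwrite the edited kernel.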
rohany committed Jan 22, 2021
1 parent 468ad7f commit 270b81c
Showing 4 changed files with 67 additions and 18 deletions.
12 changes: 12 additions & 0 deletions include/taco/codegen/module.h
@@ -25,6 +25,12 @@ class Module {

/// Compile the source into a library, returning its full path
std::string compile();

/// Compile the sources at the given prefix and link the compiled code. A
/// libPrefix is of the form path/prefix, where the files path/prefix.{c, h}
/// are present. debugCompileSourceFile can be used to recompile an existing
/// generated code file that has been edited by hand for debugging.
void debugCompileSourceFile(std::string libPrefix);

/// Compile the module into a source file located at the specified location
/// path and prefix. The generated source will be path/prefix.{.c|.bc, .h}
@@ -82,6 +88,12 @@ class Module {

void setJITLibname();
void setJITTmpdir();

/// compileAndLink compiles the files at libPrefix into a library file named
/// output, and dynamically links output into the TACO process. libPrefix
/// is of the form path/prefix, where the files path/prefix.{c, cu, h, cpp}
/// to be compiled are located.
void compileAndLink(std::string libPrefix, std::string output);
};

} // namespace ir
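
As a concrete illustration of the libPrefix convention described in the
debugCompileSourceFile comment above (the directory and file names here are
hypothetical):

#include "taco/codegen/module.h"

// Given generated sources
//   /tmp/taco_tmp_1234/kernel.c            (kernel.cu under CUDA codegen)
//   /tmp/taco_tmp_1234/kernel.h
//   /tmp/taco_tmp_1234/kernel_shims.cpp    (used only under CUDA codegen)
// the libPrefix is "/tmp/taco_tmp_1234/kernel". This call recompiles those
// files into /tmp/taco_tmp_1234/kernel.so and links the library into the
// running process:
taco::ir::Module module;
module.debugCompileSourceFile("/tmp/taco_tmp_1234/kernel");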
15 changes: 15 additions & 0 deletions include/taco/tensor.h
@@ -413,6 +413,21 @@ class TensorBase {

void compile(IndexStmt stmt, bool assembleWhileCompute=false);

/// debugCompileSource can be used to edit TACO-generated code (e.g. to add
/// prints or assertions) and then use the TACO machinery to execute the
/// edited code. debugCompileSource takes a string libPrefix that is the path
/// to a group of TACO-generated files. In particular, TACO generates files
/// like path/prefix.{c, h, ...}; in this case, libPrefix should equal "path/prefix".
/// An example workflow is as follows:
/// Tensor a; Tensor b; IndexVar i;
/// a(i) = b(i);
/// // a.compile(); Compile the expression once to generate code.
/// a.debugCompileSource("/tmp/....");
/// a.evaluate();
// TODO (rohany): This should only get compiled in a test/debug build, but
// I'm not sure that we have the flags set up to do this.
void debugCompileSource(std::string libPrefix);

/// Assemble the tensor storage, including index and value arrays.
void assemble();

49 changes: 31 additions & 18 deletions src/codegen/module.cpp
@@ -116,52 +116,64 @@ void writeShims(vector<Stmt> funcs, string path, string prefix) {
string Module::compile() {
string prefix = tmpdir+libname;
string fullpath = prefix + ".so";

// open the output file & write out the source
compileToSource(tmpdir, libname);

// write out the shims
writeShims(funcs, tmpdir, libname);

this->compileAndLink(prefix, fullpath);

return fullpath;
}

void Module::compileAndLink(std::string libPrefix, std::string output) {
// Construct the command to compile the source files.
string cc;
string cflags;
string file_ending;
string shims_file;
if (should_use_CUDA_codegen()) {
cc = util::getFromEnv("TACO_NVCC", "nvcc");
cflags = util::getFromEnv("TACO_NVCCFLAGS",
get_default_CUDA_compiler_flags());
get_default_CUDA_compiler_flags());
file_ending = ".cu";
shims_file = prefix + "_shims.cpp";
shims_file = libPrefix + "_shims.cpp";
}
else {
cc = util::getFromEnv(target.compiler_env, target.compiler);
cflags = util::getFromEnv("TACO_CFLAGS",
"-O3 -ffast-math -std=c99") + " -shared -fPIC";
"-O3 -ffast-math -std=c99") + " -shared -fPIC";
#if USE_OPENMP
cflags += " -fopenmp";
#endif
file_ending = ".c";
shims_file = "";
}

- string cmd = cc + " " + cflags + " " +
-   prefix + file_ending + " " + shims_file + " " +
-   "-o " + fullpath + " -lm";
-
- // open the output file & write out the source
- compileToSource(tmpdir, libname);
-
- // write out the shims
- writeShims(funcs, tmpdir, libname);
-
- // now compile it
+ auto cmd = cc + " " + cflags + " " +
+   libPrefix + file_ending + " " + shims_file + " " +
+   "-o " + output + " -lm";
+
+ // Execute the compilation command.
int err = system(cmd.data());
taco_uassert(err == 0) << "Compilation command failed:\n" << cmd
<< "\nreturned " << err;

- // use dlsym() to open the compiled library
+ // Use dlsym() to dynamically link the compiled library.
if (lib_handle) {
+ // Close the existing handle if one is open already.
dlclose(lib_handle);
}
- lib_handle = dlopen(fullpath.data(), RTLD_NOW | RTLD_LOCAL);
+ lib_handle = dlopen(output.data(), RTLD_NOW | RTLD_LOCAL);
taco_uassert(lib_handle) << "Failed to load generated code";
}

- return fullpath;
+ void Module::debugCompileSourceFile(string libPrefix) {
+ // Directly compile the files at the target libPrefix.
+ this->compileAndLink(libPrefix, libPrefix + ".so");
+ }

void Module::setSource(string source) {
@@ -170,6 +182,7 @@ void Module::setSource(string source) {
}

string Module::getSource() {
cout << this->tmpdir << endl;
return source.str();
}
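
The compile command assembled in compileAndLink above takes its compiler and
flags from environment variables, so a debugging session can lower the
optimization level and add symbols before recompiling an edited kernel. A
minimal sketch, assuming the C backend reads its compiler from a TACO_CC
variable (the diff only shows target.compiler_env; TACO_CFLAGS appears
directly):

#include <cstdlib>
#include <string>
#include "taco.h"

void recompileEditedKernelWithDebugFlags(taco::Tensor<double>& t,
                                         const std::string& libPrefix) {
  // Compile the edited kernel without optimization and with debug symbols.
  // "-shared -fPIC" is appended by compileAndLink itself, so it is omitted.
  setenv("TACO_CC", "gcc", 1);                  // assumed env var name
  setenv("TACO_CFLAGS", "-O0 -g -std=c99", 1);  // read by compileAndLink
  t.debugCompileSource(libPrefix);
}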

9 changes: 9 additions & 0 deletions src/tensor.cpp
@@ -573,6 +573,7 @@ void TensorBase::compile() {
stmt = parallelizeOuterLoop(stmt);
compile(stmt, content->assembleWhileCompute);
}

void TensorBase::compile(taco::IndexStmt stmt, bool assembleWhileCompute) {
if (!needsCompile()) {
return;
@@ -602,6 +603,14 @@ void TensorBase::compile(taco::IndexStmt stmt, bool assembleWhileCompute) {
cacheComputeKernel(concretizedAssign, content->module);
}

void TensorBase::debugCompileSource(std::string libPrefix) {
// We're directly compiling user-provided source, so mark compilation as done.
this->setNeedsCompile(false);
// Make a new module and compile the source.
content->module = make_shared<Module>();
content->module->debugCompileSourceFile(libPrefix);
}

taco_tensor_t* TensorBase::getTacoTensorT() {
return getStorage();
}
