From 270b81cee43bdb763f582968ea75f3b25d75670e Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Fri, 22 Jan 2021 12:23:29 -0800 Subject: [PATCH] tensor,module: add a utility to allow for debugging generated kernels This commit adds a function `debugCompileSource` to a `Tensor` that allows for the `Tensor` to use a kernel from a provided source file instead of generating a new one. This allows developers to add prints/assertions to TACO generated code to debug faster. Inspired by Amalee's PR (#302), I would have found a command like this very useful for debugging generated code. --- include/taco/codegen/module.h | 12 +++++++++ include/taco/tensor.h | 15 +++++++++++ src/codegen/module.cpp | 49 ++++++++++++++++++++++------------- src/tensor.cpp | 9 +++++++ 4 files changed, 67 insertions(+), 18 deletions(-) diff --git a/include/taco/codegen/module.h b/include/taco/codegen/module.h index 788156fdb..12bf858b3 100644 --- a/include/taco/codegen/module.h +++ b/include/taco/codegen/module.h @@ -25,6 +25,12 @@ class Module { /// Compile the source into a library, returning its full path std::string compile(); + + /// Compile the sources at the given prefix and link the compiled code. A + /// libPrefix is of the form path/prefix, where files path/prefix.{c, h} are + /// present. debugCompileSourceFile can be used to recompile an existing + /// generated code file with handwritten changes to debug. + void debugCompileSourceFile(std::string libPrefix); /// Compile the module into a source file located at the specified location /// path and prefix. The generated source will be path/prefix.{.c|.bc, .h} @@ -82,6 +88,12 @@ class Module { void setJITLibname(); void setJITTmpdir(); + + /// compileAndLink compiles the files at prefix into a library file named + /// output, and dynamically links output into the TACO process. libPrefix + /// is of the form path/prefix, where files path/prefix.{c, cu, h, cpp} + /// are placed to be compiled. 
+ void compileAndLink(std::string libPrefix, std::string output); }; } // namespace ir diff --git a/include/taco/tensor.h b/include/taco/tensor.h index 313758252..1b1846e4c 100644 --- a/include/taco/tensor.h +++ b/include/taco/tensor.h @@ -413,6 +413,21 @@ class TensorBase { void compile(IndexStmt stmt, bool assembleWhileCompute=false); + /// debugCompileSource can be used to edit TACO generated code (to add prints + /// or assertions etc) and use the TACO machinery to execute the edited code. + /// debugCompileSource takes in a string libPrefix that is the path to a + /// group of TACO generated files. In particular, TACO generates files like + /// path/prefix.{c, h, ...}. In this case, libPrefix should equal "path/prefix". + /// An example workflow is as follows: + /// Tensor a; Tensor b; IndexVar i; + /// a(i) = b(i); + /// // a.compile(); Compile the expression once to generate code. + /// a.debugCompileSource("/tmp/...."); + /// a.evaluate(); + // TODO (rohany): This should only get compiled in a test/debug build, but + // I'm not sure that we have the flags set up to do this. + void debugCompileSource(std::string libPrefix); + /// Assemble the tensor storage, including index and value arrays. void assemble(); diff --git a/src/codegen/module.cpp b/src/codegen/module.cpp index 9d99c16f7..be92e3272 100644 --- a/src/codegen/module.cpp +++ b/src/codegen/module.cpp @@ -116,7 +116,20 @@ void writeShims(vector funcs, string path, string prefix) { string Module::compile() { string prefix = tmpdir+libname; string fullpath = prefix + ".so"; + + // open the output file & write out the source + compileToSource(tmpdir, libname); + // write out the shims + writeShims(funcs, tmpdir, libname); + + this->compileAndLink(prefix, fullpath); + + return fullpath; +} + +void Module::compileAndLink(std::string libPrefix, std::string output) { + // Construct the command to compile the source files. 
string cc; string cflags; string file_ending; @@ -124,44 +137,43 @@ string Module::compile() { if (should_use_CUDA_codegen()) { cc = util::getFromEnv("TACO_NVCC", "nvcc"); cflags = util::getFromEnv("TACO_NVCCFLAGS", - get_default_CUDA_compiler_flags()); + get_default_CUDA_compiler_flags()); file_ending = ".cu"; - shims_file = prefix + "_shims.cpp"; + shims_file = libPrefix + "_shims.cpp"; } else { cc = util::getFromEnv(target.compiler_env, target.compiler); cflags = util::getFromEnv("TACO_CFLAGS", - "-O3 -ffast-math -std=c99") + " -shared -fPIC"; + "-O3 -ffast-math -std=c99") + " -shared -fPIC"; #if USE_OPENMP cflags += " -fopenmp"; #endif file_ending = ".c"; shims_file = ""; } - - string cmd = cc + " " + cflags + " " + - prefix + file_ending + " " + shims_file + " " + - "-o " + fullpath + " -lm"; - // open the output file & write out the source - compileToSource(tmpdir, libname); - - // write out the shims - writeShims(funcs, tmpdir, libname); - - // now compile it + auto cmd = cc + " " + cflags + " " + + libPrefix + file_ending + " " + shims_file + " " + + "-o " + output + " -lm"; + + // Execute the compilation command. int err = system(cmd.data()); taco_uassert(err == 0) << "Compilation command failed:\n" << cmd - << "\nreturned " << err; + << "\nreturned " << err; - // use dlsym() to open the compiled library + // Use dlopen() to dynamically load the compiled library. if (lib_handle) { + // Close the existing handle if one is open already. dlclose(lib_handle); } - lib_handle = dlopen(fullpath.data(), RTLD_NOW | RTLD_LOCAL); + + lib_handle = dlopen(output.data(), RTLD_NOW | RTLD_LOCAL); taco_uassert(lib_handle) << "Failed to load generated code"; +} - return fullpath; +void Module::debugCompileSourceFile(string libPrefix) { + // Directly compile the files at the target libPrefix. 
+ this->compileAndLink(libPrefix, libPrefix + ".so"); } void Module::setSource(string source) { diff --git a/src/tensor.cpp b/src/tensor.cpp index 5e3407337..e6e1e8613 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -573,6 +573,7 @@ void TensorBase::compile() { stmt = parallelizeOuterLoop(stmt); compile(stmt, content->assembleWhileCompute); } + void TensorBase::compile(taco::IndexStmt stmt, bool assembleWhileCompute) { if (!needsCompile()) { return; @@ -602,6 +603,14 @@ void TensorBase::compile(taco::IndexStmt stmt, bool assembleWhileCompute) { cacheComputeKernel(concretizedAssign, content->module); } +void TensorBase::debugCompileSource(std::string libPrefix) { + // We're directly compiling user-provided source, so mark compilation as done. + this->setNeedsCompile(false); + // Make a new module and compile the source. + content->module = make_shared<Module>(); + content->module->debugCompileSourceFile(libPrefix); +} + taco_tensor_t* TensorBase::getTacoTensorT() { return getStorage(); }