[NNC] Generate C++ code for Allocate and Free (#51070)
Summary:
This is the initial skeleton for C++ codegen; it includes code generation for Allocate and Free.
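
As a rough illustration (based on the unit tests added below), small buffers are emitted as fixed-size stack arrays and large buffers as malloc/free pairs:

// element count <= 512: allocate on the stack
int x[6];

// element count > 512: allocate on the heap; the matching Free emits the free()
int64_t* y = static_cast<int64_t*>(malloc(24000));
free(y);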

Pull Request resolved: #51070

Test Plan: New unit tests are added to `test_cpp_codegen.cpp`.

Reviewed By: ZolotukhinM

Differential Revision: D26061818

Pulled By: cheng-chang

fbshipit-source-id: b5256b2dcee6b2583ba73b6c9684994dbe7cdc1f
Cheng Chang authored and facebook-github-bot committed Feb 1, 2021
1 parent 642afcb commit 109bc10
Showing 5 changed files with 129 additions and 0 deletions.
1 change: 1 addition & 0 deletions test/cpp/tensorexpr/CMakeLists.txt
@@ -4,6 +4,7 @@ set(TENSOREXPR_TEST_SRCS
${TENSOREXPR_TEST_ROOT}/test_aten.cpp
${TENSOREXPR_TEST_ROOT}/test_boundsinference.cpp
${TENSOREXPR_TEST_ROOT}/test_conv.cpp
${TENSOREXPR_TEST_ROOT}/test_cpp_codegen.cpp
${TENSOREXPR_TEST_ROOT}/test_expr.cpp
${TENSOREXPR_TEST_ROOT}/test_ir_printer.cpp
${TENSOREXPR_TEST_ROOT}/test_kernel.cpp
57 changes: 57 additions & 0 deletions test/cpp/tensorexpr/test_cpp_codegen.cpp
@@ -0,0 +1,57 @@
#include <gtest/gtest.h>

#include <test/cpp/tensorexpr/test_base.h>

#include <torch/csrc/jit/testing/file_check.h>
#include "torch/csrc/jit/tensorexpr/cpp_codegen.h"
#include "torch/csrc/jit/tensorexpr/mem_arena.h"
#include "torch/csrc/jit/tensorexpr/stmt.h"

namespace torch {
namespace jit {

using namespace torch::jit::tensorexpr;

TEST(CppPrinter, AllocateOnStackThenFree) {
  constexpr int dim0 = 2, dim1 = 3;
  KernelScope kernel_scope;
  VarHandle var("x", kHandle);
  Allocate* alloc = Allocate::make(var, kInt, {dim0, dim1});
  Free* free = Free::make(var);
  Block* block = Block::make({alloc, free});

  std::stringstream ss;
  CppPrinter printer(&ss);
  printer.visit(block);
  const std::string expected = R"(
# CHECK: {
# CHECK: int x[6];
# CHECK: }
)";
  torch::jit::testing::FileCheck().run(expected, ss.str());
}

TEST(CppPrinter, AllocateOnHeapThenFree) {
  constexpr int dim0 = 20, dim1 = 50, dim2 = 3;
  KernelScope kernel_scope;
  VarHandle var("y", kHandle);
  Allocate* alloc = Allocate::make(var, kLong, {dim0, dim1, dim2});
  Free* free = Free::make(var);
  Block* block = Block::make({alloc, free});

  std::stringstream ss;
  CppPrinter printer(&ss);
  printer.visit(block);
  // sizeof(int64_t) = 8;
  // dim0 * dim1 * dim2 * sizeof(int64_t) = 20 * 50 * 3 * 8 = 24000.
  const std::string expected = R"(
# CHECK: {
# CHECK: int64_t* y = static_cast<int64_t*>(malloc(24000));
# CHECK: free(y);
# CHECK: }
)";
  torch::jit::testing::FileCheck().run(expected, ss.str());
}

} // namespace jit
} // namespace torch
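
For reference, the `# CHECK:` directives above are FileCheck patterns that the captured printer output must match in order; for the stack-allocation test, the string printed into `ss` looks roughly like:

{
  int x[6];
}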
1 change: 1 addition & 0 deletions tools/build_variables.bzl
@@ -241,6 +241,7 @@ core_sources_full_mobile = [
"torch/csrc/jit/tensorexpr/bounds_overlap.cpp",
"torch/csrc/jit/tensorexpr/mem_dependency_checker.cpp",
"torch/csrc/jit/tensorexpr/codegen.cpp",
"torch/csrc/jit/tensorexpr/cpp_codegen.cpp",
"torch/csrc/jit/tensorexpr/eval.cpp",
"torch/csrc/jit/tensorexpr/expr.cpp",
"torch/csrc/jit/tensorexpr/hash_provider.cpp",
44 changes: 44 additions & 0 deletions torch/csrc/jit/tensorexpr/cpp_codegen.cpp
@@ -0,0 +1,44 @@
#include <torch/csrc/jit/tensorexpr/cpp_codegen.h>

namespace torch {
namespace jit {
namespace tensorexpr {

void CppPrinter::visit(const Allocate* alloc) {
  constexpr size_t kAllocOnStackThresholdSize = 512;

  size_t size = 1;
  for (auto dim : alloc->dims()) {
    const IntImm* v = dynamic_cast<const IntImm*>(dim);
    if (v) {
      size *= v->value();
    } else {
      throw std::runtime_error("Only IntImm dimensions are supported for now");
    }
  }

  emitIndent();
  if (size <= kAllocOnStackThresholdSize) {
    os() << alloc->dtype().ToCppString() << " " << (*alloc->buffer_var()) << "["
         << size << "];" << std::endl;
  } else {
    size *= alloc->dtype().byte_size();
    os() << alloc->dtype().ToCppString() << "* " << (*alloc->buffer_var())
         << " = static_cast<" << alloc->dtype().ToCppString() << "*>(malloc("
         << size << "));" << std::endl;
    allocated_on_heap_.insert(alloc->buffer_var());
  }
}

void CppPrinter::visit(const Free* free) {
  const Var* var = free->buffer_var();
  if (allocated_on_heap_.count(var)) {
    emitIndent();
    os() << "free(" << name_manager()->get_unique_name(var) << ");"
         << std::endl;
  }
}

} // namespace tensorexpr
} // namespace jit
} // namespace torch
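
As a worked example of the stack-vs-heap decision above, take the heap test case from `test_cpp_codegen.cpp`:

// element count: 20 * 50 * 3 = 3000, which exceeds kAllocOnStackThresholdSize (512),
// so the heap branch scales the count by the element byte size:
//   3000 * byte_size(kLong) = 3000 * 8 = 24000
// emitted code:
//   int64_t* y = static_cast<int64_t*>(malloc(24000));
//   free(y);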
26 changes: 26 additions & 0 deletions torch/csrc/jit/tensorexpr/cpp_codegen.h
@@ -0,0 +1,26 @@
#pragma once

#include <torch/csrc/jit/tensorexpr/ir_printer.h>

#include <unordered_set>

namespace torch {
namespace jit {
namespace tensorexpr {

// Generates C++ code from the IR.
class TORCH_API CppPrinter : public IRPrinter {
 public:
  explicit CppPrinter(std::ostream* os) : IRPrinter(*os) {}

  using IRPrinter::visit;
  void visit(const Allocate*) override;
  void visit(const Free*) override;

 private:
  std::unordered_set<const Var*> allocated_on_heap_;
};

} // namespace tensorexpr
} // namespace jit
} // namespace torch
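
A minimal usage sketch of the new printer (illustrative only; it mirrors the unit tests in `test_cpp_codegen.cpp` and uses only the types and factory functions shown in the diffs above):

#include <iostream>
#include <sstream>

#include "torch/csrc/jit/tensorexpr/cpp_codegen.h"
#include "torch/csrc/jit/tensorexpr/mem_arena.h"
#include "torch/csrc/jit/tensorexpr/stmt.h"

using namespace torch::jit::tensorexpr;

int main() {
  KernelScope kernel_scope; // arena that owns the IR nodes created below
  VarHandle buf("buf", kHandle);
  Allocate* alloc = Allocate::make(buf, kInt, {4, 4}); // 16 ints <= 512, so a stack array
  Free* free_stmt = Free::make(buf);
  Block* block = Block::make({alloc, free_stmt});

  std::stringstream ss;
  CppPrinter printer(&ss);
  printer.visit(block); // prints a block containing "int buf[16];"
  std::cout << ss.str();
  return 0;
}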
