Skip to content

Commit

Permalink
[Enhancement] Support tvm 1.x (#1531)
Browse files Browse the repository at this point in the history
* support tvm 1.x

* fix tvm sdk export
  • Loading branch information
grimoire committed Dec 21, 2022
1 parent 6e91614 commit 0e65606
Show file tree
Hide file tree
Showing 57 changed files with 2,316 additions and 45 deletions.
1 change: 1 addition & 0 deletions .codespell_ignore.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
cann
CANN
nd
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,18 @@ The supported Device-Platform-InferenceBackend matrix is presented as following,

The benchmark can be found from [here](docs/en/03-benchmark/benchmark.md)

| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | --------------------------------------------------------------- | --------------------------------------- | -------- | ---------------- |
| x86_64 CPU | ✔️ONNX Runtime<br>✔️pplnn<br>✔️ncnn<br>✔️OpenVINO<br>✔️LibTorch | ✔️ONNX Runtime<br>✔️OpenVINO | - | - |
| ARM CPU | ✔️ncnn | - | - | ✔️ncnn |
| RISC-V | ✔️ncnn | - | - | - |
| NVIDIA GPU | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn<br>✔️LibTorch | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn | - | - |
| NVIDIA Jetson | ✔️TensorRT | ✔️TensorRT | - | - |
| Huawei ascend310 | ✔️CANN | - | - | - |
| Rockchip | ✔️RKNN | - | - | - |
| Apple M1 | - | - | ✔️CoreML | - |
| Adreno GPU | - | - | - | ✔️ncnn<br>✔️SNPE |
| Hexagon DSP | - | - | - | ✔️SNPE |
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ------------------------------------------------------------------------ | --------------------------------------- | -------- | ---------------- |
| x86_64 CPU | ✔️ONNX Runtime<br>✔️pplnn<br>✔️ncnn<br>✔️OpenVINO<br>✔️LibTorch<br>✔️TVM | ✔️ONNX Runtime<br>✔️OpenVINO | - | - |
| ARM CPU | ✔️ncnn | - | - | ✔️ncnn |
| RISC-V | ✔️ncnn | - | - | - |
| NVIDIA GPU | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn<br>✔️LibTorch<br>✔️TVM | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn | - | - |
| NVIDIA Jetson | ✔️TensorRT | ✔️TensorRT | - | - |
| Huawei ascend310 | ✔️CANN | - | - | - |
| Rockchip | ✔️RKNN | - | - | - |
| Apple M1 | - | - | ✔️CoreML | - |
| Adreno GPU | - | - | - | ✔️ncnn<br>✔️SNPE |
| Hexagon DSP | - | - | - | ✔️SNPE |

### Efficient and scalable C/C++ SDK Framework

Expand Down
24 changes: 12 additions & 12 deletions README_zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,18 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为

支持的设备平台和推理引擎如下表所示。benchmark请参考[这里](docs/zh_cn/03-benchmark/benchmark.md)

| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | --------------------------------------------------------------- | --------------------------------------- | -------- | ---------------- |
| x86_64 CPU | ✔️ONNX Runtime<br>✔️pplnn<br>✔️ncnn<br>✔️OpenVINO<br>✔️LibTorch | ✔️ONNX Runtime<br>✔️OpenVINO | - | - |
| ARM CPU | ✔️ncnn | - | - | ✔️ncnn |
| RISC-V | ✔️ncnn | - | - | - |
| NVIDIA GPU | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn<br>✔️LibTorch | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn | - | - |
| NVIDIA Jetson | ✔️TensorRT | ✔️TensorRT | - | - |
| Huawei ascend310 | ✔️CANN | - | - | - |
| Rockchip | ✔️RKNN | - | - | - |
| Apple M1 | - | - | ✔️CoreML | - |
| Adreno GPU | - | - | - | ✔️ncnn<br>✔️SNPE |
| Hexagon DSP | - | - | - | ✔️SNPE |
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ------------------------------------------------------------------------ | --------------------------------------- | -------- | ---------------- |
| x86_64 CPU | ✔️ONNX Runtime<br>✔️pplnn<br>✔️ncnn<br>✔️OpenVINO<br>✔️LibTorch<br>✔️TVM | ✔️ONNX Runtime<br>✔️OpenVINO | - | - |
| ARM CPU | ✔️ncnn | - | - | ✔️ncnn |
| RISC-V | ✔️ncnn | - | - | - |
| NVIDIA GPU | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn<br>✔️LibTorch<br>✔️TVM | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn | - | - |
| NVIDIA Jetson | ✔️TensorRT | ✔️TensorRT | - | - |
| Huawei ascend310 | ✔️CANN | - | - | - |
| Rockchip | ✔️RKNN | - | - | - |
| Apple M1 | - | - | ✔️CoreML | - |
| Adreno GPU | - | - | - | ✔️ncnn<br>✔️SNPE |
| Hexagon DSP | - | - | - | ✔️SNPE |

### SDK 可高度定制化

Expand Down
47 changes: 47 additions & 0 deletions cmake/modules/FindTVM.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (c) OpenMMLab. All rights reserved.

# Resolve TVM_DIR: prefer the cache/-D value, fall back to the environment.
if (NOT DEFINED TVM_DIR)
    set(TVM_DIR $ENV{TVM_DIR})
endif ()
if (NOT TVM_DIR)
    message(FATAL_ERROR "Please set TVM_DIR with cmake -D option.")
endif ()

# TVM runtime C API headers.
find_path(
    TVM_INCLUDE_DIR tvm/runtime/c_runtime_api.h
    HINTS ${TVM_DIR}
    PATH_SUFFIXES include)

# dmlc-core headers (bundled with TVM as a 3rdparty submodule).
find_path(
    DMLC_CORE_INCLUDE_DIR dmlc/io.h
    HINTS ${TVM_DIR}/3rdparty/dmlc-core
    PATH_SUFFIXES include)

# dlpack headers (bundled with TVM as a 3rdparty submodule).
find_path(
    DLPACK_INCLUDE_DIR dlpack/dlpack.h
    HINTS ${TVM_DIR}/3rdparty/dlpack
    PATH_SUFFIXES include)

# libtvm_runtime may live in build/, lib/, or a per-config build directory.
find_library(
    TVM_LIBRARY_PATH tvm_runtime
    HINTS ${TVM_DIR}
    PATH_SUFFIXES build lib build/${CMAKE_BUILD_TYPE})

if (NOT (TVM_INCLUDE_DIR AND DMLC_CORE_INCLUDE_DIR AND DLPACK_INCLUDE_DIR AND TVM_LIBRARY_PATH))
    message(FATAL_ERROR "Couldn't find tvm in TVM_DIR: "
            "${TVM_DIR}, please check if the path is correct.")
endif ()

add_library(tvm_runtime SHARED IMPORTED)
set_property(TARGET tvm_runtime APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)

# BUGFIX: multi-value target properties must be passed to
# set_target_properties as ONE ";"-separated value. With the paths expanded
# unquoted, CMake parsed the 2nd include dir as a *property name* and the 3rd
# as its value, so consumers only ever received ${TVM_INCLUDE_DIR}.
set(_tvm_include_dirs
    "${TVM_INCLUDE_DIR};${DMLC_CORE_INCLUDE_DIR};${DLPACK_INCLUDE_DIR}")
if (MSVC)
    # On Windows, an imported SHARED library links via its import .lib
    # (IMPORTED_IMPLIB), not the .dll location.
    set_target_properties(tvm_runtime PROPERTIES
        IMPORTED_IMPLIB_RELEASE "${TVM_LIBRARY_PATH}"
        INTERFACE_INCLUDE_DIRECTORIES "${_tvm_include_dirs}")
else ()
    set_target_properties(tvm_runtime PROPERTIES
        IMPORTED_LOCATION_RELEASE "${TVM_LIBRARY_PATH}"
        INTERFACE_INCLUDE_DIRECTORIES "${_tvm_include_dirs}")
endif ()
unset(_tvm_include_dirs)
1 change: 1 addition & 0 deletions configs/_base_/backends/tvm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
backend_config = dict(type='tvm')
12 changes: 12 additions & 0 deletions configs/mmcls/classification_tvm-ansor_static-224x224.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Classification deployed via TVM, tuned with the auto-scheduler (Ansor);
# static 224x224 input.
_base_ = ['./classification_static.py', '../_base_/backends/tvm.py']

# Export ONNX with a fixed 224x224 spatial resolution.
onnx_config = dict(input_shape=[224, 224])

backend_config = dict(model_inputs=[{
    # NCHW input tensor: batch 1, 3 channels, 224x224.
    'shape': {'input': [1, 3, 224, 224]},
    'dtype': {'input': 'float32'},
    # Ansor: up to 2000 measurement trials, results logged for reuse.
    'tuner': {
        'type': 'AutoScheduleTuner',
        'log_file': 'tvm_tune_log.log',
        'num_measure_trials': 2000,
    },
}])
16 changes: 16 additions & 0 deletions configs/mmcls/classification_tvm-autotvm-int8_static-224x224.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Int8 variant of the AutoTVM classification recipe: adds calibration-data
# generation and a TVM quantization config on top of the fp32 base.
_base_ = ['./classification_tvm-autotvm_static-224x224.py']

# Dump calibration tensors to calib_data.h5 during export.
calib_config = {'create_calib': True, 'calib_file': 'calib_data.h5'}

backend_config = {
    'model_inputs': [{
        # NCHW input: batch 1, 3 channels, 224x224, fp32 before quantization.
        'shape': {'input': [1, 3, 224, 224]},
        'dtype': {'input': 'float32'},
        # AutoTVM: 1000 trials driven by the XGBoost cost model.
        'tuner': {
            'type': 'AutoTVMTuner',
            'log_file': 'tvm_tune_log.log',
            'n_trial': 1000,
            'tuner': {'type': 'XGBTuner'},
        },
        # KL-divergence activation calibration; weights scaled by max.
        'qconfig': {'calibrate_mode': 'kl_divergence', 'weight_scale': 'max'},
    }]
}
14 changes: 14 additions & 0 deletions configs/mmcls/classification_tvm-autotvm_static-224x224.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Classification deployed via TVM, tuned with AutoTVM; static 224x224 input.
_base_ = ['./classification_static.py', '../_base_/backends/tvm.py']

# Export ONNX with a fixed 224x224 spatial resolution.
onnx_config = dict(input_shape=[224, 224])

backend_config = dict(model_inputs=[{
    # NCHW input tensor: batch 1, 3 channels, 224x224.
    'shape': {'input': [1, 3, 224, 224]},
    'dtype': {'input': 'float32'},
    # AutoTVM: 1000 trials driven by the XGBoost cost model.
    'tuner': {
        'type': 'AutoTVMTuner',
        'log_file': 'tvm_tune_log.log',
        'n_trial': 1000,
        'tuner': {'type': 'XGBTuner'},
    },
}])
13 changes: 13 additions & 0 deletions configs/mmdet/detection/detection_tvm-ansor_static-800x1344.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Detection deployed via TVM (Ansor auto-scheduler); static 800x1344 input.
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1344, 800])

backend_config = dict(model_inputs=[{
    # Detectors need the TVM virtual machine executor (dynamic control flow).
    'use_vm': True,
    # NCHW input tensor: batch 1, 3 channels, 800x1344.
    'shape': {'input': [1, 3, 800, 1344]},
    'dtype': {'input': 'float32'},
    # Ansor: up to 2000 measurement trials, results logged for reuse.
    'tuner': {
        'type': 'AutoScheduleTuner',
        'log_file': 'tvm_tune_log.log',
        'num_measure_trials': 2000,
    },
}])
15 changes: 15 additions & 0 deletions configs/mmdet/detection/detection_tvm-autotvm_static-300x300.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Detection deployed via TVM (AutoTVM tuning); static 300x300 input.
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[300, 300])

backend_config = dict(model_inputs=[{
    # Detectors need the TVM virtual machine executor (dynamic control flow).
    'use_vm': True,
    # NCHW input tensor: batch 1, 3 channels, 300x300.
    'shape': {'input': [1, 3, 300, 300]},
    'dtype': {'input': 'float32'},
    # AutoTVM: 1000 trials driven by the XGBoost cost model.
    'tuner': {
        'type': 'AutoTVMTuner',
        'log_file': 'tvm_tune_log.log',
        'n_trial': 1000,
        'tuner': {'type': 'XGBTuner'},
    },
}])
15 changes: 15 additions & 0 deletions configs/mmdet/detection/detection_tvm-autotvm_static-800x1344.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Detection deployed via TVM (AutoTVM tuning); static 800x1344 input.
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1344, 800])

backend_config = dict(model_inputs=[{
    # Detectors need the TVM virtual machine executor (dynamic control flow).
    'use_vm': True,
    # NCHW input tensor: batch 1, 3 channels, 800x1344.
    'shape': {'input': [1, 3, 800, 1344]},
    'dtype': {'input': 'float32'},
    # AutoTVM: 1000 trials driven by the XGBoost cost model.
    'tuner': {
        'type': 'AutoTVMTuner',
        'log_file': 'tvm_tune_log.log',
        'n_trial': 1000,
        'tuner': {'type': 'XGBTuner'},
    },
}])
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Instance segmentation deployed via TVM (Ansor auto-scheduler);
# static 800x1344 input.
_base_ = [
    '../_base_/base_instance-seg_static.py', '../../_base_/backends/tvm.py'
]

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1344, 800])

backend_config = dict(model_inputs=[{
    # Instance-seg models need the TVM virtual machine executor.
    'use_vm': True,
    # NCHW input tensor: batch 1, 3 channels, 800x1344.
    'shape': {'input': [1, 3, 800, 1344]},
    'dtype': {'input': 'float32'},
    # Ansor: up to 20000 measurement trials (larger budget than detection).
    'tuner': {
        'type': 'AutoScheduleTuner',
        'log_file': 'tvm_tune_log.log',
        'num_measure_trials': 20000,
    },
}])
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Instance segmentation deployed via TVM (AutoTVM tuning);
# static 800x1344 input.
_base_ = [
    '../_base_/base_instance-seg_static.py', '../../_base_/backends/tvm.py'
]

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1344, 800])

backend_config = dict(model_inputs=[{
    # Instance-seg models need the TVM virtual machine executor.
    'use_vm': True,
    # NCHW input tensor: batch 1, 3 channels, 800x1344.
    'shape': {'input': [1, 3, 800, 1344]},
    'dtype': {'input': 'float32'},
    # AutoTVM: 10000 trials (larger budget) with the XGBoost cost model.
    'tuner': {
        'type': 'AutoTVMTuner',
        'log_file': 'tvm_tune_log.log',
        'n_trial': 10000,
        'tuner': {'type': 'XGBTuner'},
    },
}])
12 changes: 12 additions & 0 deletions configs/mmseg/segmentation_tvm-ansor_static-512x1024.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Segmentation deployed via TVM (Ansor auto-scheduler); static 512x1024 input.
_base_ = ['./segmentation_static.py', '../_base_/backends/tvm.py']

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1024, 512])

backend_config = dict(model_inputs=[{
    # NCHW input tensor: batch 1, 3 channels, 512x1024.
    'shape': {'input': [1, 3, 512, 1024]},
    'dtype': {'input': 'float32'},
    # Ansor: up to 2000 measurement trials, results logged for reuse.
    'tuner': {
        'type': 'AutoScheduleTuner',
        'log_file': 'tvm_tune_log.log',
        'num_measure_trials': 2000,
    },
}])
13 changes: 13 additions & 0 deletions configs/mmseg/segmentation_tvm-autotvm_static-512x1024.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Segmentation deployed via TVM (AutoTVM tuning); static 512x1024 input.
_base_ = ['./segmentation_static.py', '../_base_/backends/tvm.py']

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1024, 512])

backend_config = dict(model_inputs=[{
    # NCHW input tensor: batch 1, 3 channels, 512x1024.
    'shape': {'input': [1, 3, 512, 1024]},
    'dtype': {'input': 'float32'},
    # AutoTVM: 1000 trials driven by the XGBoost cost model.
    'tuner': {
        'type': 'AutoTVMTuner',
        'log_file': 'tvm_tune_log.log',
        'n_trial': 1000,
        'tuner': {'type': 'XGBTuner'},
    },
}])
18 changes: 13 additions & 5 deletions csrc/mmdeploy/device/cpu/cpu_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,16 @@ class CpuHostMemory : public NonCopyable {
public:
CpuHostMemory() : size_(), data_(), owned_data_{false} {}
Result<void> Init(size_t size, size_t alignment) {
if (alignment != 1) {
return Status(eNotSupported);
}
data_ = std::malloc(size);
size_t space = (size + alignment - 1) / alignment * alignment;
#ifdef _MSC_VER
data_ = _aligned_malloc(space, alignment);
#else
data_ = std::aligned_alloc(alignment, space);
#endif
if (!data_) {
return Status(eOutOfMemory);
}
aligned_data_ = data_;
size_ = size;
owned_data_ = true;
return success();
Expand All @@ -38,7 +41,11 @@ class CpuHostMemory : public NonCopyable {
~CpuHostMemory() {
if (data_) {
if (owned_data_) {
#ifdef _MSC_VER
_aligned_free(data_);
#else
std::free(data_);
#endif
owned_data_ = false;
}
data_ = nullptr;
Expand All @@ -47,11 +54,12 @@ class CpuHostMemory : public NonCopyable {
size_ = 0;
}
size_t size() const { return size_; }
void* data() const { return data_; }
void* data() const { return owned_data_ ? aligned_data_ : data_; }

private:
size_t size_;
void* data_;
void* aligned_data_{nullptr};
bool owned_data_;
std::shared_ptr<void> external_;
};
Expand Down
2 changes: 1 addition & 1 deletion csrc/mmdeploy/device/cuda/cuda_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class CudaDeviceMemory : public NonCopyable {
public:
explicit CudaDeviceMemory(int device_id) : device_id_(device_id), size_(), owned_block_() {}
Result<void> Init(size_t size, Allocator allocator, size_t alignment, uint64_t flags) {
if (alignment != 1) {
if (alignment > 256 || 256 % alignment != 0) {
return Status(eNotSupported);
}
allocator_ = std::move(allocator);
Expand Down
4 changes: 4 additions & 0 deletions csrc/mmdeploy/net/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,9 @@ if ("rknn" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(rknn)
endif ()

# Descend into the TVM net wrapper only when the "tvm" backend was requested
# via the MMDEPLOY_TARGET_BACKENDS list.
if ("tvm" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(tvm)
endif ()

mmdeploy_add_module(${PROJECT_NAME} net_module.cpp)
add_library(mmdeploy::net_module ALIAS ${PROJECT_NAME})
11 changes: 11 additions & 0 deletions csrc/mmdeploy/net/tvm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.

# Build file for the MMDeploy TVM net module (wraps the TVM runtime as an
# inference backend).
project(mmdeploy_tvm_net)

# Locates the TVM runtime library plus the tvm/dmlc-core/dlpack headers and
# defines the imported target `tvm_runtime` and the *_INCLUDE_DIR variables
# used below. Fails the configure step with a clear message if TVM_DIR is
# missing or wrong.
include(${CMAKE_SOURCE_DIR}/cmake/modules/FindTVM.cmake)

# mmdeploy_add_module is a project-wide helper that registers the module
# target; the sources are private implementation details.
mmdeploy_add_module(${PROJECT_NAME} tvm_net.cpp)
target_include_directories(${PROJECT_NAME} PRIVATE ${TVM_INCLUDE_DIR} ${DLPACK_INCLUDE_DIR} ${DMLC_CORE_INCLUDE_DIR})
target_link_libraries(${PROJECT_NAME} PRIVATE tvm_runtime mmdeploy_dlpack_utils)

# Namespaced alias so consumers can link mmdeploy::tvm_net and get a
# configure-time error on typos.
add_library(mmdeploy::tvm_net ALIAS ${PROJECT_NAME})
Loading

0 comments on commit 0e65606

Please sign in to comment.