Skip to content

Commit

Permalink
[Enhancement] Support tvm 1.x (#1531)
Browse files Browse the repository at this point in the history
* support tvm 1.x

* fix tvm sdk export
  • Loading branch information
grimoire committed Dec 21, 2022
1 parent 6e91614 commit 0e65606
Show file tree
Hide file tree
Showing 57 changed files with 2,316 additions and 45 deletions.
1 change: 1 addition & 0 deletions .codespell_ignore.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
cann
CANN
nd
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,18 @@ The supported Device-Platform-InferenceBackend matrix is presented as following,

The benchmark can be found from [here](docs/en/03-benchmark/benchmark.md)

| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | --------------------------------------------------------------- | --------------------------------------- | -------- | ---------------- |
| x86_64 CPU | ✔️ONNX Runtime<br>✔️pplnn<br>✔️ncnn<br>✔️OpenVINO<br>✔️LibTorch | ✔️ONNX Runtime<br>✔️OpenVINO | - | - |
| ARM CPU | ✔️ncnn | - | - | ✔️ncnn |
| RISC-V | ✔️ncnn | - | - | - |
| NVIDIA GPU | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn<br>✔️LibTorch | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn | - | - |
| NVIDIA Jetson | ✔️TensorRT | ✔️TensorRT | - | - |
| Huawei ascend310 | ✔️CANN | - | - | - |
| Rockchip | ✔️RKNN | - | - | - |
| Apple M1 | - | - | ✔️CoreML | - |
| Adreno GPU | - | - | - | ✔️ncnn<br>✔️SNPE |
| Hexagon DSP | - | - | - | ✔️SNPE |
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ------------------------------------------------------------------------ | --------------------------------------- | -------- | ---------------- |
| x86_64 CPU | ✔️ONNX Runtime<br>✔️pplnn<br>✔️ncnn<br>✔️OpenVINO<br>✔️LibTorch<br>✔️TVM | ✔️ONNX Runtime<br>✔️OpenVINO | - | - |
| ARM CPU | ✔️ncnn | - | - | ✔️ncnn |
| RISC-V | ✔️ncnn | - | - | - |
| NVIDIA GPU | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn<br>✔️LibTorch<br>✔️TVM | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn | - | - |
| NVIDIA Jetson | ✔️TensorRT | ✔️TensorRT | - | - |
| Huawei ascend310 | ✔️CANN | - | - | - |
| Rockchip | ✔️RKNN | - | - | - |
| Apple M1 | - | - | ✔️CoreML | - |
| Adreno GPU | - | - | - | ✔️ncnn<br>✔️SNPE |
| Hexagon DSP | - | - | - | ✔️SNPE |

### Efficient and scalable C/C++ SDK Framework

Expand Down
24 changes: 12 additions & 12 deletions README_zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,18 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为

支持的设备平台和推理引擎如下表所示。benchmark请参考[这里](docs/zh_cn/03-benchmark/benchmark.md)

| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | --------------------------------------------------------------- | --------------------------------------- | -------- | ---------------- |
| x86_64 CPU | ✔️ONNX Runtime<br>✔️pplnn<br>✔️ncnn<br>✔️OpenVINO<br>✔️LibTorch | ✔️ONNX Runtime<br>✔️OpenVINO | - | - |
| ARM CPU | ✔️ncnn | - | - | ✔️ncnn |
| RISC-V | ✔️ncnn | - | - | - |
| NVIDIA GPU | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn<br>✔️LibTorch | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn | - | - |
| NVIDIA Jetson | ✔️TensorRT | ✔️TensorRT | - | - |
| Huawei ascend310 | ✔️CANN | - | - | - |
| Rockchip | ✔️RKNN | - | - | - |
| Apple M1 | - | - | ✔️CoreML | - |
| Adreno GPU | - | - | - | ✔️ncnn<br>✔️SNPE |
| Hexagon DSP | - | - | - | ✔️SNPE |
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ------------------------------------------------------------------------ | --------------------------------------- | -------- | ---------------- |
| x86_64 CPU | ✔️ONNX Runtime<br>✔️pplnn<br>✔️ncnn<br>✔️OpenVINO<br>✔️LibTorch<br>✔️TVM | ✔️ONNX Runtime<br>✔️OpenVINO | - | - |
| ARM CPU | ✔️ncnn | - | - | ✔️ncnn |
| RISC-V | ✔️ncnn | - | - | - |
| NVIDIA GPU | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn<br>✔️LibTorch<br>✔️TVM | ✔️ONNX Runtime<br>✔️TensorRT<br>✔️pplnn | - | - |
| NVIDIA Jetson | ✔️TensorRT | ✔️TensorRT | - | - |
| Huawei ascend310 | ✔️CANN | - | - | - |
| Rockchip | ✔️RKNN | - | - | - |
| Apple M1 | - | - | ✔️CoreML | - |
| Adreno GPU | - | - | - | ✔️ncnn<br>✔️SNPE |
| Hexagon DSP | - | - | - | ✔️SNPE |

### SDK 可高度定制化

Expand Down
47 changes: 47 additions & 0 deletions cmake/modules/FindTVM.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (c) OpenMMLab. All rights reserved.

# Resolve TVM_DIR: prefer the cache/-D value, fall back to the environment.
if (NOT DEFINED TVM_DIR)
    set(TVM_DIR $ENV{TVM_DIR})
endif ()
if (NOT TVM_DIR)
    message(FATAL_ERROR "Please set TVM_DIR with cmake -D option.")
endif ()

# TVM runtime C API headers.
find_path(
    TVM_INCLUDE_DIR tvm/runtime/c_runtime_api.h
    HINTS ${TVM_DIR}
    PATH_SUFFIXES include)

# dmlc-core headers (bundled with TVM as a 3rdparty submodule).
find_path(
    DMLC_CORE_INCLUDE_DIR dmlc/io.h
    HINTS ${TVM_DIR}/3rdparty/dmlc-core
    PATH_SUFFIXES include)

# dlpack headers (bundled with TVM as a 3rdparty submodule).
find_path(
    DLPACK_INCLUDE_DIR dlpack/dlpack.h
    HINTS ${TVM_DIR}/3rdparty/dlpack
    PATH_SUFFIXES include)

# libtvm_runtime may live in build/, lib/, or a per-config build directory.
find_library(
    TVM_LIBRARY_PATH tvm_runtime
    HINTS ${TVM_DIR}
    PATH_SUFFIXES build lib build/${CMAKE_BUILD_TYPE})

if (NOT (TVM_INCLUDE_DIR AND DMLC_CORE_INCLUDE_DIR AND DLPACK_INCLUDE_DIR AND TVM_LIBRARY_PATH))
    message(FATAL_ERROR "Couldn't find tvm in TVM_DIR: "
            "${TVM_DIR}, please check if the path is correct.")
endif ()

add_library(tvm_runtime SHARED IMPORTED)
set_property(TARGET tvm_runtime APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)

# BUGFIX: multi-value target properties must be passed to
# set_target_properties as ONE ";"-separated value. With the paths expanded
# unquoted, CMake parsed the 2nd include dir as a *property name* and the 3rd
# as its value, so consumers only ever received ${TVM_INCLUDE_DIR}.
set(_tvm_include_dirs
    "${TVM_INCLUDE_DIR};${DMLC_CORE_INCLUDE_DIR};${DLPACK_INCLUDE_DIR}")
if (MSVC)
    # On Windows, an imported SHARED library links via its import .lib
    # (IMPORTED_IMPLIB), not the .dll location.
    set_target_properties(tvm_runtime PROPERTIES
        IMPORTED_IMPLIB_RELEASE "${TVM_LIBRARY_PATH}"
        INTERFACE_INCLUDE_DIRECTORIES "${_tvm_include_dirs}")
else ()
    set_target_properties(tvm_runtime PROPERTIES
        IMPORTED_LOCATION_RELEASE "${TVM_LIBRARY_PATH}"
        INTERFACE_INCLUDE_DIRECTORIES "${_tvm_include_dirs}")
endif ()
unset(_tvm_include_dirs)
1 change: 1 addition & 0 deletions configs/_base_/backends/tvm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
backend_config = dict(type='tvm')
12 changes: 12 additions & 0 deletions configs/mmcls/classification_tvm-ansor_static-224x224.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Classification deployed via TVM, tuned with the auto-scheduler (Ansor);
# static 224x224 input.
_base_ = ['./classification_static.py', '../_base_/backends/tvm.py']

# Export ONNX with a fixed 224x224 spatial resolution.
onnx_config = dict(input_shape=[224, 224])

backend_config = dict(model_inputs=[{
    # NCHW input tensor: batch 1, 3 channels, 224x224.
    'shape': {'input': [1, 3, 224, 224]},
    'dtype': {'input': 'float32'},
    # Ansor: up to 2000 measurement trials, results logged for reuse.
    'tuner': {
        'type': 'AutoScheduleTuner',
        'log_file': 'tvm_tune_log.log',
        'num_measure_trials': 2000,
    },
}])
16 changes: 16 additions & 0 deletions configs/mmcls/classification_tvm-autotvm-int8_static-224x224.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Int8 variant of the AutoTVM classification recipe: adds calibration-data
# generation and a TVM quantization config on top of the fp32 base.
_base_ = ['./classification_tvm-autotvm_static-224x224.py']

# Dump calibration tensors to calib_data.h5 during export.
calib_config = {'create_calib': True, 'calib_file': 'calib_data.h5'}

backend_config = {
    'model_inputs': [{
        # NCHW input: batch 1, 3 channels, 224x224, fp32 before quantization.
        'shape': {'input': [1, 3, 224, 224]},
        'dtype': {'input': 'float32'},
        # AutoTVM: 1000 trials driven by the XGBoost cost model.
        'tuner': {
            'type': 'AutoTVMTuner',
            'log_file': 'tvm_tune_log.log',
            'n_trial': 1000,
            'tuner': {'type': 'XGBTuner'},
        },
        # KL-divergence activation calibration; weights scaled by max.
        'qconfig': {'calibrate_mode': 'kl_divergence', 'weight_scale': 'max'},
    }]
}
14 changes: 14 additions & 0 deletions configs/mmcls/classification_tvm-autotvm_static-224x224.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Classification deployed via TVM, tuned with AutoTVM; static 224x224 input.
_base_ = ['./classification_static.py', '../_base_/backends/tvm.py']

# Export ONNX with a fixed 224x224 spatial resolution.
onnx_config = dict(input_shape=[224, 224])

backend_config = dict(model_inputs=[{
    # NCHW input tensor: batch 1, 3 channels, 224x224.
    'shape': {'input': [1, 3, 224, 224]},
    'dtype': {'input': 'float32'},
    # AutoTVM: 1000 trials driven by the XGBoost cost model.
    'tuner': {
        'type': 'AutoTVMTuner',
        'log_file': 'tvm_tune_log.log',
        'n_trial': 1000,
        'tuner': {'type': 'XGBTuner'},
    },
}])
13 changes: 13 additions & 0 deletions configs/mmdet/detection/detection_tvm-ansor_static-800x1344.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Detection deployed via TVM (Ansor auto-scheduler); static 800x1344 input.
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1344, 800])

backend_config = dict(model_inputs=[{
    # Detectors need the TVM virtual machine executor (dynamic control flow).
    'use_vm': True,
    # NCHW input tensor: batch 1, 3 channels, 800x1344.
    'shape': {'input': [1, 3, 800, 1344]},
    'dtype': {'input': 'float32'},
    # Ansor: up to 2000 measurement trials, results logged for reuse.
    'tuner': {
        'type': 'AutoScheduleTuner',
        'log_file': 'tvm_tune_log.log',
        'num_measure_trials': 2000,
    },
}])
15 changes: 15 additions & 0 deletions configs/mmdet/detection/detection_tvm-autotvm_static-300x300.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Detection deployed via TVM (AutoTVM tuning); static 300x300 input.
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[300, 300])

backend_config = dict(model_inputs=[{
    # Detectors need the TVM virtual machine executor (dynamic control flow).
    'use_vm': True,
    # NCHW input tensor: batch 1, 3 channels, 300x300.
    'shape': {'input': [1, 3, 300, 300]},
    'dtype': {'input': 'float32'},
    # AutoTVM: 1000 trials driven by the XGBoost cost model.
    'tuner': {
        'type': 'AutoTVMTuner',
        'log_file': 'tvm_tune_log.log',
        'n_trial': 1000,
        'tuner': {'type': 'XGBTuner'},
    },
}])
15 changes: 15 additions & 0 deletions configs/mmdet/detection/detection_tvm-autotvm_static-800x1344.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Detection deployed via TVM (AutoTVM tuning); static 800x1344 input.
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1344, 800])

backend_config = dict(model_inputs=[{
    # Detectors need the TVM virtual machine executor (dynamic control flow).
    'use_vm': True,
    # NCHW input tensor: batch 1, 3 channels, 800x1344.
    'shape': {'input': [1, 3, 800, 1344]},
    'dtype': {'input': 'float32'},
    # AutoTVM: 1000 trials driven by the XGBoost cost model.
    'tuner': {
        'type': 'AutoTVMTuner',
        'log_file': 'tvm_tune_log.log',
        'n_trial': 1000,
        'tuner': {'type': 'XGBTuner'},
    },
}])
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Instance segmentation deployed via TVM (Ansor auto-scheduler);
# static 800x1344 input.
_base_ = [
    '../_base_/base_instance-seg_static.py', '../../_base_/backends/tvm.py'
]

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1344, 800])

backend_config = dict(model_inputs=[{
    # Instance-seg models need the TVM virtual machine executor.
    'use_vm': True,
    # NCHW input tensor: batch 1, 3 channels, 800x1344.
    'shape': {'input': [1, 3, 800, 1344]},
    'dtype': {'input': 'float32'},
    # Ansor: up to 20000 measurement trials (larger budget than detection).
    'tuner': {
        'type': 'AutoScheduleTuner',
        'log_file': 'tvm_tune_log.log',
        'num_measure_trials': 20000,
    },
}])
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Instance segmentation deployed via TVM (AutoTVM tuning);
# static 800x1344 input.
_base_ = [
    '../_base_/base_instance-seg_static.py', '../../_base_/backends/tvm.py'
]

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1344, 800])

backend_config = dict(model_inputs=[{
    # Instance-seg models need the TVM virtual machine executor.
    'use_vm': True,
    # NCHW input tensor: batch 1, 3 channels, 800x1344.
    'shape': {'input': [1, 3, 800, 1344]},
    'dtype': {'input': 'float32'},
    # AutoTVM: 10000 trials (larger budget) with the XGBoost cost model.
    'tuner': {
        'type': 'AutoTVMTuner',
        'log_file': 'tvm_tune_log.log',
        'n_trial': 10000,
        'tuner': {'type': 'XGBTuner'},
    },
}])
12 changes: 12 additions & 0 deletions configs/mmseg/segmentation_tvm-ansor_static-512x1024.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Segmentation deployed via TVM (Ansor auto-scheduler); static 512x1024 input.
_base_ = ['./segmentation_static.py', '../_base_/backends/tvm.py']

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1024, 512])

backend_config = dict(model_inputs=[{
    # NCHW input tensor: batch 1, 3 channels, 512x1024.
    'shape': {'input': [1, 3, 512, 1024]},
    'dtype': {'input': 'float32'},
    # Ansor: up to 2000 measurement trials, results logged for reuse.
    'tuner': {
        'type': 'AutoScheduleTuner',
        'log_file': 'tvm_tune_log.log',
        'num_measure_trials': 2000,
    },
}])
13 changes: 13 additions & 0 deletions configs/mmseg/segmentation_tvm-autotvm_static-512x1024.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Segmentation deployed via TVM (AutoTVM tuning); static 512x1024 input.
_base_ = ['./segmentation_static.py', '../_base_/backends/tvm.py']

# ONNX export shape is given as [width, height].
onnx_config = dict(input_shape=[1024, 512])

backend_config = dict(model_inputs=[{
    # NCHW input tensor: batch 1, 3 channels, 512x1024.
    'shape': {'input': [1, 3, 512, 1024]},
    'dtype': {'input': 'float32'},
    # AutoTVM: 1000 trials driven by the XGBoost cost model.
    'tuner': {
        'type': 'AutoTVMTuner',
        'log_file': 'tvm_tune_log.log',
        'n_trial': 1000,
        'tuner': {'type': 'XGBTuner'},
    },
}])
18 changes: 13 additions & 5 deletions csrc/mmdeploy/device/cpu/cpu_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,16 @@ class CpuHostMemory : public NonCopyable {
public:
CpuHostMemory() : size_(), data_(), owned_data_{false} {}
Result<void> Init(size_t size, size_t alignment) {
if (alignment != 1) {
return Status(eNotSupported);
}
data_ = std::malloc(size);
size_t space = (size + alignment - 1) / alignment * alignment;
#ifdef _MSC_VER
data_ = _aligned_malloc(space, alignment);
#else
data_ = std::aligned_alloc(alignment, space);
#endif
if (!data_) {
return Status(eOutOfMemory);
}
aligned_data_ = data_;
size_ = size;
owned_data_ = true;
return success();
Expand All @@ -38,7 +41,11 @@ class CpuHostMemory : public NonCopyable {
~CpuHostMemory() {
if (data_) {
if (owned_data_) {
#ifdef _MSC_VER
_aligned_free(data_);
#else
std::free(data_);
#endif
owned_data_ = false;
}
data_ = nullptr;
Expand All @@ -47,11 +54,12 @@ class CpuHostMemory : public NonCopyable {
size_ = 0;
}
size_t size() const { return size_; }
void* data() const { return data_; }
void* data() const { return owned_data_ ? aligned_data_ : data_; }

private:
size_t size_;
void* data_;
void* aligned_data_{nullptr};
bool owned_data_;
std::shared_ptr<void> external_;
};
Expand Down
2 changes: 1 addition & 1 deletion csrc/mmdeploy/device/cuda/cuda_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class CudaDeviceMemory : public NonCopyable {
public:
explicit CudaDeviceMemory(int device_id) : device_id_(device_id), size_(), owned_block_() {}
Result<void> Init(size_t size, Allocator allocator, size_t alignment, uint64_t flags) {
if (alignment != 1) {
if (alignment > 256 || 256 % alignment != 0) {
return Status(eNotSupported);
}
allocator_ = std::move(allocator);
Expand Down
4 changes: 4 additions & 0 deletions csrc/mmdeploy/net/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,9 @@ if ("rknn" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(rknn)
endif ()

# Descend into the TVM net wrapper only when the "tvm" backend was requested
# via the MMDEPLOY_TARGET_BACKENDS list.
if ("tvm" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(tvm)
endif ()

mmdeploy_add_module(${PROJECT_NAME} net_module.cpp)
add_library(mmdeploy::net_module ALIAS ${PROJECT_NAME})
11 changes: 11 additions & 0 deletions csrc/mmdeploy/net/tvm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.

# Build file for the MMDeploy TVM net module (wraps the TVM runtime as an
# inference backend).
project(mmdeploy_tvm_net)

# Locates the TVM runtime library plus the tvm/dmlc-core/dlpack headers and
# defines the imported target `tvm_runtime` and the *_INCLUDE_DIR variables
# used below. Fails the configure step with a clear message if TVM_DIR is
# missing or wrong.
include(${CMAKE_SOURCE_DIR}/cmake/modules/FindTVM.cmake)

# mmdeploy_add_module is a project-wide helper that registers the module
# target; the sources are private implementation details.
mmdeploy_add_module(${PROJECT_NAME} tvm_net.cpp)
target_include_directories(${PROJECT_NAME} PRIVATE ${TVM_INCLUDE_DIR} ${DLPACK_INCLUDE_DIR} ${DMLC_CORE_INCLUDE_DIR})
target_link_libraries(${PROJECT_NAME} PRIVATE tvm_runtime mmdeploy_dlpack_utils)

# Namespaced alias so consumers can link mmdeploy::tvm_net and get a
# configure-time error on typos.
add_library(mmdeploy::tvm_net ALIAS ${PROJECT_NAME})
Loading

0 comments on commit 0e65606

Please sign in to comment.