Mobile Backend: NHWC memory layout + XNNPACK integration.
Ashkan Aliabadi committed Feb 18, 2020
1 parent 4468a7b commit 4b95293
Showing 22 changed files with 1,081 additions and 170 deletions.
2 changes: 1 addition & 1 deletion .circleci/scripts/binary_ios_upload.sh
@@ -14,7 +14,7 @@ mkdir -p ${ZIP_DIR}/src
cp -R ${ARTIFACTS_DIR}/arm64/include ${ZIP_DIR}/install/
# build a FAT binary
cd ${ZIP_DIR}/install/lib
-target_libs=(libc10.a libclog.a libcpuinfo.a libeigen_blas.a libpytorch_qnnpack.a libtorch_cpu.a libtorch.a)
+target_libs=(libc10.a libclog.a libcpuinfo.a libeigen_blas.a libpytorch_qnnpack.a libtorch_cpu.a libtorch.a libXNNPACK.a)
for lib in ${target_libs[*]}
do
if [ -f "${ARTIFACTS_DIR}/x86_64/lib/${lib}" ] && [ -f "${ARTIFACTS_DIR}/arm64/lib/${lib}" ]; then
10 changes: 7 additions & 3 deletions .gitmodules
@@ -111,10 +111,14 @@
path = third_party/foxi
url = https://github.com/houseroad/foxi.git
[submodule "third_party/tbb"]
-	path = third_party/tbb
-	url = https://github.com/01org/tbb
-	branch = tbb_2018
+	path = third_party/tbb
+	url = https://github.com/01org/tbb
+	branch = tbb_2018
[submodule "android/libs/fbjni"]
ignore = dirty
path = android/libs/fbjni
url = https://github.com/facebookincubator/fbjni.git
[submodule "third_party/XNNPACK"]
path = third_party/XNNPACK
url = https://github.com/AshkanAliabadi/XNNPACK.git
branch = xnnpack_pytorch_merge_temp
5 changes: 5 additions & 0 deletions CMakeLists.txt
@@ -185,6 +185,7 @@ option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
option(USE_SYSTEM_EIGEN_INSTALL
"Use system Eigen instead of the one under third_party" OFF)
option(USE_TENSORRT "Using Nvidia TensorRT library" OFF)
option(USE_XNNPACK "Use XNNPACK" ON)
option(USE_ZMQ "Use ZMQ" OFF)
option(USE_ZSTD "Use ZSTD" OFF)
cmake_dependent_option(
@@ -415,6 +416,10 @@ if(USE_PYTORCH_QNNPACK)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_PYTORCH_QNNPACK")
endif()

if(USE_XNNPACK)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_XNNPACK")
endif()

# ---[ Whitelist file if whitelist is specified
include(cmake/Whitelist.cmake)

3 changes: 3 additions & 0 deletions android/pytorch_android/CMakeLists.txt
@@ -83,6 +83,7 @@ if (ANDROID_ABI)
import_static_lib(libtorch_cpu)
import_static_lib(libc10)
import_static_lib(libnnpack)
import_static_lib(libXNNPACK)
import_static_lib(libpytorch_qnnpack)
import_static_lib(libeigen_blas)
import_static_lib(libcpuinfo)
@@ -98,6 +99,7 @@ if (ANDROID_ABI)
-Wl,--no-whole-archive
libc10
libnnpack
libXNNPACK
libpytorch_qnnpack
libeigen_blas
libcpuinfo
@@ -113,6 +115,7 @@ else()
torch_cpu
c10
nnpack
XNNPACK
pytorch_qnnpack
cpuinfo
clog
5 changes: 4 additions & 1 deletion aten/src/ATen/CMakeLists.txt
@@ -84,8 +84,11 @@ FILE(GLOB native_sparse_hip_cpp "native/sparse/hip/*.cpp")
FILE(GLOB native_quantized_hip_hip "native/quantized/hip/*.hip")
FILE(GLOB native_quantized_hip_cpp "native/quantized/hip/*.cpp")

# XNNPACK
FILE(GLOB native_xnnpack "native/xnnpack/*.cpp")

add_subdirectory(quantized)
-set(all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp} ${native_sparse_cpp} ${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp} ${generated_cpp} ${core_generated_cpp} ${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${cpu_kernel_cpp})
+set(all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp} ${native_sparse_cpp} ${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp} ${native_xnnpack} ${generated_cpp} ${core_generated_cpp} ${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${cpu_kernel_cpp})
if(AT_MKL_ENABLED)
set(all_cpu_cpp ${all_cpu_cpp} ${mkl_cpp})
endif()
8 changes: 8 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -775,6 +775,10 @@

- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor

- func: _conv2d_prepack(Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1, float? output_min=None, float? output_max=None) -> Tensor

- func: _conv2d_packed(Tensor packed_weight, Tensor input) -> Tensor

- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor

- func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
@@ -1563,6 +1567,10 @@
- func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
python_module: nn

- func: _linear_prepack(Tensor weight, Tensor? bias=None, float? output_min=None, float? output_max=None) -> Tensor

- func: _linear_packed(Tensor packed_weight, Tensor input) -> Tensor

- func: mkldnn_linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
python_module: nn
dispatch:
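These schemas split inference into a one-time weight-packing step (_conv2d_prepack, _linear_prepack) and a lightweight step that consumes the packed weights (_conv2d_packed, _linear_packed). A hypothetical C++ call sequence for the conv2d pair, sketched from the schema above — the generated at:: wrappers and exact argument bindings are assumptions, not part of this diff:

#include <ATen/ATen.h>

at::Tensor conv2d_with_prepacked_weight(
    const at::Tensor& input,
    const at::Tensor& weight,
    const at::Tensor& bias) {
  // Pack (and potentially reorder) the weight once, ahead of time...
  const at::Tensor packed = at::_conv2d_prepack(
      weight, bias, /*stride=*/{1, 1}, /*padding=*/{0, 0},
      /*dilation=*/{1, 1}, /*groups=*/1);
  // ...then reuse the packed representation across inference calls.
  return at::_conv2d_packed(packed, input);
}
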
56 changes: 56 additions & 0 deletions aten/src/ATen/native/utils/Allocator.h
@@ -0,0 +1,56 @@
#pragma once

#include <c10/core/CPUAllocator.h>

namespace at {
namespace native {

// QNNPACK and XNNPACK may access the input and / or output tensors out of
// bounds. This behavior will trigger ASAN, and may result in a segfault if
// the accessed memory just so happens to fall on a page the current process
// has no read access to. Here we define a custom allocator that allocates
// the extra storage required to keep this behavior safe.
//
// PreGuardBytes: Number of guard bytes to allocate before the allocation.
// PostGuardBytes: Number of guard bytes to allocate after the allocation.

template <uint32_t PreGuardBytes, uint32_t PostGuardBytes>
class GuardingAllocator final : public at::Allocator {
public:
GuardingAllocator() = default;
virtual ~GuardingAllocator() override = default;

static void deleter(void* pointer) {
const Cast memory{pointer};
c10::free_cpu(memory.as_byte_ptr - kPreGuardBytes);
}

virtual DataPtr allocate(size_t nbytes) const override {
Cast memory{c10::alloc_cpu(kPreGuardBytes + nbytes + kPostGuardBytes)};
memory.as_byte_ptr += kPreGuardBytes;

return {
memory.as_void_ptr,
memory.as_void_ptr,
&deleter,
at::Device(DeviceType::CPU),
};
}

virtual DeleterFnPtr raw_deleter() const override {
return deleter;
}

private:
static constexpr uint32_t kPreGuardBytes = PreGuardBytes;
static constexpr uint32_t kPostGuardBytes = PostGuardBytes;

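// Views one pointer both as void* (the type the Allocator interface
// traffics in) and as uint8_t* (for guard-byte pointer arithmetic).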
union Cast final {
void * const as_void_ptr;
uint8_t * as_byte_ptr;
};
};

} // namespace native
} // namespace at

82 changes: 82 additions & 0 deletions aten/src/ATen/native/xnnpack/Common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#pragma once

#include <ATen/ATen.h>

#ifdef USE_XNNPACK

#include <xnnpack.h>

namespace at {
namespace native {
namespace xnnpack {
namespace internal {

struct Layout final {
// 4D Activation Maps
struct Activation4D final {
static constexpr size_t batch = 0u;
static constexpr size_t channels = 1u;
static constexpr size_t height = 2u;
static constexpr size_t width = 3u;
};

// ND Activation Maps
struct ActivationND final {
// Some operators are not limited to 4-dimensional tensors. In that scenario,
// XNNPACK denotes such an operator with an _nc suffix and expects all
// dimensions, except channels, to be flattened into one argument: batch_size.
static int64_t batch(const IntArrayRef tensor) {
if (C10_UNLIKELY(tensor.empty())) {
return -1;
}

// Handle the case where batch size is zero.
int64_t batch = std::max<int64_t>(1, tensor[0]);

for (size_t index = 1u; index < (tensor.size() - 1u); ++index) {
batch *= tensor[index];
}

return batch;
};

static int64_t channel(const IntArrayRef tensor) {
if (C10_UNLIKELY(tensor.empty())) {
return -1;
}

return tensor.back();
};
};

// Convolution Filters
struct Filter final {
static constexpr size_t output = 0u;
static constexpr size_t input = 1u;
static constexpr size_t height = 2u;
static constexpr size_t width = 3u;
};

// Parameters (Pooling Kernels, Dilation, Padding, Stride, etc.)
struct Parameter final {
static constexpr size_t height = 0u;
static constexpr size_t width = 1u;
};
};

struct Deleter final {
void operator()(const xnn_operator_t op) const {
xnn_delete_operator(op);
}
};

using Operator = std::unique_ptr<xnn_operator, Deleter>;

bool available();

} // namespace internal
} // namespace xnnpack
} // namespace native
} // namespace at

#endif /* USE_XNNPACK */
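
To make the flattening concrete, an illustrative example (not part of this commit; assumes USE_XNNPACK is enabled):

#include <ATen/native/xnnpack/Common.h>
#include <vector>

void layout_example() {
  using Layout = at::native::xnnpack::internal::Layout;
  // Every dimension except the last folds into XNNPACK's batch_size;
  // the last dimension is the channel count.
  const std::vector<int64_t> shape{2, 3, 4, 5};
  const int64_t batch = Layout::ActivationND::batch(shape);      // 2 * 3 * 4 = 24
  const int64_t channels = Layout::ActivationND::channel(shape); // 5
}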