update xnnpack to newer version and update API usage in pytorch (pytorch#94330)

Summary:
Pull Request resolved: pytorch#94330

Update XNNPACK to 51a987591a6fc9f0fc0707077f53d763ac132cbf (https://github.com/google/XNNPACK/commits/51a987591a6fc9f0fc0707077f53d763ac132cbf)

Update the corresponding CMake and BUCK rules, as well as generate_wrapper.py, for the new version.

XNNPACK has changed substantially since the revision we were pinned to: the upstream community has refactored the code and changed several APIs, as the changes to its CMakeLists.txt make clear. Updating now keeps us in sync with upstream, which is important for our future development, and many projects already depend on newer XNNPACK versions. Because the new revision changes some XNNPACK APIs, the corresponding call sites in PyTorch are updated as well; a brief call-site sketch is included below. The target build files and generate_wrapper.py are also updated to make wrapper generation more automatic, and source files that were missing from the original targets are added to the buck2 build files so that XNNPACK builds and its tests pass.

Committing this change requires allowing large files, because the newer XNNPACK version brings in some large files.
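
For illustration, a minimal sketch of the call-site change (the leading padding/shape/quantization arguments are elided here; the authoritative parameter lists are in the updated XNNPACK headers and in the diffs below): each affected xnn_create_* call now takes an xnn_caches_t argument immediately before the output operator pointer, and PyTorch passes nullptr because these call sites do not use XNNPACK's caches.

    // Sketch only: trailing arguments of an updated XNNPACK operator-creation call.
    enum xnn_status status = xnn_create_convolution2d_nhwc_qs8(
        /* ...padding, kernel, stride, dilation, group, and channel arguments... */
        output_min,       /* int8_t output_min */
        output_max,       /* int8_t output_max */
        0u,               /* uint32_t flags */
        nullptr,          /* xnn_caches_t caches -- new argument in this XNNPACK revision */
        &convolution_op); /* xnn_operator_t* convolution_op_out */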

Test Plan:
buck2 build //xplat/third-party/XNNPACK:operators
buck2 build //xplat/third-party/XNNPACK:XNNPACK
buck2 test fbcode//caffe2/test:xnnpack_integration

Reviewed By: digantdesai

Differential Revision: D43092938

fbshipit-source-id: 6fa5028544533ce40ce1b4b91d57524bc88a3f8c
Cuiqing Li authored and facebook-github-bot committed Feb 8, 2023
1 parent 3ce1ebb commit 6b67439
Showing 7 changed files with 13,608 additions and 1,553 deletions.
4 changes: 4 additions & 0 deletions aten/src/ATen/native/quantized/cpu/XnnpackUtils.h
@@ -99,6 +99,7 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
op_min, /* int8_t output_min */
op_max, /* int8_t output_max */
flags, /* uint32_t flags */
nullptr, /* xnn_caches_t caches */
op); /* xnn_operator_t* deconvolution_op_out */

}
@@ -130,6 +131,7 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
op_min, /* int8_t output_min */
op_max, /* int8_t output_max */
flags, /* uint32_t flags */
nullptr, /* xnn_caches_t caches */
op); /* xnn_operator_t* convolution_op_out */
} else { /* per_channel */
return xnn_create_convolution2d_nhwc_qc8(
@@ -158,6 +160,7 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
op_min, /* int8_t output_min */
op_max, /* int8_t output_max */
flags, /* uint32_t flags */
nullptr, /* xnn_caches_t caches */
op); /* xnn_operator_t* convolution_op_out */
}
}
@@ -254,6 +257,7 @@ enum xnn_status xnnp_create_fully_connected_nc(
output_min, /* int8_t output_min */
output_max, /* int8_t output_max */
flags, /* uint32_t flags */
nullptr, /* xnn_caches_t caches */
fully_connected_op_out); /* xnn_operator_t* fully_connected_op_out */
}

2 changes: 2 additions & 0 deletions aten/src/ATen/native/xnnpack/Convolution.cpp
@@ -236,6 +236,7 @@ ContextConv2D create(
output_min, // output_min
output_max, // output_max
0u, // flags
nullptr, // xnn_caches_t
&convolution_op); // operator
} else {
for (const auto i : c10::irange(4)) {
@@ -264,6 +265,7 @@ ContextConv2D create(
output_min, // output_min
output_max, // output_max
0u, // flags
nullptr, // xnn_caches_t
&convolution_op); // operator
}

1 change: 1 addition & 0 deletions aten/src/ATen/native/xnnpack/Linear.cpp
@@ -97,6 +97,7 @@ ContextLinear create(
output_min, // output_min
output_max, // output_max
0u, // flags
nullptr, // xnn_caches_t
&linear_op); // operator

TORCH_CHECK(
2 changes: 1 addition & 1 deletion third_party/XNNPACK
Submodule XNNPACK updated 13838 files
125 changes: 119 additions & 6 deletions third_party/xnnpack.buck.bzl
@@ -35,6 +35,9 @@ load(
"PROD_SSE_MICROKERNEL_SRCS",
"PROD_SSSE3_MICROKERNEL_SRCS",
"PROD_XOP_MICROKERNEL_SRCS",
"ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS",
"ALL_NEON_AARCH64_MICROKERNEL_SRCS",
"PROD_AVX512VBMI_MICROKERNEL_SRCS",
)

# This defines XNNPACK targets for both fbsource BUCK and OSS BUCK
@@ -99,6 +102,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
preferred_linkage = "static",
preprocessor_flags = [
"-DXNN_LOG_LEVEL=0",
"-DXNN_ENABLE_GEMM_M_SPECIALIZATION=0",
],
visibility = ["PUBLIC"],
windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
@@ -131,6 +135,9 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
preferred_linkage = "static",
preprocessor_flags = [
"-DXNN_LOG_LEVEL=0",
"-DXNN_ENABLE_JIT=0",
"-DXNN_ENABLE_SPARSE=0",
"-DXNN_ENABLE_MEMOPT",
],
visibility = ["PUBLIC"],
windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
@@ -1088,6 +1095,53 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
],
)

fb_xplat_cxx_library(
name = "prod_avx512vbmi",
srcs = (select({
"DEFAULT": [],
"ovr_config//os:macos-x86_64": PROD_AVX512VBMI_MICROKERNEL_SRCS,
}) if is_arvr_mode() else []),
headers = subdir_glob([
("XNNPACK/src", "**/*.c"),
("XNNPACK/src", "**/*.h"),
]),
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-O2",
"-mavx512f",
],
fbobjc_preprocessor_flags = [
"-DXNN_PRIVATE=",
"-DXNN_INTERNAL=",
],
labels = labels,
platform_compiler_flags = [
(
"x86",
[
"-mavx512f",
],
),
],
platform_srcs = ([
(
"x86|x86_64|platform009|platform010",
PROD_AVX512F_MICROKERNEL_SRCS,
),
] if not is_arvr_mode() else []),
preferred_linkage = "static",
preprocessor_flags = [
"-DXNN_LOG_LEVEL=0",
],
visibility = ["PUBLIC"],
windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx512f"],
windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx512f"],
deps = [
":interface",
],
)

fb_xplat_cxx_library(
name = "ukernels_avx512_ovr_win32",
headers = subdir_glob([
@@ -1474,7 +1528,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F

fb_xplat_cxx_library(
name = "ukernels_neon_aarch64",
srcs = PROD_AARCH64_NEON_MICROKERNEL_SRCS,
srcs = ALL_NEON_AARCH64_MICROKERNEL_SRCS,
headers = subdir_glob([
("XNNPACK/src", "**/*.c"),
("XNNPACK/src", "**/*.h"),
@@ -1589,6 +1643,47 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
],
)

fb_xplat_cxx_library(
name = "ukernels_neonfma_aarch64",
srcs = ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS,
headers = subdir_glob([
("XNNPACK/src", "**/*.h"),
("XNNPACK/src", "**/*.c"),
]),
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-O2",
],
fbobjc_preprocessor_flags = [
"-DXNN_PRIVATE=",
"-DXNN_INTERNAL=",
],
labels = labels,
platform_compiler_flags = [
(
"^(android-armv8|iphoneos-armv8)$",
[
"-march=armv8-a",
"-mfpu=neon-fp-armv8",
"-mfloat-abi=softfp",
],
),
],
platforms = (APPLE, ANDROID, CXX, WINDOWS),
preferred_linkage = "static",
preprocessor_flags = [
"-DXNN_LOG_LEVEL=0",
],
visibility = ["PUBLIC"],
windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
windows_compiler_flags_override = WINDOWS_FLAGS,
deps = [
":interface",
third_party("FP16"),
],
)

fb_xplat_cxx_library(
name = "ukernels_asm_aarch32",
srcs = AARCH32_ASM_MICROKERNEL_SRCS,
@@ -1686,6 +1781,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
":ukernels_neon_fp16",
":ukernels_neon_fp16arith_aarch64",
":ukernels_neon_v8",
":ukernels_neonfma_aarch64",
],
)

@@ -1707,6 +1803,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
":ukernels_sse41",
":ukernels_ssse3",
":ukernels_xop",
":prod_avx512vbmi",
],
)

@@ -1749,6 +1846,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
":ukernels_neon_fp16arith_aarch64",
":ukernels_neon_v8",
":ukernels_scalar_aarch32",
":ukernels_neonfma_aarch64",
],
)

@@ -1820,15 +1918,30 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
"-DXNN_NO_X8_OPERATORS",
"-DXNN_NO_XX_OPERATORS",
"-DXNN_ENABLE_MEMOPT",
"-DXNN_ENABLE_SPARSE=0",
"-DXNN_ENABLE_JIT=0",
"-DXNN_ENABLE_ASSEMBLY",
"-DXNN_ENABLE_GEMM_M_SPECIALIZATION",
"-DXNN_ENABLE_ARM_DOTPROD",
],
srcs = [
"XNNPACK/src/allocator.c",
"XNNPACK/src/init.c",
"XNNPACK/src/memory-planner.c",
"XNNPACK/src/operator-delete.c",
"XNNPACK/src/runtime.c",
"XNNPACK/src/subgraph.c",
"XNNPACK/src/tensor.c",
"XNNPACK/src/params.c",
"XNNPACK/src/operator-run.c",
"XNNPACK/src/microparams-init.c",
"XNNPACK/src/binary-elementwise-config.c",
"XNNPACK/src/packing.c",
"XNNPACK/src/indirection.c",
"XNNPACK/src/cache.c",
"XNNPACK/src/mutex.c",
"XNNPACK/src/operator-utils.c",
"XNNPACK/src/memory.c",
"XNNPACK/src/hardware-config.c",
"XNNPACK/src/x8-lut-config.c",
"XNNPACK/src/normalization.c",
"XNNPACK/src/transpose-config.c",
"XNNPACK/src/amalgam/scalar.c",
] + LOGGING_SRCS,
visibility = ["PUBLIC"],
windows_clang_compiler_flags_override = (WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS) if XNNPACK_WINDOWS_AVX512F_ENABLED else WINDOWS_FLAGS,
(Diffs for the remaining changed files are not rendered here.)
