diff --git a/defs.bzl b/defs.bzl index 04f3a718e1..d7dcbd30ec 100644 --- a/defs.bzl +++ b/defs.bzl @@ -97,11 +97,17 @@ def get_fbgemm_avx2_srcs(msvc = False): "src/UtilsAvx2.cc", ] -def get_fbgemm_inline_avx2_srcs(msvc = False): - return [ - #FP16 kernels contain inline assembly and inline assembly syntax for MSVC is different. - "src/FbgemmFP16UKernelsAvx2.cc" if not msvc else "src/FbgemmFP16UKernelsIntrinsicAvx2.cc", - ] +def get_fbgemm_inline_avx2_srcs(msvc = False, buck = False): + intrinsics_srcs = ["src/FbgemmFP16UKernelsIntrinsicAvx2.cc"] + + #FP16 kernels contain inline assembly and inline assembly syntax for MSVC is different. + asm_srcs = ["src/FbgemmFP16UKernelsAvx2.cc"] + if buck: + return select({ + "DEFAULT": asm_srcs if not msvc else intrinsics_srcs, + "ovr_config//cpu:arm64": intrinsics_srcs, + }) + return asm_srcs if not msvc else intrinsics_srcs def get_fbgemm_avx512_srcs(msvc = False): return [ @@ -116,12 +122,21 @@ def get_fbgemm_avx512_srcs(msvc = False): "src/UtilsAvx512.cc", ] -def get_fbgemm_inline_avx512_srcs(msvc = False): - return [ - #FP16 kernels contain inline assembly and inline assembly syntax for MSVC is different. - "src/FbgemmFP16UKernelsAvx512.cc" if not msvc else "src/FbgemmFP16UKernelsIntrinsicAvx512.cc", - "src/FbgemmFP16UKernelsAvx512_256.cc" if not msvc else "src/FbgemmFP16UKernelsIntrinsicAvx512_256.cc", +def get_fbgemm_inline_avx512_srcs(msvc = False, buck = False): + intrinsics_srcs = [ + "src/FbgemmFP16UKernelsIntrinsicAvx512.cc", + "src/FbgemmFP16UKernelsIntrinsicAvx512_256.cc", + ] + asm_srcs = [ + "src/FbgemmFP16UKernelsAvx512.cc", + "src/FbgemmFP16UKernelsAvx512_256.cc", ] + if buck: + return select({ + "DEFAULT": asm_srcs if not msvc else intrinsics_srcs, + "ovr_config//cpu:arm64": intrinsics_srcs, + }) + return asm_srcs if not msvc else intrinsics_srcs def get_fbgemm_tests(skip_tests = []): return native.glob(["test/*Test.cc"], exclude = skip_tests) diff --git a/src/FbgemmBfloat16Convert.cc b/src/FbgemmBfloat16Convert.cc index 76eec63ef6..482c97ea62 100644 --- a/src/FbgemmBfloat16Convert.cc +++ b/src/FbgemmBfloat16Convert.cc @@ -43,9 +43,12 @@ namespace fbgemm { void FloatToBfloat16_simd(const float* src, bfloat16* dst, size_t size) { // Run time CPU detection if (cpuinfo_initialize()) { +#ifndef __aarch64__ if (fbgemmHasAvx512Support()) { FloatToBfloat16_avx512(src, dst, size); - } else if (fbgemmHasAvx2Support()) { + } else +#endif + if (fbgemmHasAvx2Support()) { FloatToBfloat16_avx2(src, dst, size); } else { FloatToBfloat16_ref(src, dst, size); @@ -59,9 +62,12 @@ void FloatToBfloat16_simd(const float* src, bfloat16* dst, size_t size) { void Bfloat16ToFloat_simd(const bfloat16* src, float* dst, size_t size) { // Run time CPU detection if (cpuinfo_initialize()) { +#ifndef __aarch64__ if (fbgemmHasAvx512Support()) { Bfloat16ToFloat_avx512(src, dst, size); - } else if (fbgemmHasAvx2Support()) { + } else +#endif + if (fbgemmHasAvx2Support()) { Bfloat16ToFloat_avx2(src, dst, size); } else { Bfloat16ToFloat_ref(src, dst, size); diff --git a/src/FbgemmFP16.cc b/src/FbgemmFP16.cc index d30f6017d6..2bd66a6866 100644 --- a/src/FbgemmFP16.cc +++ b/src/FbgemmFP16.cc @@ -49,6 +49,7 @@ constexpr kernel_array_t kernel_fp16_avx512_256 = { gemmkernel_14x2_Avx512_256_fp16_fA0fB0fC0}; constexpr kernel_array_t kernel_fp16_avx512 = { +#ifndef __aarch64__ nullptr, gemmkernel_1x2_Avx512_fp16_fA0fB0fC0, gemmkernel_2x2_Avx512_fp16_fA0fB0fC0, @@ -63,7 +64,11 @@ constexpr kernel_array_t kernel_fp16_avx512 = { gemmkernel_11x2_Avx512_fp16_fA0fB0fC0, gemmkernel_12x2_Avx512_fp16_fA0fB0fC0, gemmkernel_13x2_Avx512_fp16_fA0fB0fC0, - gemmkernel_14x2_Avx512_fp16_fA0fB0fC0}; + gemmkernel_14x2_Avx512_fp16_fA0fB0fC0 +#else + nullptr +#endif +}; } // namespace diff --git a/src/FbgemmFP16UKernelsIntrinsicAvx2.cc b/src/FbgemmFP16UKernelsIntrinsicAvx2.cc index 7e64582d93..4ac159eadc 100644 --- a/src/FbgemmFP16UKernelsIntrinsicAvx2.cc +++ b/src/FbgemmFP16UKernelsIntrinsicAvx2.cc @@ -5,7 +5,6 @@ * LICENSE file in the root directory of this source tree. */ -#ifdef _MSC_VER #if defined(__x86_64__) || defined(__i386__) || \ (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))) #include @@ -115,4 +114,3 @@ void NOINLINE gemmkernel_6x2_Avx2_fp16_fA0fB0fC0(GemmParamsFP16* gp) { } } // namespace fbgemm -#endif // _MSC_VER diff --git a/src/FbgemmFP16UKernelsIntrinsicAvx512.cc b/src/FbgemmFP16UKernelsIntrinsicAvx512.cc index 5ad9ea6222..ce69a5d969 100644 --- a/src/FbgemmFP16UKernelsIntrinsicAvx512.cc +++ b/src/FbgemmFP16UKernelsIntrinsicAvx512.cc @@ -5,7 +5,6 @@ * LICENSE file in the root directory of this source tree. */ -#ifdef _MSC_VER #if defined(__x86_64__) || defined(__i386__) || \ (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))) #include @@ -140,4 +139,3 @@ void NOINLINE gemmkernel_14x2_Avx512_fp16_fA0fB0fC0(GemmParamsFP16* gp) { } } // namespace fbgemm -#endif // _MSC_VER diff --git a/src/FbgemmFP16UKernelsIntrinsicAvx512_256.cc b/src/FbgemmFP16UKernelsIntrinsicAvx512_256.cc index cf9f5f8f18..3445cecc77 100644 --- a/src/FbgemmFP16UKernelsIntrinsicAvx512_256.cc +++ b/src/FbgemmFP16UKernelsIntrinsicAvx512_256.cc @@ -5,7 +5,6 @@ * LICENSE file in the root directory of this source tree. */ -#ifdef _MSC_VER #if defined(__x86_64__) || defined(__i386__) || \ (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))) #include @@ -121,4 +120,3 @@ void NOINLINE gemmkernel_14x2_Avx512_256_fp16_fA0fB0fC0(GemmParamsFP16* gp) { } } // namespace fbgemm -#endif // _MSC_VER diff --git a/src/QuantUtilsAvx2.cc b/src/QuantUtilsAvx2.cc index dd3772d49c..e96bd379fb 100644 --- a/src/QuantUtilsAvx2.cc +++ b/src/QuantUtilsAvx2.cc @@ -166,6 +166,9 @@ void NO_SANITIZE("address") FusedQuantizeDequantizeAvx2( float inverse_scale = 1.f / qparams.scale; constexpr int32_t min_val = std::numeric_limits::min(); constexpr int32_t max_val = std::numeric_limits::max(); + (void)inverse_scale; // Suppress unused variable warning + (void)min_val; // Suppress unused variable warning + (void)max_val; // Suppress unused variable warning #if defined(__AVX2__) && (defined(__FMA__) || defined(_MSC_VER)) constexpr int VLEN = 8;