-
Notifications
You must be signed in to change notification settings - Fork 11.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc] add PREFER_GENERIC flag #73744
Conversation
There are some basic vectorization features in standard architecture specifications. Such as SSE/SSE2 for x86-64, or NEON for aarch64. Even though such features are almost always available, we still need some methods to test fallback routines without any vectorization. Previous attempt in hsearch adds a DISABLE_SSE2_OPT flag that tries to compile the code with -mno-sse2 in order to test specific table scanning routines. However, it turns out that such flag may have some unwanted side effects hindering portability. This PR introduces PREFER_GENERIC as an alternative. When a target is built with PREFER_GENERIC, cmake will define a macro __LIBC_PREFER_GENERIC such that developers can selectively choose the fallback routine based on the macro.
@llvm/pr-subscribers-libc Author: Schrodinger ZHU Yifan (SchrodingerZhu) ChangesThere are some basic vectorization features in standard architecture specifications. Such as SSE/SSE2 for x86-64, or NEON for aarch64. Even though such features are almost always available, we still need some methods to test fallback routines without any vectorization. Previous attempt in hsearch adds a DISABLE_SSE2_OPT flag that tries to compile the code with -mno-sse2 in order to test specific table scanning routines. However, it turns out that such flag may have some unwanted side effects hindering portability. This PR introduces PREFER_GENERIC as an alternative. When a target is built with PREFER_GENERIC, cmake will define a macro __LIBC_PREFER_GENERIC such that developers can selectively choose the fallback routine based on the macro. Full diff: https://github.com/llvm/llvm-project/pull/73744.diff 4 Files Affected:
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index 37ffe708fb7548c..7d663f0682e1f25 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -132,8 +132,9 @@ endfunction(get_fq_dep_list_without_flag)
# Special flags
set(FMA_OPT_FLAG "FMA_OPT")
set(ROUND_OPT_FLAG "ROUND_OPT")
-# SSE2 is the baseline for x86_64, so we add a negative flag to disable it if needed.
-set(DISABLE_SSE2_OPT_FLAG "DISABLE_SSE2_OPT")
+# This flag will define macros that gated away vectorization code such that
+# one can always test the fallback generic code path.
+set(PREFER_GENERIC_FLAG "PREFER_GENERIC")
# Skip FMA_OPT flag for targets that don't support fma.
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")) OR
@@ -145,8 +146,3 @@ endif()
if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")))
set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
endif()
-
-# Skip DISABLE_SSE2_OPT flag for targets that don't support SSE2.
-if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
- set(SKIP_FLAG_EXPANSION_DISABLE_SSE2_OPT TRUE)
-endif()
diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
index 6d94ce97f0b689c..5fbbfd58db2d078 100644
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -18,12 +18,12 @@ function(_get_common_compile_options output_var flags)
set(ADD_SSE4_2_FLAG TRUE)
endif()
- list(FIND flags ${DISABLE_SSE2_OPT_FLAG} no_sse2)
- if(${no_sse2} LESS 0)
- list(FIND flags "${DISABLE_SSE2_OPT_FLAG}__ONLY" no_sse2)
+ list(FIND flags ${PREFER_GENERIC_FLAG} prefer_generic)
+ if(${prefer_generic} LESS 0)
+ list(FIND flags "${PREFER_GENERIC_FLAG}__ONLY" prefer_generic)
endif()
- if((${no_sse2} GREATER -1) AND (LIBC_CPU_FEATURES MATCHES "SSE2"))
- set(DISABLE_SSE2_FLAG TRUE)
+ if(${prefer_generic} GREATER -1)
+ set(ADD_PREFER_GENERIC_FLAG TRUE)
endif()
set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${ARGN})
@@ -66,8 +66,8 @@ function(_get_common_compile_options output_var flags)
if(ADD_SSE4_2_FLAG)
list(APPEND compile_options "-msse4.2")
endif()
- if(DISABLE_SSE2_FLAG)
- list(APPEND compile_options "-mno-sse2")
+ if(ADD_PREFER_GENERIC_FLAG)
+ list(APPEND compile_options "-D__LIBC_PREFER_GENERIC")
endif()
elseif(MSVC)
list(APPEND compile_options "/EHs-c-")
@@ -75,8 +75,8 @@ function(_get_common_compile_options output_var flags)
if(ADD_FMA_FLAG)
list(APPEND compile_options "/arch:AVX2")
endif()
- if(DISABLE_SSE2_FLAG)
- list(APPEND compile_options "/arch:SSE")
+ if(ADD_PREFER_GENERIC_FLAG)
+ list(APPEND compile_options "/D__LIBC_PREFER_GENERIC")
endif()
endif()
if (LIBC_TARGET_ARCHITECTURE_IS_GPU)
diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt
index 238d460dacd4285..22e91d4f8708c54 100644
--- a/libc/src/__support/HashTable/CMakeLists.txt
+++ b/libc/src/__support/HashTable/CMakeLists.txt
@@ -1,11 +1,9 @@
-# TODO: `DISABLE_SSE2_OPT` does not quite work yet.
-# We will investigate a better way of feature flag control.
add_header_library(
bitmask
HDRS
bitmask.h
FLAGS
- DISABLE_SSE2_OPT
+ PREFER_GENERIC
DEPENDS
libc.src.__support.bit
libc.src.__support.macros.properties.cpu_features
diff --git a/libc/src/__support/HashTable/bitmask.h b/libc/src/__support/HashTable/bitmask.h
index 761125feb951d9a..8247161c449bdd1 100644
--- a/libc/src/__support/HashTable/bitmask.h
+++ b/libc/src/__support/HashTable/bitmask.h
@@ -85,7 +85,7 @@ template <class BitMask> struct IteratableBitMaskAdaptor : public BitMask {
} // namespace internal
} // namespace LIBC_NAMESPACE
-#if defined(LIBC_TARGET_CPU_HAS_SSE2)
+#if defined(LIBC_TARGET_CPU_HAS_SSE2) && !defined(__LIBC_PREFER_GENERIC)
#include "sse2/bitmask_impl.inc"
#else
#include "generic/bitmask_impl.inc"
|
There are some basic vectorization features in standard architecture specifications. Such as SSE/SSE2 for x86-64, or NEON for aarch64. Even though such features are almost always available, we still need some methods to test fallback routines without any vectorization.
Previous attempt in hsearch adds a DISABLE_SSE2_OPT flag that tries to compile the code with -mno-sse2 in order to test specific table scanning routines. However, it turns out that such flag may have some unwanted side effects hindering portability.
This PR introduces PREFER_GENERIC as an alternative. When a target is built with PREFER_GENERIC, cmake will define a macro __LIBC_PREFER_GENERIC such that developers can selectively choose the fallback routine based on the macro.