diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index 449455c5c0390..0cc74fb24ccde 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ -37,15 +37,15 @@ using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT); #if defined(LIBC_TARGET_CPU_HAS_AVX512F) template -inline constexpr size_t native_vector_size = 64 / sizeof(T); +LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof(T); #elif defined(LIBC_TARGET_CPU_HAS_AVX2) template -inline constexpr size_t native_vector_size = 32 / sizeof(T); +LIBC_INLINE_VAR constexpr size_t native_vector_size = 32 / sizeof(T); #elif defined(LIBC_TARGET_CPU_HAS_SSE2) || defined(LIBC_TARGET_CPU_HAS_ARM_NEON) template -inline constexpr size_t native_vector_size = 16 / sizeof(T); +LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T); #else -template inline constexpr size_t native_vector_size = 1; +template LIBC_INLINE_VAR constexpr size_t native_vector_size = 1; #endif template LIBC_INLINE constexpr T poison() { @@ -90,122 +90,127 @@ using enable_if_simd_t = cpp::enable_if_t, T>; // Casting. template -LIBC_INLINE constexpr simd simd_cast(simd v) { +LIBC_INLINE constexpr static simd simd_cast(simd v) { return __builtin_convertvector(v, simd); } // SIMD mask operations. 
-template LIBC_INLINE constexpr bool all_of(simd m) { +template LIBC_INLINE constexpr static bool all_of(simd m) { return __builtin_reduce_and(m); } -template LIBC_INLINE constexpr bool any_of(simd m) { +template LIBC_INLINE constexpr static bool any_of(simd m) { return __builtin_reduce_or(m); } -template LIBC_INLINE constexpr bool none_of(simd m) { +template LIBC_INLINE constexpr static bool none_of(simd m) { return !any_of(m); } -template LIBC_INLINE constexpr bool some_of(simd m) { +template LIBC_INLINE constexpr static bool some_of(simd m) { return any_of(m) && !all_of(m); } -template LIBC_INLINE constexpr int popcount(simd m) { +template LIBC_INLINE constexpr static int popcount(simd m) { return __builtin_popcountg(m); } -template LIBC_INLINE constexpr int find_first_set(simd m) { +template +LIBC_INLINE constexpr static int find_first_set(simd m) { return __builtin_ctzg(m); } -template LIBC_INLINE constexpr int find_last_set(simd m) { +template +LIBC_INLINE constexpr static int find_last_set(simd m) { constexpr size_t size = simd_size_v>; return size - __builtin_clzg(m); } // Elementwise operations. 
template -LIBC_INLINE constexpr simd min(simd x, simd y) { +LIBC_INLINE constexpr static simd min(simd x, simd y) { return __builtin_elementwise_min(x, y); } template -LIBC_INLINE constexpr simd max(simd x, simd y) { +LIBC_INLINE constexpr static simd max(simd x, simd y) { return __builtin_elementwise_max(x, y); } template -LIBC_INLINE constexpr simd abs(simd x) { +LIBC_INLINE constexpr static simd abs(simd x) { return __builtin_elementwise_abs(x); } template -LIBC_INLINE constexpr simd fma(simd x, simd y, simd z) { +LIBC_INLINE constexpr static simd fma(simd x, simd y, + simd z) { return __builtin_elementwise_fma(x, y, z); } template -LIBC_INLINE constexpr simd ceil(simd x) { +LIBC_INLINE constexpr static simd ceil(simd x) { return __builtin_elementwise_ceil(x); } template -LIBC_INLINE constexpr simd floor(simd x) { +LIBC_INLINE constexpr static simd floor(simd x) { return __builtin_elementwise_floor(x); } template -LIBC_INLINE constexpr simd roundeven(simd x) { +LIBC_INLINE constexpr static simd roundeven(simd x) { return __builtin_elementwise_roundeven(x); } template -LIBC_INLINE constexpr simd round(simd x) { +LIBC_INLINE constexpr static simd round(simd x) { return __builtin_elementwise_round(x); } template -LIBC_INLINE constexpr simd trunc(simd x) { +LIBC_INLINE constexpr static simd trunc(simd x) { return __builtin_elementwise_trunc(x); } template -LIBC_INLINE constexpr simd nearbyint(simd x) { +LIBC_INLINE constexpr static simd nearbyint(simd x) { return __builtin_elementwise_nearbyint(x); } template -LIBC_INLINE constexpr simd rint(simd x) { +LIBC_INLINE constexpr static simd rint(simd x) { return __builtin_elementwise_rint(x); } template -LIBC_INLINE constexpr simd canonicalize(simd x) { +LIBC_INLINE constexpr static simd canonicalize(simd x) { return __builtin_elementwise_canonicalize(x); } template -LIBC_INLINE constexpr simd copysign(simd x, simd y) { +LIBC_INLINE constexpr static simd copysign(simd x, simd y) { return __builtin_elementwise_copysign(x, 
y); } template -LIBC_INLINE constexpr simd fmod(simd x, simd y) { +LIBC_INLINE constexpr static simd fmod(simd x, simd y) { return __builtin_elementwise_fmod(x, y); } // Reduction operations. template > -LIBC_INLINE constexpr T reduce(simd v, Op op = {}) { +LIBC_INLINE constexpr static T reduce(simd v, Op op = {}) { return reduce(v, op); } template -LIBC_INLINE constexpr T reduce(simd v, cpp::plus<>) { +LIBC_INLINE constexpr static T reduce(simd v, cpp::plus<>) { return __builtin_reduce_add(v); } template -LIBC_INLINE constexpr T reduce(simd v, cpp::multiplies<>) { +LIBC_INLINE constexpr static T reduce(simd v, cpp::multiplies<>) { return __builtin_reduce_mul(v); } template -LIBC_INLINE constexpr T reduce(simd v, cpp::bit_and<>) { +LIBC_INLINE constexpr static T reduce(simd v, cpp::bit_and<>) { return __builtin_reduce_and(v); } template -LIBC_INLINE constexpr T reduce(simd v, cpp::bit_or<>) { +LIBC_INLINE constexpr static T reduce(simd v, cpp::bit_or<>) { return __builtin_reduce_or(v); } template -LIBC_INLINE constexpr T reduce(simd v, cpp::bit_xor<>) { +LIBC_INLINE constexpr static T reduce(simd v, cpp::bit_xor<>) { return __builtin_reduce_xor(v); } -template LIBC_INLINE constexpr T hmin(simd v) { +template +LIBC_INLINE constexpr static T hmin(simd v) { return __builtin_reduce_min(v); } -template LIBC_INLINE constexpr T hmax(simd v) { +template +LIBC_INLINE constexpr static T hmax(simd v) { return __builtin_reduce_max(v); } @@ -242,28 +247,29 @@ LIBC_INLINE enable_if_simd_t masked_store(simd> m, T v, } // Construction helpers. 
-template LIBC_INLINE constexpr simd splat(T v) { +template +LIBC_INLINE constexpr static simd splat(T v) { return simd(v); } -template LIBC_INLINE constexpr simd splat(T v) { +template LIBC_INLINE constexpr static simd splat(T v) { return splat>>(v); } template -LIBC_INLINE constexpr simd iota(T base = T(0), T step = T(1)) { +LIBC_INLINE constexpr static simd iota(T base = T(0), T step = T(1)) { simd v{}; for (unsigned i = 0; i < N; ++i) v[i] = base + T(i) * step; return v; } template -LIBC_INLINE constexpr simd iota(T base = T(0), T step = T(1)) { +LIBC_INLINE constexpr static simd iota(T base = T(0), T step = T(1)) { return iota>>(base, step); } // Conditional helpers. template -LIBC_INLINE constexpr simd select(simd m, simd x, - simd y) { +LIBC_INLINE constexpr static simd select(simd m, simd x, + simd y) { return m ? x : y; }