diff --git a/configure.ac b/configure.ac index 597f879a9..606a2eddd 100644 --- a/configure.ac +++ b/configure.ac @@ -155,6 +155,20 @@ SPEAD2_ARG_WITH( )] ) +# -Werror is used to get an error (rather than a warning) if the compiler +# doesn't support the feature. +SPEAD2_ARG_WITH( + [fmv], + [AS_HELP_STRING([--without-fmv], [Do not use compiler function multi-versioning support])], + [SPEAD2_USE_FMV], + [SPEAD2_CHECK_FEATURE( + [fmv], [function multi-versioning], [], [], [], + [SPEAD2_USE_FMV=1], [], + [__attribute__((target("default"))) void foo() {}], + [-Werror] + )] +) + SPEAD2_ARG_WITH( [movntdq], [AS_HELP_STRING([--without-movntdq], [Do not use MOVNTDQ instruction for non-temporal copies])], @@ -275,6 +289,7 @@ SPEAD2_PRINT_CONDITION([compiler optimization], [OPTIMIZED]) SPEAD2_PRINT_CONDITION([link-time optimization], [LTO]) SPEAD2_PRINT_CONDITION([coverage], [COVERAGE]) SPEAD2_PRINT_CONDITION([shared library], [SHARED_LIBRARY]) +SPEAD2_PRINT_FEATURE([function multi-versioning], [test "x$SPEAD2_USE_FMV" = "x1"]) SPEAD2_PRINT_FEATURE([MOVNTDQ instruction], [test "x$SPEAD2_USE_MOVNTDQ" = "x1"]) echo "" echo "System calls:" diff --git a/include/spead2/common_defines.h b/include/spead2/common_defines.h index de2427855..3668ba701 100644 --- a/include/spead2/common_defines.h +++ b/include/spead2/common_defines.h @@ -1,4 +1,4 @@ -/* Copyright 2015, 2020 National Research Foundation (SARAO) +/* Copyright 2015, 2020, 2023 National Research Foundation (SARAO) * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -28,6 +28,7 @@ #include #include #include +#include #ifndef SPEAD2_MAX_LOG_LEVEL #define SPEAD2_MAX_LOG_LEVEL (spead2::log_level::info) @@ -43,6 +44,12 @@ # define SPEAD2_DEPRECATED(msg) #endif +#if SPEAD2_USE_FMV +# define SPEAD2_FMV_TARGET(x) [[gnu::target(x)]] +#else +# define SPEAD2_FMV_TARGET(x) +#endif + /** * SPEAD protocol sending and receiving. All SPEAD-64-* flavours are * supported. diff --git a/include/spead2/common_features.h.in b/include/spead2/common_features.h.in index 085446e4c..c7b7b7657 100644 --- a/include/spead2/common_features.h.in +++ b/include/spead2/common_features.h.in @@ -35,15 +35,30 @@ #define SPEAD2_USE_SENDMMSG @SPEAD2_USE_SENDMMSG@ #define SPEAD2_USE_EVENTFD @SPEAD2_USE_EVENTFD@ #define SPEAD2_USE_PTHREAD_SETAFFINITY_NP @SPEAD2_USE_PTHREAD_SETAFFINITY_NP@ -/* Python on MacOS likes to build universal binaries, so even if it was - * detected at configure time, it might not be available for a particular - * build architecture. +/* Python on MacOS likes to build universal binaries, which causes problems + * because it doesn't match the compilation environment detected at + * configuration time. So features that we only use on x86 are only enabled + * if x86 is actually detected at build time. */ #if defined(__i386__) || defined(__i386) || defined(__x86_64__) || defined(__x86_64) -# define SPEAD2_USE_MOVNTDQ @SPEAD2_USE_MOVNTDQ@ -#else + +# define SPEAD2_USE_FMV @SPEAD2_USE_FMV@ +/* On i386, MOVNTDQ is not guaranteed to exist at runtime, and we need function + * multi-versioning to make it safe to use. + */ +# if SPEAD2_USE_FMV || defined(__x86_64__) || defined(__x86_64) +# define SPEAD2_USE_MOVNTDQ @SPEAD2_USE_MOVNTDQ@ +# else +# define SPEAD2_USE_MOVNTDQ 0 +# endif + +#else // not x86 + +# define SPEAD2_USE_FMV 0 # define SPEAD2_USE_MOVNTDQ 0 + #endif + #define SPEAD2_USE_POSIX_SEMAPHORES @SPEAD2_USE_POSIX_SEMAPHORES@ #define SPEAD2_USE_PCAP @SPEAD2_USE_PCAP@ diff --git a/include/spead2/common_memcpy.h b/include/spead2/common_memcpy.h index bedeb3638..a522d7448 100644 --- a/include/spead2/common_memcpy.h +++ b/include/spead2/common_memcpy.h @@ -19,6 +19,7 @@ #include #include +#include /** * Variant of memcpy that uses a non-temporal hint for the destination. @@ -30,7 +31,15 @@ namespace spead2 { +#if SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ +SPEAD2_FMV_TARGET("default") void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept; +#endif + +#if SPEAD2_USE_MOVNTDQ +SPEAD2_FMV_TARGET("sse2") +void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept; +#endif } // namespace spead2 diff --git a/src/common_memcpy.cpp b/src/common_memcpy.cpp index 7295eaa52..860ce43ec 100644 --- a/src/common_memcpy.cpp +++ b/src/common_memcpy.cpp @@ -27,11 +27,18 @@ namespace spead2 { +#if SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ +SPEAD2_FMV_TARGET("default") void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept { -#if !SPEAD2_USE_MOVNTDQ return std::memcpy(dest, src, n); -#else +} +#endif // SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ + +#if SPEAD2_USE_MOVNTDQ +SPEAD2_FMV_TARGET("sse2") +void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept +{ char * __restrict__ dest_c = (char *) dest; const char * __restrict__ src_c = (const char *) src; // Align the destination to a cache-line boundary @@ -73,7 +80,7 @@ void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src std::memcpy(dest_c + offset, src_c + offset, tail); _mm_sfence(); return dest; -#endif // SPEAD2_USE_MOVNTDQ } +#endif // SPEAD2_USE_MOVNTDQ } // namespace spead2