Skip to content

Commit

Permalink
Merge pull request #234 from ska-sa/runtime-cpu-features
Browse files Browse the repository at this point in the history
Add detection of function multi-versioning in the compiler
  • Loading branch information
bmerry committed Aug 7, 2023
2 parents 5f1dead + aaf0ce4 commit 5cde973
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 9 deletions.
15 changes: 15 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,20 @@ SPEAD2_ARG_WITH(
)]
)

# Optional feature: compiler support for function multi-versioning (FMV).
# The help string documents --without-fmv as the way to turn it off.
# The test program declares a function carrying the target "default"
# attribute; SPEAD2_USE_FMV=1 appears to be the action taken when that
# program is accepted (TODO confirm against SPEAD2_CHECK_FEATURE).
# -Werror is used to get an error (rather than a warning) if the compiler
# doesn't support the feature.
SPEAD2_ARG_WITH(
[fmv],
[AS_HELP_STRING([--without-fmv], [Do not use compiler function multi-versioning support])],
[SPEAD2_USE_FMV],
[SPEAD2_CHECK_FEATURE(
[fmv], [function multi-versioning], [], [], [],
[SPEAD2_USE_FMV=1], [],
[__attribute__((target("default"))) void foo() {}],
[-Werror]
)]
)

SPEAD2_ARG_WITH(
[movntdq],
[AS_HELP_STRING([--without-movntdq], [Do not use MOVNTDQ instruction for non-temporal copies])],
Expand Down Expand Up @@ -275,6 +289,7 @@ SPEAD2_PRINT_CONDITION([compiler optimization], [OPTIMIZED])
SPEAD2_PRINT_CONDITION([link-time optimization], [LTO])
SPEAD2_PRINT_CONDITION([coverage], [COVERAGE])
SPEAD2_PRINT_CONDITION([shared library], [SHARED_LIBRARY])
SPEAD2_PRINT_FEATURE([function multi-versioning], [test "x$SPEAD2_USE_FMV" = "x1"])
SPEAD2_PRINT_FEATURE([MOVNTDQ instruction], [test "x$SPEAD2_USE_MOVNTDQ" = "x1"])
echo ""
echo "System calls:"
Expand Down
9 changes: 8 additions & 1 deletion include/spead2/common_defines.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2015, 2020 National Research Foundation (SARAO)
/* Copyright 2015, 2020, 2023 National Research Foundation (SARAO)
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
Expand Down Expand Up @@ -28,6 +28,7 @@
#include <utility>
#include <string>
#include <functional>
#include <spead2/common_features.h>

#ifndef SPEAD2_MAX_LOG_LEVEL
#define SPEAD2_MAX_LOG_LEVEL (spead2::log_level::info)
Expand All @@ -43,6 +44,12 @@
# define SPEAD2_DEPRECATED(msg)
#endif

/* SPEAD2_FMV_TARGET(x) marks a function as the version built for target x
 * in a set of multi-versioned functions. When function multi-versioning is
 * not in use (SPEAD2_USE_FMV is 0) it expands to nothing, so the function
 * is an ordinary one.
 */
#if !SPEAD2_USE_FMV
# define SPEAD2_FMV_TARGET(x)
#else
# define SPEAD2_FMV_TARGET(x) [[gnu::target(x)]]
#endif

/**
* SPEAD protocol sending and receiving. All SPEAD-64-* flavours are
* supported.
Expand Down
25 changes: 20 additions & 5 deletions include/spead2/common_features.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,30 @@
#define SPEAD2_USE_SENDMMSG @SPEAD2_USE_SENDMMSG@
#define SPEAD2_USE_EVENTFD @SPEAD2_USE_EVENTFD@
#define SPEAD2_USE_PTHREAD_SETAFFINITY_NP @SPEAD2_USE_PTHREAD_SETAFFINITY_NP@
/* Python on MacOS likes to build universal binaries, so even if it was
* detected at configure time, it might not be available for a particular
* build architecture.
/* Python on macOS likes to build universal binaries, which causes problems
* because the architecture being compiled may not match the compilation
* environment detected at configuration time. So features that we only use
* on x86 are only enabled if x86 is actually detected at build time.
*/
#if defined(__i386__) || defined(__i386) || defined(__x86_64__) || defined(__x86_64)
# define SPEAD2_USE_MOVNTDQ @SPEAD2_USE_MOVNTDQ@
#else

# define SPEAD2_USE_FMV @SPEAD2_USE_FMV@
/* On i386, MOVNTDQ is not guaranteed to exist at runtime, and we need function
* multi-versioning to make it safe to use: the MOVNTDQ code can then be
* built as a separate version alongside a default version. On x86-64 the
* configure-time result is used whether or not SPEAD2_USE_FMV is set.
*/
# if SPEAD2_USE_FMV || defined(__x86_64__) || defined(__x86_64)
# define SPEAD2_USE_MOVNTDQ @SPEAD2_USE_MOVNTDQ@
# else
# define SPEAD2_USE_MOVNTDQ 0
# endif

#else // not x86

# define SPEAD2_USE_FMV 0
# define SPEAD2_USE_MOVNTDQ 0

#endif

#define SPEAD2_USE_POSIX_SEMAPHORES @SPEAD2_USE_POSIX_SEMAPHORES@
#define SPEAD2_USE_PCAP @SPEAD2_USE_PCAP@

Expand Down
9 changes: 9 additions & 0 deletions include/spead2/common_memcpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <cstddef>
#include <spead2/common_features.h>
#include <spead2/common_defines.h>

/**
* Variant of memcpy that uses a non-temporal hint for the destination.
Expand All @@ -30,7 +31,15 @@
namespace spead2
{

/* "default" version: declared when function multi-versioning is in use, or
 * when MOVNTDQ is not in use (in which case it is the only version).
 */
#if SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ
SPEAD2_FMV_TARGET("default")
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept;
#endif

/* "sse2" version: declared only when MOVNTDQ is in use. Without function
 * multi-versioning SPEAD2_FMV_TARGET expands to nothing and this is the
 * only declaration.
 */
#if SPEAD2_USE_MOVNTDQ
SPEAD2_FMV_TARGET("sse2")
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept;
#endif

} // namespace spead2

Expand Down
13 changes: 10 additions & 3 deletions src/common_memcpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,18 @@
namespace spead2
{

#if SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ
/* "default" version of memcpy_nontemporal: copies n bytes from src to dest
 * by forwarding to std::memcpy and returns its result. No non-temporal
 * hint is applied on this path.
 */
SPEAD2_FMV_TARGET("default")
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept
{
#if !SPEAD2_USE_MOVNTDQ
return std::memcpy(dest, src, n);
#else
}
#endif // SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ

#if SPEAD2_USE_MOVNTDQ
SPEAD2_FMV_TARGET("sse2")
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept
{
char * __restrict__ dest_c = (char *) dest;
const char * __restrict__ src_c = (const char *) src;
// Align the destination to a cache-line boundary
Expand Down Expand Up @@ -73,7 +80,7 @@ void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src
std::memcpy(dest_c + offset, src_c + offset, tail);
_mm_sfence();
return dest;
#endif // SPEAD2_USE_MOVNTDQ
}
#endif // SPEAD2_USE_MOVNTDQ

} // namespace spead2

0 comments on commit 5cde973

Please sign in to comment.