-
Notifications
You must be signed in to change notification settings - Fork 160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Slow lrint()
lrintf()
functions in the release Windows build.
#187
Comments
Hi @rfomin . Probably we need this: libsndfile/libsndfile#663. |
This is looks good! What do you think of the |
You are referring to The patch here might be OK for x86_64, but it doesn't give us disabling SSE2 |
What if only use SSE2 intricstics when |
I guess... It robs the ability to use SSE2 with x86 builds though. Well, maintainers to decide. |
Off-topic:
I usually check |
You are right, according to docs we should use |
The x64 build is now as fast as one from MSYS2 MINGW64, or even slightly faster. But x86 build is still slower than one from MSYS2 MINGW32 by approximately 2 times. |
Sigh.. Possibly because it is using an x87 implementation of lrint/lrintf. Try this attached patch: sse2_patch.txt (also inlined below) @evpobr: If the patch is acceptable, I can create a PR for it. diff --git a/configure.ac b/configure.ac
index 9948acb..53eebbe 100644
--- a/configure.ac
+++ b/configure.ac
@@ -89,7 +89,7 @@ m4_define([abi_version_patch], [lt_revision])
dnl ====================================================================================
-AC_CHECK_HEADERS([stdbool.h stdint.h sys/times.h unistd.h])
+AC_CHECK_HEADERS([stdbool.h stdint.h sys/times.h unistd.h immintrin.h])
dnl ====================================================================================
dnl Couple of initializations here. Fill in real values later.
@@ -105,6 +105,9 @@ AC_ARG_ENABLE([werror],
AC_ARG_ENABLE([cpu-clip],
[AS_HELP_STRING([--disable-cpu-clip], [disable tricky cpu specific clipper])])
+AC_ARG_ENABLE([sse2-lrint],
+ [AS_HELP_STRING([--enable-sse2-lrint], [implement lrintf using SSE2 on x86 CPUs if possible])])
+
AC_ARG_ENABLE([sndfile],
[AS_HELP_STRING([--disable-sndfile], [disable support for sndfile (default=autodetect)])], [], [enable_sndfile=auto])
@@ -180,6 +183,13 @@ AC_DEFINE_UNQUOTED([CPU_CLIPS_POSITIVE], [${ac_cv_c_clip_positive}], [Host proce
AC_DEFINE_UNQUOTED([CPU_CLIPS_NEGATIVE], [${ac_cv_c_clip_negative}], [Host processor clips on negative float to int conversion.])
dnl ====================================================================================
+dnl Determine if the user enabled lrint implementations using SSE2.
+
+AS_IF([test "x$enable_sse2_lrint" = "xyes"], [
+ CFLAGS="$CFLAGS -DENABLE_SSE2_LRINT"
+ ])
+
+dnl ====================================================================================
dnl Check for libsndfile which is required for the test and example programs.
AS_IF([test "x$enable_sndfile" != "xno"], [
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7d8371c..d4f520d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,6 +64,11 @@ if(NOT (WIN32 OR APPLE OR CYGWIN OR HAIKU OR BEOS))
endif()
endif()
+option(LIBSAMPLERATE_SSE2_LRINT "Implement lrintf using SSE2 on x86 CPUs if possible" OFF)
+if(LIBSAMPLERATE_SSE2_LRINT)
+ add_definitions(-DENABLE_SSE2_LRINT)
+endif()
+
if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
option(LIBSAMPLERATE_ENABLE_SANITIZERS "Enable ASAN and UBSAN" OFF)
@@ -88,6 +93,7 @@ endif()
check_include_file(stdbool.h HAVE_STDBOOL_H)
check_include_file(unistd.h HAVE_UNISTD_H)
+check_include_file(immintrin.h HAVE_IMMINTRIN_H)
# For examples and tests
diff --git a/config.h.cmake b/config.h.cmake
index 2964c23..ec3229f 100644
--- a/config.h.cmake
+++ b/config.h.cmake
@@ -33,6 +33,9 @@
/* Define if you have C99's lrintf function. */
#cmakedefine01 HAVE_LRINTF
+/* Define to 1 if you have the <immintrin.h> header file. */
+#cmakedefine HAVE_IMMINTRIN_H
+
/* Define if you have signal SIGALRM. */
#cmakedefine01 HAVE_SIGALRM
diff --git a/src/common.h b/src/common.h
index 4664230..303f84d 100644
--- a/src/common.h
+++ b/src/common.h
@@ -14,9 +14,35 @@
#include <stdbool.h>
#endif
-#if defined(_WIN32) && (defined(_M_X64) || defined(__x86_64__))
+#if defined(__x86_64__) || defined(_M_X64)
+# define HAVE_SSE2_INTRINSICS
+#elif defined(ENABLE_SSE2_LRINT) && (defined(_M_IX86) || defined(__i386__))
+# if defined(_MSC_VER)
+# define HAVE_SSE2_INTRINSICS
+# elif defined(__clang__)
+# ifdef __SSE2__
+# define HAVE_SSE2_INTRINSICS
+# elif (__has_attribute(target))
+# define HAVE_SSE2_INTRINSICS
+# define USE_TARGET_ATTRIBUTE
+# endif
+# elif defined(__GNUC__)
+# ifdef __SSE2__
+# define HAVE_SSE2_INTRINSICS
+# elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))
+# define HAVE_SSE2_INTRINSICS
+# define USE_TARGET_ATTRIBUTE
+# endif
+# endif
+#endif
+
+#ifdef HAVE_SSE2_INTRINSICS
+#ifdef HAVE_IMMINTRIN_H
#include <immintrin.h>
+#else
+#include <emmintrin.h>
#endif
+#endif /* HAVE_SSE2_INTRINSICS */
#include <math.h>
@@ -170,23 +196,36 @@ SRC_STATE *zoh_state_new (int channels, SRC_ERROR *error) ;
** SIMD optimized math functions.
*/
+#ifdef HAVE_SSE2_INTRINSICS
+static inline int
+#ifdef USE_TARGET_ATTRIBUTE
+__attribute__((target("sse2")))
+#endif
+psf_lrintf (float x)
+{
+ return _mm_cvtss_si32 (_mm_load_ss (&x)) ;
+}
+static inline int
+#ifdef USE_TARGET_ATTRIBUTE
+__attribute__((target("sse2")))
+#endif
+psf_lrint (double x)
+{
+ return _mm_cvtsd_si32 (_mm_load_sd (&x)) ;
+}
+
+#else /* No SSE2: */
+
static inline int psf_lrintf (float x)
{
- #if defined(_WIN32) && (defined(_M_X64) || defined(__x86_64__))
- return _mm_cvtss_si32 (_mm_load_ss (&x)) ;
- #else
- return lrintf (x) ;
- #endif
+ return lrintf (x) ;
} /* psf_lrintf */
static inline int psf_lrint (double x)
{
- #if defined(_WIN32) && (defined(_M_X64) || defined(__x86_64__))
- return _mm_cvtsd_si32 (_mm_load_sd (&x)) ;
- #else
- return lrint (x) ;
- #endif
+ return lrint (x) ;
} /* psf_lrint */
+#endif
/*----------------------------------------------------------
** Common static inline functions. |
- adds a configuration option to enable sse2-lrint/lrintf on x86 cpus, if possible. - sse2-lrint/lrintf inlines are now default on x86_64, for simplicity. Fixes libsndfile#187 (again..)
I went ahead and created #194 for it. |
- adds a configuration option to enable sse2-lrint/lrintf on x86 cpus, if possible. - sse2-lrint/lrintf inlines are now default on x86_64, for simplicity. Fixes #187 (again..)
@sezero with the patch and Since |
First off, I'm not a maintainer here so I don't have a say in that. |
The
SRC_LINEAR
method is about 10 times slower in the default MSVC build compared to the MinGW64 build. MinGW64 uses customlrint()
implementation. To solve this problem, I suggest adding /fp:fast option:An alternative solution is probably to resurrect the
float_cast.h
file for the MSVC case?The text was updated successfully, but these errors were encountered: