diff --git a/Makefile.am b/Makefile.am index 5291824..24f6e0c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3,6 +3,7 @@ snzip_SOURCES = snzip.c snzip.h snzip-format.c framing-format.c framing2-format. if HAVE_SSE4_2 snzip_SOURCES += crc32_sse4_2.c endif +snzip_LDFLAGS = @LDFLAGS_SSE4_2@ CFLAGS_SSE4_2 = @CFLAGS_SSE4_2@ PROGS = snzip bin_PROGRAMS = $(PROGS) diff --git a/clearcap.map b/clearcap.map new file mode 100644 index 0000000..9c1bf46 --- /dev/null +++ b/clearcap.map @@ -0,0 +1,2 @@ +# This is used only for Solaris Studio to clear hardware capability flags. +hwcap_1 = V0x0 OVERRIDE; diff --git a/configure.ac b/configure.ac index d1e7113..fa5c20b 100644 --- a/configure.ac +++ b/configure.ac @@ -45,18 +45,43 @@ AC_CHECK_MEMBERS([struct stat.st_mtimensec, struct stat.st_mtim.tv_nsec, struct # unlocked stdio functions AC_CHECK_FUNCS(getc_unlocked putc_unlocked fread_unlocked fwrite_unlocked ferror_unlocked feof_unlocked) -# Check SSE4.2 -AC_MSG_CHECKING(whether -msse4.2 is accepted as CFLAGS) -saved_CFLAGS="$CFLAGS" -CFLAGS="$CFLAGS -msse4.2" -AC_TRY_COMPILE([], [], - [AC_MSG_RESULT(yes); CFLAGS_SSE4_2=-msse4.2], - [AC_MSG_RESULT(no); CFLAGS_SSE4_2=]) -AC_SUBST([CFLAGS_SSE4_2]) +AC_ARG_ENABLE([sse4_2], + [AS_HELP_STRING([--disable-sse4_2], + [don't use sse4.2 to calculate crc32])], + [], + []) + +CFLAGS_SSE4_2= +LDFLAGS_SSE4_2= +AS_IF([test "x$enable_sse4_2" != xno], + [ + AC_MSG_CHECKING(SSE4.2 CFLAGS) + AS_IF([test "x$GCC" = xyes], + [CFLAGS_SSE4_2=-msse4.2], + [AS_CASE(["$host_os"], + [solaris*], [CFLAGS_SSE4_2=-xarch=sse4_2; + LDFLAGS_SSE4_2=-Wl,-M,clearcap.map + ]) + ]) + AS_IF([test "$CFLAGS_SSE4_2"], + [AC_MSG_RESULT($CFLAGS_SSE4_2)], + [AC_MSG_RESULT()]) + ]) -AC_CHECK_DECLS([_mm_crc32_u32], [], [], [#include <nmmintrin.h>]) -CFLAGS="$saved_CFLAGS" +AS_IF([test "$CFLAGS_SSE4_2"], + [ + AC_MSG_CHECKING([whether $CFLAGS_SSE4_2 is accepted as CFLAGS]) + saved_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $CFLAGS_SSE4_2" + AC_TRY_COMPILE([], [], + [AC_MSG_RESULT(yes)], + 
[AC_MSG_RESULT(no); CFLAGS_SSE4_2=; LDFLAGS_SSE4_2=]) + AC_CHECK_DECLS([_mm_crc32_u32], [], [], [#include <nmmintrin.h>]) + CFLAGS="$saved_CFLAGS" + ]) +AC_SUBST([CFLAGS_SSE4_2]) +AC_SUBST([LDFLAGS_SSE4_2]) AS_IF([test "x$ac_cv_have_decl__mm_crc32_u32" = xyes], [AC_DEFINE([HAVE_SSE4_2], 1, [Define to 1 if you have SSE4.2])]) AM_CONDITIONAL([HAVE_SSE4_2], [test "x$ac_cv_have_decl__mm_crc32_u32" = xyes]) diff --git a/crc32.c b/crc32.c index 0aa8967..e10c778 100644 --- a/crc32.c +++ b/crc32.c @@ -773,64 +773,86 @@ multitable_crc32c(uint32_t crc32c, * authors and should not be interpreted as representing official policies, either expressed * or implied, of the authors. */ -#if defined _MSC_VER && _MSC_VER >= 1400 +#define CPUID_SSE4_2_IS_SET(x) (((x) & (1u << 20)) ? 1 : 0) + +#if defined __GNUC__ +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +#include <cpuid.h> +#define USE_GCC_INTRINSIC 1 /* gcc 4.3 or later */ +#else +#define USE_GCC_ASM 1 +#endif +#endif + +#if defined __SUNPRO_C && __SUNPRO_C >= 0x590 +#define USE_GCC_ASM 1 /* Sun Studio 12 or later */ +#endif + +#if defined _MSC_VER +#if _MSC_VER >= 1400 #include <intrin.h> +#define USE_MSVC_INTRINSIC 1 /* Visual Studio 2005 or later */ +#else +#define USE_MSVC_ASM 1 #endif -#if defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) -#include <cpuid.h> #endif -#define CPUID_ECX_BIT_SSE4_2 (1u << 20) +#if defined USE_GCC_INTRINSIC +static int sse4_2_is_available(void) +{ + unsigned int eax, ebx, ecx, edx; + __cpuid(1, eax, ebx, ecx, edx); + return CPUID_SSE4_2_IS_SET(ecx); +} +#elif defined USE_GCC_ASM static int sse4_2_is_available(void) { -#if defined __GNUC__ /* GNU C Compiler */ -#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) - /* gcc version >= 4.3 */ - unsigned int eax_, ebx_, ecx_, edx_; - __cpuid(1, eax_, ebx_, ecx_, edx_); -#else - /* gcc version < 4.3 */ - unsigned int ecx_; -#if defined(__i386__) && defined(__PIC__) + unsigned int ecx; +#if (defined(__i386__) || defined(__i386)) && defined(__PIC__) 
__asm( "movl $1, %%eax;" "pushl %%ebx;" "cpuid;" "popl %%ebx;" - : "=c" (ecx_) + : "=c" (ecx) : : "eax", "edx"); #else __asm( "movl $1, %%eax;" "cpuid;" - : "=c" (ecx_) + : "=c" (ecx) : : "eax", "ebx", "edx"); #endif -#endif -#elif defined _MSC_VER /* Microsoft Visual C++ */ -#if _MSC_VER >= 1400 - /* msvc version >= 2005 */ - int cpuinfo[4], ecx_; + return CPUID_SSE4_2_IS_SET(ecx); +} + +#elif defined USE_MSVC_INTRINSIC +static int sse4_2_is_available(void) +{ + int cpuinfo[4]; __cpuid(cpuinfo, 1); - ecx_ = cpuinfo[2]; -#else - /* msvc version < 2005 */ - unsigned int ecx_; + return CPUID_SSE4_2_IS_SET(cpuinfo[2]); +} + +#elif defined USE_MSVC_ASM +static int sse4_2_is_available(void) +{ + unsigned int rv; __asm { mov eax, 1 cpuid - mov ecx_, ecx + mov rv, ecx } -#endif -#else /* Other compilers */ -#error unsupported compiler -#endif - return (ecx_ & CPUID_ECX_BIT_SSE4_2) ? 1 : 0; + return CPUID_SSE4_2_IS_SET(rv); } +#else +#error unsupported compiler to use cpuid instruction. run 'configure' with --disable-sse4_2 +#endif + static uint32_t select_crc32c_func(uint32_t crc32c, const unsigned char *buffer,unsigned int length); static uint32_t (*crc32c_func)(uint32_t, const unsigned char *, unsigned int) = select_crc32c_func;