diff --git a/Makefile b/Makefile index 3c1f7a3516..ccccb58a84 100644 --- a/Makefile +++ b/Makefile @@ -288,8 +288,8 @@ HEADERS = $(wildcard $(ROOTDIR)/$(SRCDIR)/template/*.c) $(wildcard $(ROOTDIR)/$( $(ROOTDIR)/include/libxsmm_timer.h \ $(ROOTDIR)/include/libxsmm_typedefs.h SRCFILES_LIB = $(patsubst %,$(ROOTDIR)/$(SRCDIR)/%, \ - libxsmm_main.c libxsmm_malloc.c libxsmm_math.c libxsmm_sync.c \ - libxsmm_python.c libxsmm_mhd.c libxsmm_timer.c libxsmm_perf.c \ + libxsmm_main.c libxsmm_malloc.c libxsmm_hash.c libxsmm_math.c \ + libxsmm_sync.c libxsmm_python.c libxsmm_mhd.c libxsmm_timer.c libxsmm_perf.c \ libxsmm_gemm.c libxsmm_xcopy.c libxsmm_blocked_gemm.c libxsmm_spmdm.c libxsmm_fsspmdm.c libxsmm_rng.c\ libxsmm_dnn.c libxsmm_dnn_dryruns.c libxsmm_dnn_setup.c libxsmm_dnn_handle.c libxsmm_dnn_elementwise.c \ libxsmm_dnn_rnncell.c libxsmm_dnn_rnncell_forward.c libxsmm_dnn_rnncell_backward_weight_update.c \ @@ -573,10 +573,9 @@ $(INCDIR)/libxsmm_config.h: $(INCDIR)/.make .state $(ROOTDIR)/$(SRCDIR)/template fi @$(CP) $(ROOTDIR)/include/libxsmm_blocked_gemm.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_cpuid.h $(INCDIR) 2>/dev/null || true - @$(CP) $(ROOTDIR)/include/libxsmm_rng.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_dnn.h $(INCDIR) 2>/dev/null || true - @$(CP) $(ROOTDIR)/include/libxsmm_dnn_fusedbatchnorm.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_dnn_fullyconnected.h $(INCDIR) 2>/dev/null || true + @$(CP) $(ROOTDIR)/include/libxsmm_dnn_fusedbatchnorm.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_dnn_pooling.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_dnn_rnncell.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_frontend.h $(INCDIR) 2>/dev/null || true @@ -587,6 +586,7 @@ $(INCDIR)/libxsmm_config.h: $(INCDIR)/.make .state $(ROOTDIR)/$(SRCDIR)/template @$(CP) $(ROOTDIR)/include/libxsmm_malloc.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_math.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_mhd.h $(INCDIR) 2>/dev/null || true + @$(CP) $(ROOTDIR)/include/libxsmm_rng.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_spmdm.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_sync.h $(INCDIR) 2>/dev/null || true @$(CP) $(ROOTDIR)/include/libxsmm_timer.h $(INCDIR) 2>/dev/null || true diff --git a/ide/libxsmm.vcxproj b/ide/libxsmm.vcxproj index bba3535fef..a21f6c98a2 100644 --- a/ide/libxsmm.vcxproj +++ b/ide/libxsmm.vcxproj @@ -180,14 +180,7 @@ - - true - true - true - true - true - true - + diff --git a/src/libxsmm_gemm.c b/src/libxsmm_gemm.c index 969db39aa7..db7f143a1e 100644 --- a/src/libxsmm_gemm.c +++ b/src/libxsmm_gemm.c @@ -72,8 +72,8 @@ #endif #if (0 != LIBXSMM_SYNC) /** Locks for the batch interface (duplicated C indexes). */ -# define LIBXSMM_GEMM_LOCKIDX(IDX, NPOT) LIBXSMM_MOD2(LIBXSMM_CONCATENATE(libxsmm_crc32_u,LIBXSMM_BLASINT_NBITS)(&(IDX), 2507/*seed*/), NPOT) -# define LIBXSMM_GEMM_LOCKPTR(PTR, NPOT) LIBXSMM_MOD2(libxsmm_crc32_u64(&(PTR), 1975/*seed*/), NPOT) +# define LIBXSMM_GEMM_LOCKIDX(IDX, NPOT) LIBXSMM_MOD2(LIBXSMM_CONCATENATE(libxsmm_crc32_u,LIBXSMM_BLASINT_NBITS)(2507/*seed*/, &(IDX)), NPOT) +# define LIBXSMM_GEMM_LOCKPTR(PTR, NPOT) LIBXSMM_MOD2(libxsmm_crc32_u64(1975/*seed*/, &(PTR)), NPOT) # if !defined(LIBXSMM_GEMM_MAXNLOCKS) # define LIBXSMM_GEMM_MAXNLOCKS 1024 # endif diff --git a/src/libxsmm_hash.c b/src/libxsmm_hash.c index da773b1eb7..c7db83b5b1 100644 --- a/src/libxsmm_hash.c +++ b/src/libxsmm_hash.c @@ -46,82 +46,82 @@ # define LIBXSMM_HASH_ALIGNMENT 8 #endif -#define LIBXSMM_HASH_U64(FN, BEGIN, END, SEED) { \ +#define LIBXSMM_HASH_U64(FN, SEED, BEGIN, END) { \ for (; (BEGIN) < ((END) - 7); (BEGIN) += 8) { LIBXSMM_ASSERT(NULL != (BEGIN)); \ - SEED = (uint32_t)FN(BEGIN, SEED); \ + SEED = (uint32_t)FN(SEED, BEGIN); \ } \ } -#define LIBXSMM_HASH_U32(FN, BEGIN, END, SEED) { \ +#define LIBXSMM_HASH_U32(FN, SEED, BEGIN, END) { \ const uint8_t *const next = (BEGIN) + 4; \ if (next <= (END)) { LIBXSMM_ASSERT(NULL != (BEGIN)); \ - SEED = FN(BEGIN, SEED); BEGIN = next; \ + SEED = FN(SEED, BEGIN); BEGIN = next; \ } \ } -#define LIBXSMM_HASH_U16(FN, BEGIN, END, SEED) { \ +#define LIBXSMM_HASH_U16(FN, SEED, BEGIN, END) { \ const uint8_t *const next = (BEGIN) + 2; \ if (next <= (END)) { LIBXSMM_ASSERT(NULL != (BEGIN)); \ - SEED = FN(BEGIN, SEED); BEGIN = next; \ + SEED = FN(SEED, BEGIN); BEGIN = next; \ } \ } -#define LIBXSMM_HASH_U8(FN, BEGIN, END, SEED) { \ +#define LIBXSMM_HASH_U8(FN, SEED, BEGIN, END) { \ if ((BEGIN) < (END)) { LIBXSMM_ASSERT(NULL != (BEGIN)); \ - SEED = FN(BEGIN, SEED); ++(BEGIN); \ + SEED = FN(SEED, BEGIN); ++(BEGIN); \ } \ } -#define LIBXSMM_HASH_CRC32_U8(PVALUE, SEED) _mm_crc32_u8(SEED, *(const uint8_t*)(PVALUE)) -#define LIBXSMM_HASH_CRC32_U16(PVALUE, SEED) _mm_crc32_u16(SEED, *(const uint16_t*)(PVALUE)) -#define LIBXSMM_HASH_CRC32_U32(PVALUE, SEED) _mm_crc32_u32(SEED, *(const uint32_t*)(PVALUE)) +#define LIBXSMM_HASH_CRC32_U8(SEED, PVALUE) _mm_crc32_u8(SEED, *(const uint8_t*)(PVALUE)) +#define LIBXSMM_HASH_CRC32_U16(SEED, PVALUE) _mm_crc32_u16(SEED, *(const uint16_t*)(PVALUE)) +#define LIBXSMM_HASH_CRC32_U32(SEED, PVALUE) _mm_crc32_u32(SEED, *(const uint32_t*)(PVALUE)) #if (64 > (LIBXSMM_BITS)) -# define LIBXSMM_HASH_CRC32_U64(PVALUE, SEED) \ +# define LIBXSMM_HASH_CRC32_U64(SEED, PVALUE) \ LIBXSMM_HASH_CRC32_U32(((const uint32_t*)(PVALUE))[1], \ LIBXSMM_HASH_CRC32_U32(((const uint32_t*)(PVALUE))[0], (uint32_t)(SEED))) #else -# define LIBXSMM_HASH_CRC32_U64(PVALUE, SEED) _mm_crc32_u64(SEED, *(const uint64_t*)(PVALUE)) +# define LIBXSMM_HASH_CRC32_U64(SEED, PVALUE) _mm_crc32_u64(SEED, *(const uint64_t*)(PVALUE)) #endif -#define LIBXSMM_HASH_UNALIGNED(FN64, FN32, FN16, FN8, DATA, SIZE, SEED) { \ +#define LIBXSMM_HASH_UNALIGNED(FN64, FN32, FN16, FN8, SEED, DATA, SIZE) { \ const uint8_t *begin = (const uint8_t*)(DATA); \ const uint8_t *const endb = begin + (SIZE); \ - LIBXSMM_HASH_U64(FN64, begin, endb, SEED); \ - LIBXSMM_HASH_U32(FN32, begin, endb, SEED); \ - LIBXSMM_HASH_U16(FN16, begin, endb, SEED); \ - return begin == endb ? (SEED) : FN8(begin, SEED); \ + LIBXSMM_HASH_U64(FN64, SEED, begin, endb); \ + LIBXSMM_HASH_U32(FN32, SEED, begin, endb); \ + LIBXSMM_HASH_U16(FN16, SEED, begin, endb); \ + return begin == endb ? (SEED) : FN8(SEED, begin); \ } #if defined(LIBXSMM_HASH_ALIGNMENT) && 8 < (LIBXSMM_HASH_ALIGNMENT) -# define LIBXSMM_HASH(FN64, FN32, FN16, FN8, DATA, SIZE, SEED) { \ +# define LIBXSMM_HASH(FN64, FN32, FN16, FN8, SEED, DATA, SIZE) { \ const uint8_t *begin = (const uint8_t*)(DATA); \ const uint8_t *const endb = begin + (SIZE); \ const uint8_t *const enda = LIBXSMM_ALIGN(begin, LIBXSMM_HASH_ALIGNMENT); \ if ((SIZE) > (size_t)(endb - enda)) { \ - LIBXSMM_HASH_U64(FN64, begin, enda, SEED); \ - LIBXSMM_HASH_U32(FN32, begin, enda, SEED); \ - LIBXSMM_HASH_U16(FN16, begin, enda, SEED); \ - LIBXSMM_HASH_U8(FN8, begin, enda, SEED); \ + LIBXSMM_HASH_U64(FN64, SEED, begin, enda); \ + LIBXSMM_HASH_U32(FN32, SEED, begin, enda); \ + LIBXSMM_HASH_U16(FN16, SEED, begin, enda); \ + LIBXSMM_HASH_U8(FN8, SEED, begin, enda); \ } \ LIBXSMM_ASSUME_ALIGNED(begin, LIBXSMM_HASH_ALIGNMENT); \ - LIBXSMM_HASH_U64(FN64, begin, endb, SEED); \ - LIBXSMM_HASH_U32(FN32, begin, endb, SEED); \ - LIBXSMM_HASH_U16(FN16, begin, endb, SEED); \ - return begin == endb ? (SEED) : FN8(begin, SEED); \ + LIBXSMM_HASH_U64(FN64, SEED, begin, endb); \ + LIBXSMM_HASH_U32(FN32, SEED, begin, endb); \ + LIBXSMM_HASH_U16(FN16, SEED, begin, endb); \ + return begin == endb ? (SEED) : FN8(SEED, begin); \ } #elif defined(LIBXSMM_HASH_ALIGNMENT) && 1 < (LIBXSMM_HASH_ALIGNMENT) -# define LIBXSMM_HASH(FN64, FN32, FN16, FN8, DATA, SIZE, SEED) { \ +# define LIBXSMM_HASH(FN64, FN32, FN16, FN8, SEED, DATA, SIZE) { \ const uint8_t *begin = (const uint8_t*)(DATA); \ const uint8_t *const endb = begin + (SIZE); \ const uint8_t *const enda = LIBXSMM_ALIGN(begin, LIBXSMM_HASH_ALIGNMENT); \ if ((SIZE) > (size_t)(endb - enda)) { \ - LIBXSMM_HASH_U32(FN32, begin, enda, SEED); \ - LIBXSMM_HASH_U16(FN16, begin, enda, SEED); \ - LIBXSMM_HASH_U8(FN8, begin, enda, SEED); \ + LIBXSMM_HASH_U32(FN32, SEED, begin, enda); \ + LIBXSMM_HASH_U16(FN16, SEED, begin, enda); \ + LIBXSMM_HASH_U8(FN8, SEED, begin, enda); \ } \ LIBXSMM_ASSUME_ALIGNED(begin, LIBXSMM_HASH_ALIGNMENT); \ - LIBXSMM_HASH_U64(FN64, begin, endb, SEED); \ - LIBXSMM_HASH_U32(FN32, begin, endb, SEED); \ - LIBXSMM_HASH_U16(FN16, begin, endb, SEED); \ - return begin == endb ? (SEED) : FN8(begin, SEED); \ + LIBXSMM_HASH_U64(FN64, SEED, begin, endb); \ + LIBXSMM_HASH_U32(FN32, SEED, begin, endb); \ + LIBXSMM_HASH_U16(FN16, SEED, begin, endb); \ + return begin == endb ? (SEED) : FN8(SEED, begin); \ } #else # define LIBXSMM_HASH LIBXSMM_HASH_UNALIGNED @@ -129,15 +129,186 @@ typedef uint32_t internal_crc32_entry_type[256]; LIBXSMM_APIVAR(const internal_crc32_entry_type* internal_crc32_table); -LIBXSMM_APIVAR(libxsmm_hash_value_function internal_hash_u32_function); -LIBXSMM_APIVAR(libxsmm_hash_value_function internal_hash_u64_function); -LIBXSMM_APIVAR(libxsmm_hash_value_function internal_hash_u128_function); -LIBXSMM_APIVAR(libxsmm_hash_value_function internal_hash_u256_function); -LIBXSMM_APIVAR(libxsmm_hash_value_function internal_hash_u512_function); +LIBXSMM_APIVAR(libxsmm_hash_function internal_hash_u32_function); +LIBXSMM_APIVAR(libxsmm_hash_function internal_hash_u64_function); +LIBXSMM_APIVAR(libxsmm_hash_function internal_hash_u128_function); +LIBXSMM_APIVAR(libxsmm_hash_function internal_hash_u256_function); +LIBXSMM_APIVAR(libxsmm_hash_function internal_hash_u384_function); +LIBXSMM_APIVAR(libxsmm_hash_function internal_hash_u512_function); LIBXSMM_APIVAR(libxsmm_hash_function internal_hash_function); -LIBXSMM_HASH_API_DEFINITION void libxsmm_hash_init(int target_arch) +LIBXSMM_API_INLINE unsigned int internal_crc32_u8(unsigned int seed, const void* value) +{ + const uint8_t u8 = *(const uint8_t*)value; + LIBXSMM_ASSERT(NULL != internal_crc32_table); + return internal_crc32_table[0][(seed^u8) & 0xFF] ^ (seed >> 8); +} + + +LIBXSMM_API_INLINE unsigned int internal_crc32_u16(unsigned int seed, const void* value) +{ + const uint8_t *const pu8 = (const uint8_t*)value; + seed = internal_crc32_u8(seed, pu8 + 0); + seed = internal_crc32_u8(seed, pu8 + 1); + return seed; +} + + +LIBXSMM_API_INLINE unsigned int internal_crc32_u32(unsigned int seed, const void* value, ...) +{ + const uint32_t u32 = *(const uint32_t*)value, s = seed ^ u32; + uint32_t c0, c1, c2, c3; + LIBXSMM_ASSERT(NULL != internal_crc32_table); + c0 = internal_crc32_table[0][(s >> 24) & 0xFF]; + c1 = internal_crc32_table[1][(s >> 16) & 0xFF]; + c2 = internal_crc32_table[2][(s >> 8) & 0xFF]; + c3 = internal_crc32_table[3][s & 0xFF]; + return (c0 ^ c1) ^ (c2 ^ c3); +} + + +LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) +unsigned int internal_crc32_u32_sse4(unsigned int seed, const void* value, ...) +{ +#if defined(LIBXSMM_INTRINSICS_SSE4) + return LIBXSMM_HASH_CRC32_U32(seed, value); +#else + return internal_crc32_u32(seed, value); +#endif +} + + +LIBXSMM_API_INLINE unsigned int internal_crc32_u64(unsigned int seed, const void* value, ...) +{ + const uint32_t *const pu32 = (const uint32_t*)value; + seed = internal_crc32_u32(seed, pu32 + 0); + seed = internal_crc32_u32(seed, pu32 + 1); + return seed; +} + + +LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) +unsigned int internal_crc32_u64_sse4(unsigned int seed, const void* value, ...) +{ +#if defined(LIBXSMM_INTRINSICS_SSE4) + return (unsigned int)LIBXSMM_HASH_CRC32_U64(seed, value); +#else + return internal_crc32_u64(seed, value); +#endif +} + + +LIBXSMM_API_INLINE unsigned int internal_crc32_u128(unsigned int seed, const void* value, ...) +{ + const uint64_t *const pu64 = (const uint64_t*)value; + seed = internal_crc32_u64(seed, pu64 + 0); + seed = internal_crc32_u64(seed, pu64 + 1); + return seed; +} + + +LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) +unsigned int internal_crc32_u128_sse4(unsigned int seed, const void* value, ...) +{ +#if defined(LIBXSMM_INTRINSICS_SSE4) + const uint64_t *const pu64 = (const uint64_t*)value; + return (unsigned int)LIBXSMM_HASH_CRC32_U64(LIBXSMM_HASH_CRC32_U64(seed, pu64), pu64 + 1); +#else + return internal_crc32_u128(seed, value); +#endif +} + + +LIBXSMM_API_INLINE unsigned int internal_crc32_u256(unsigned int seed, const void* value, ...) +{ + const uint8_t *const pu8 = (const uint8_t*)value; + seed = internal_crc32_u128(seed, pu8 + 0x00); + seed = internal_crc32_u128(seed, pu8 + 0x10); + return seed; +} + + +LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) +unsigned int internal_crc32_u256_sse4(unsigned int seed, const void* value, ...) +{ +#if defined(LIBXSMM_INTRINSICS_SSE4) + const uint8_t *const pu8 = (const uint8_t*)value; + seed = internal_crc32_u128_sse4(seed, pu8 + 0x00); + seed = internal_crc32_u128_sse4(seed, pu8 + 0x10); + return seed; +#else + return internal_crc32_u256(seed, value); +#endif +} + + +LIBXSMM_API_INLINE unsigned int internal_crc32_u384(unsigned int seed, const void* value, ...) +{ + const uint8_t *const pu8 = (const uint8_t*)value; + seed = internal_crc32_u256(seed, pu8 + 0x00); + seed = internal_crc32_u128(seed, pu8 + 0x20); + return seed; +} + + +LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) +unsigned int internal_crc32_u384_sse4(unsigned int seed, const void* value, ...) +{ +#if defined(LIBXSMM_INTRINSICS_SSE4) + const uint8_t *const pu8 = (const uint8_t*)value; + seed = internal_crc32_u256_sse4(seed, pu8 + 0x00); + seed = internal_crc32_u128_sse4(seed, pu8 + 0x20); + return seed; +#else + return internal_crc32_u384(seed, value); +#endif +} + + +LIBXSMM_API_INLINE unsigned int internal_crc32_u512(unsigned int seed, const void* value, ...) +{ + const uint8_t *const pu8 = (const uint8_t*)value; + seed = internal_crc32_u256(seed, pu8 + 0x00); + seed = internal_crc32_u256(seed, pu8 + 0x20); + return seed; +} + + +LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) +unsigned int internal_crc32_u512_sse4(unsigned int seed, const void* value, ...) +{ +#if defined(LIBXSMM_INTRINSICS_SSE4) + const uint8_t *const pu8 = (const uint8_t*)value; + seed = internal_crc32_u256_sse4(seed, pu8 + 0x00); + seed = internal_crc32_u256_sse4(seed, pu8 + 0x20); + return seed; +#else + return internal_crc32_u512(seed, value); +#endif +} + + +LIBXSMM_API_INLINE unsigned int internal_crc32(unsigned int seed, const void* data, size_t size) +{ + LIBXSMM_ASSERT(NULL != data || 0 == size); + LIBXSMM_HASH(internal_crc32_u64, internal_crc32_u32, internal_crc32_u16, internal_crc32_u8, seed, data, size); +} + + +LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) +unsigned int internal_crc32_sse4(unsigned int seed, const void* data, size_t size) +{ + LIBXSMM_ASSERT(NULL != data || 0 == size); +#if defined(LIBXSMM_INTRINSICS_SSE4) + LIBXSMM_HASH(LIBXSMM_HASH_CRC32_U64, LIBXSMM_HASH_CRC32_U32, LIBXSMM_HASH_CRC32_U16, LIBXSMM_HASH_CRC32_U8, seed, data, size); +#else + return internal_crc32(seed, data, size); +#endif +} + + +LIBXSMM_API_INTERN void libxsmm_hash_init(int target_arch) { /* table-based implementation taken from http://dpdk.org/. */ static const LIBXSMM_RETARGETABLE internal_crc32_entry_type crc32_table[] = { @@ -279,25 +450,22 @@ LIBXSMM_HASH_API_DEFINITION void libxsmm_hash_init(int target_arch) } }; internal_crc32_table = crc32_table; -#if defined(LIBXSMM_HASH_SW) +#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) LIBXSMM_UNUSED(target_arch); #else -# if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) - LIBXSMM_UNUSED(target_arch); -# else if (LIBXSMM_X86_SSE4 <= target_arch) -# endif +#endif { - internal_hash_u32_function = libxsmm_crc32_u32_sse4; - internal_hash_u64_function = libxsmm_crc32_u64_sse4; - internal_hash_u128_function = libxsmm_crc32_u128_sse4; - internal_hash_u256_function = libxsmm_crc32_u256_sse4; - internal_hash_u512_function = libxsmm_crc32_u512_sse4; - internal_hash_function = libxsmm_crc32_sse4; + internal_hash_u32_function = internal_crc32_u32_sse4; + internal_hash_u64_function = internal_crc32_u64_sse4; + internal_hash_u128_function = internal_crc32_u128_sse4; + internal_hash_u256_function = internal_crc32_u256_sse4; + internal_hash_u384_function = internal_crc32_u384_sse4; + internal_hash_u512_function = internal_crc32_u512_sse4; + internal_hash_function = (libxsmm_hash_function)internal_crc32_sse4; } -# if (LIBXSMM_X86_SSE4 > LIBXSMM_STATIC_TARGET_ARCH) +#if (LIBXSMM_X86_SSE4 > LIBXSMM_STATIC_TARGET_ARCH) else -# endif #endif #if (LIBXSMM_X86_SSE4 > LIBXSMM_STATIC_TARGET_ARCH) { @@ -308,24 +476,26 @@ LIBXSMM_HASH_API_DEFINITION void libxsmm_hash_init(int target_arch) error_once = 1; /* no need for atomics */ } # endif - internal_hash_u32_function = libxsmm_crc32_u32_sw; - internal_hash_u64_function = libxsmm_crc32_u64_sw; - internal_hash_u128_function = libxsmm_crc32_u128_sw; - internal_hash_u256_function = libxsmm_crc32_u256_sw; - internal_hash_u512_function = libxsmm_crc32_u512_sw; - internal_hash_function = libxsmm_crc32_sw; + internal_hash_u32_function = internal_crc32_u32; + internal_hash_u64_function = internal_crc32_u64; + internal_hash_u128_function = internal_crc32_u128; + internal_hash_u256_function = internal_crc32_u256; + internal_hash_u384_function = internal_crc32_u384; + internal_hash_u512_function = internal_crc32_u512; + internal_hash_function = (libxsmm_hash_function)internal_crc32; } #endif LIBXSMM_ASSERT(NULL != internal_hash_u32_function); LIBXSMM_ASSERT(NULL != internal_hash_u64_function); LIBXSMM_ASSERT(NULL != internal_hash_u128_function); LIBXSMM_ASSERT(NULL != internal_hash_u256_function); + LIBXSMM_ASSERT(NULL != internal_hash_u384_function); LIBXSMM_ASSERT(NULL != internal_hash_u512_function); LIBXSMM_ASSERT(NULL != internal_hash_function); } -LIBXSMM_HASH_API_DEFINITION void libxsmm_hash_finalize(void) +LIBXSMM_API_INTERN void libxsmm_hash_finalize(void) { #if !defined(NDEBUG) internal_crc32_table = NULL; @@ -333,221 +503,75 @@ LIBXSMM_HASH_API_DEFINITION void libxsmm_hash_finalize(void) internal_hash_u64_function = NULL; internal_hash_u128_function = NULL; internal_hash_u256_function = NULL; + internal_hash_u384_function = NULL; internal_hash_u512_function = NULL; internal_hash_function = NULL; #endif } -LIBXSMM_API_INLINE unsigned int internal_crc32_u8_sw(const void* value, unsigned int seed) +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u32(unsigned int seed, const void* value, ...) { - const uint8_t u8 = *(const uint8_t*)value; - LIBXSMM_ASSERT(NULL != internal_crc32_table); - return internal_crc32_table[0][(seed^u8)&0xFF] ^ (seed >> 8); -} - - -LIBXSMM_API_INLINE unsigned int internal_crc32_u16_sw(const void* value, unsigned int seed) -{ - const uint8_t *const pu8 = (const uint8_t*)value; - seed = internal_crc32_u8_sw(pu8 + 0, seed); - seed = internal_crc32_u8_sw(pu8 + 1, seed); - return seed; -} - - -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_u32(const void* value, unsigned int seed) -{ -#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) && !defined(LIBXSMM_HASH_SW) - return LIBXSMM_HASH_CRC32_U32(value, seed); +#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) + return LIBXSMM_HASH_CRC32_U32(seed, value); #else /* pointer based function call */ LIBXSMM_ASSERT(NULL != internal_hash_u32_function); - return internal_hash_u32_function(value, seed); -#endif -} - - -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_u32_sw(const void* value, unsigned int seed) -{ - const uint32_t u32 = *(const uint32_t*)value, s = seed ^ u32; - uint32_t c0, c1, c2, c3; - LIBXSMM_ASSERT(NULL != internal_crc32_table); - c0 = internal_crc32_table[0][(s>>24)&0xFF]; - c1 = internal_crc32_table[1][(s>>16)&0xFF]; - c2 = internal_crc32_table[2][(s>>8)&0xFF]; - c3 = internal_crc32_table[3][s&0xFF]; - return (c0 ^ c1) ^ (c2 ^ c3); -} - - -LIBXSMM_HASH_API_DEFINITION LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) -unsigned int libxsmm_crc32_u32_sse4(const void* value, unsigned int seed) -{ -#if defined(LIBXSMM_INTRINSICS_SSE4) - return LIBXSMM_HASH_CRC32_U32(value, seed); -#else - return libxsmm_crc32_u32_sw(value, seed); + return internal_hash_u32_function(seed, value); #endif } -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_u64(const void* value, unsigned int seed) +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u64(unsigned int seed, const void* value, ...) { -#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) && !defined(LIBXSMM_HASH_SW) - return (unsigned int)LIBXSMM_HASH_CRC32_U64(value, seed); +#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) + return (unsigned int)LIBXSMM_HASH_CRC32_U64(seed, value); #else /* pointer based function call */ LIBXSMM_ASSERT(NULL != internal_hash_u64_function); - return internal_hash_u64_function(value, seed); + return internal_hash_u64_function(seed, value); #endif } -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_u64_sw(const void* value, unsigned int seed) +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u128(unsigned int seed, const void* value, ...) { - const uint32_t *const pu32 = (const uint32_t*)value; - seed = libxsmm_crc32_u32_sw(pu32 + 0, seed); - seed = libxsmm_crc32_u32_sw(pu32 + 1, seed); - return seed; -} - - -LIBXSMM_HASH_API_DEFINITION LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) -unsigned int libxsmm_crc32_u64_sse4(const void* value, unsigned int seed) -{ -#if defined(LIBXSMM_INTRINSICS_SSE4) - return (unsigned int)LIBXSMM_HASH_CRC32_U64(value, seed); -#else - return libxsmm_crc32_u64_sw(value, seed); -#endif -} - - -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_u128(const void* value, unsigned int seed) -{ -#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) && !defined(LIBXSMM_HASH_SW) - return libxsmm_crc32_u128_sse4(value, seed); +#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) + return internal_crc32_u128_sse4(seed, value); #else /* pointer based function call */ LIBXSMM_ASSERT(NULL != internal_hash_u128_function); - return internal_hash_u128_function(value, seed); + return internal_hash_u128_function(seed, value); #endif } -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_u128_sw(const void* value, unsigned int seed) +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u256(unsigned int seed, const void* value, ...) { - const uint64_t *const pu64 = (const uint64_t*)value; - seed = libxsmm_crc32_u64_sw(pu64 + 0, seed); - seed = libxsmm_crc32_u64_sw(pu64 + 1, seed); - return seed; -} - - -LIBXSMM_HASH_API_DEFINITION LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) -unsigned int libxsmm_crc32_u128_sse4(const void* value, unsigned int seed) -{ -#if defined(LIBXSMM_INTRINSICS_SSE4) - const uint64_t *const pu64 = (const uint64_t*)value; - return (unsigned int)LIBXSMM_HASH_CRC32_U64(pu64 + 1, LIBXSMM_HASH_CRC32_U64(pu64, seed)); -#else - return libxsmm_crc32_u128_sw(value, seed); -#endif -} - - -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_u256(const void* value, unsigned int seed) -{ -#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) && !defined(LIBXSMM_HASH_SW) - return libxsmm_crc32_u256_sse4(value, seed); +#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) + return internal_crc32_u256_sse4(seed, value); #else /* pointer based function call */ LIBXSMM_ASSERT(NULL != internal_hash_u256_function); - return internal_hash_u256_function(value, seed); + return internal_hash_u256_function(seed, value); #endif } -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_u256_sw(const void* value, unsigned int seed) +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u512(unsigned int seed, const void* value, ...) { - const uint8_t *const pu8 = (const uint8_t*)value; - seed = libxsmm_crc32_u128_sw(pu8 + 0x00, seed); - seed = libxsmm_crc32_u128_sw(pu8 + 0x10, seed); - return seed; -} - - -LIBXSMM_HASH_API_DEFINITION LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) -unsigned int libxsmm_crc32_u256_sse4(const void* value, unsigned int seed) -{ -#if defined(LIBXSMM_INTRINSICS_SSE4) - const uint8_t *const pu8 = (const uint8_t*)value; - seed = libxsmm_crc32_u128_sse4(pu8 + 0x00, seed); - seed = libxsmm_crc32_u128_sse4(pu8 + 0x10, seed); - return seed; -#else - return libxsmm_crc32_u256_sw(value, seed); -#endif -} - - -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_u512(const void* value, unsigned int seed) -{ -#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) && !defined(LIBXSMM_HASH_SW) - return libxsmm_crc32_u512_sse4(value, seed); +#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) + return internal_crc32_u512_sse4(seed, value); #else /* pointer based function call */ LIBXSMM_ASSERT(NULL != internal_hash_u256_function); - return internal_hash_u512_function(value, seed); + return internal_hash_u512_function(seed, value); #endif } -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_u512_sw(const void* value, unsigned int seed) +LIBXSMM_API_INTERN unsigned int libxsmm_crc32(unsigned int seed, const void* data, size_t size) { - const uint8_t *const pu8 = (const uint8_t*)value; - seed = libxsmm_crc32_u256_sw(pu8 + 0x00, seed); - seed = libxsmm_crc32_u256_sw(pu8 + 0x20, seed); - return seed; -} - - -LIBXSMM_HASH_API_DEFINITION LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) -unsigned int libxsmm_crc32_u512_sse4(const void* value, unsigned int seed) -{ -#if defined(LIBXSMM_INTRINSICS_SSE4) - const uint8_t *const pu8 = (const uint8_t*)value; - seed = libxsmm_crc32_u256_sse4(pu8 + 0x00, seed); - seed = libxsmm_crc32_u256_sse4(pu8 + 0x20, seed); - return seed; -#else - return libxsmm_crc32_u512_sw(value, seed); -#endif -} - - -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32(const void* data, size_t size, unsigned int seed) -{ -#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) && !defined(LIBXSMM_HASH_SW) - return libxsmm_crc32_sse4(data, size, seed); +#if (LIBXSMM_X86_SSE4 <= LIBXSMM_STATIC_TARGET_ARCH) + return internal_crc32_sse4(seed, data, size); #else /* pointer based function call */ LIBXSMM_ASSERT(NULL != internal_hash_function); - return internal_hash_function(data, size, seed); -#endif -} - - -LIBXSMM_HASH_API_DEFINITION unsigned int libxsmm_crc32_sw(const void* data, size_t size, unsigned int seed) -{ - LIBXSMM_ASSERT(NULL != data || 0 == size); - LIBXSMM_HASH(libxsmm_crc32_u64_sw, libxsmm_crc32_u32_sw, internal_crc32_u16_sw, internal_crc32_u8_sw, data, size, seed); -} - - -LIBXSMM_HASH_API_DEFINITION LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE4) -unsigned int libxsmm_crc32_sse4(const void* data, size_t size, unsigned int seed) -{ - LIBXSMM_ASSERT(NULL != data || 0 == size); -#if defined(LIBXSMM_INTRINSICS_SSE4) - LIBXSMM_HASH(LIBXSMM_HASH_CRC32_U64, LIBXSMM_HASH_CRC32_U32, LIBXSMM_HASH_CRC32_U16, LIBXSMM_HASH_CRC32_U8, data, size, seed); -#else - return libxsmm_crc32_sw(data, size, seed); + return internal_hash_function(seed, data, size); #endif } diff --git a/src/libxsmm_hash.h b/src/libxsmm_hash.h index 5df947d209..d392f64c3c 100644 --- a/src/libxsmm_hash.h +++ b/src/libxsmm_hash.h @@ -33,66 +33,30 @@ #include -#if !defined(LIBXSMM_HASH_SW) && 0 -# define LIBXSMM_HASH_SW -#endif - -#if defined(LIBXSMM_BUILD) && !defined(LIBXSMM_HASH_NOINLINE) -# define LIBXSMM_HASH_API LIBXSMM_API_INLINE -# define LIBXSMM_HASH_API_DEFINITION LIBXSMM_HASH_API LIBXSMM_ATTRIBUTE_UNUSED -#else -# define LIBXSMM_HASH_API LIBXSMM_API -# define LIBXSMM_HASH_API_DEFINITION LIBXSMM_API -#endif - /* Map number of Bytes to number of bits. */ #define libxsmm_crc32_b16 libxsmm_crc32_u128 #define libxsmm_crc32_b32 libxsmm_crc32_u256 +#define libxsmm_crc32_b48 libxsmm_crc32_u384 #define libxsmm_crc32_b64 libxsmm_crc32_u512 -/** Function type representing the CRC32 functionality (elemental/value form). */ -LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE unsigned int (*libxsmm_hash_value_function)( - const void* /*value*/, unsigned int /*seed*/); -/** Function type representing the CRC32 functionality (taking an entire buffer). */ +/** Function type representing the CRC32 functionality. */ LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE unsigned int (*libxsmm_hash_function)( - const void* /*data*/, size_t /*size*/, unsigned int /*seed*/); + unsigned int /*seed*/, const void* /*data*/, ... /*size*/); /** Initialize hash function module; not thread-safe. */ -LIBXSMM_HASH_API void libxsmm_hash_init(int target_arch); -LIBXSMM_HASH_API void libxsmm_hash_finalize(void); - -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u32(const void* value, unsigned int seed); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u32_sw(const void* value, unsigned int seed); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u32_sse4(const void* value, unsigned int seed); - -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u64(const void* value, unsigned int seed); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u64_sw(const void* value, unsigned int seed); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u64_sse4(const void* value, unsigned int seed); +LIBXSMM_API_INTERN void libxsmm_hash_init(int target_arch); +LIBXSMM_API_INTERN void libxsmm_hash_finalize(void); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u128(const void* value, unsigned int seed); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u128_sw(const void* value, unsigned int seed); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u128_sse4(const void* value, unsigned int seed); +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u32(unsigned int seed, const void* value, ...); +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u64(unsigned int seed, const void* value, ...); +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u128(unsigned int seed, const void* value, ...); +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u256(unsigned int seed, const void* value, ...); +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u384(unsigned int seed, const void* value, ...); +LIBXSMM_API_INTERN unsigned int libxsmm_crc32_u512(unsigned int seed, const void* value, ...); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u256(const void* value, unsigned int seed); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u256_sw(const void* value, unsigned int seed); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u256_sse4(const void* value, unsigned int seed); - -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u512(const void* value, unsigned int seed); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u512_sw(const void* value, unsigned int seed); -LIBXSMM_HASH_API unsigned int libxsmm_crc32_u512_sse4(const void* value, unsigned int seed); - -/** Dispatched implementation which may (or may not) use a SIMD extension. */ -LIBXSMM_HASH_API unsigned int libxsmm_crc32(const void* data, size_t size, unsigned int seed); /** Calculate the CRC32 for a given quantity (size) of raw data according to the seed. */ -LIBXSMM_HASH_API unsigned int libxsmm_crc32_sw(const void* data, size_t size, unsigned int seed); -/** Similar to libxsmm_crc32_sw (uses CRC32 instructions available since SSE4.2). */ -LIBXSMM_HASH_API unsigned int libxsmm_crc32_sse4(const void* data, size_t size, unsigned int seed); - - -#if defined(LIBXSMM_BUILD) && !defined(LIBXSMM_HASH_NOINLINE) -# include "libxsmm_hash.c" -#endif +LIBXSMM_API_INTERN unsigned int libxsmm_crc32(unsigned int seed, const void* data, size_t size); #endif /*LIBXSMM_HASH_H*/ diff --git a/src/libxsmm_main.c b/src/libxsmm_main.c index 8342cf2687..d2404898a8 100644 --- a/src/libxsmm_main.c +++ b/src/libxsmm_main.c @@ -362,7 +362,7 @@ LIBXSMM_API_INLINE void internal_register_static_code( const libxsmm_gemm_descriptor *const desc = libxsmm_gemm_descriptor_dinit(&blob, precision, m, n, k, lda, ldb, ldc, LIBXSMM_ALPHA, LIBXSMM_BETA, LIBXSMM_FLAGS, INTERNAL_PREFETCH); unsigned int i = LIBXSMM_HASH_MOD( - libxsmm_crc32(desc, LIBXSMM_MIN(sizeof(libxsmm_gemm_descriptor), size), LIBXSMM_HASH_SEED), + libxsmm_crc32(LIBXSMM_HASH_SEED, desc, LIBXSMM_MIN(sizeof(libxsmm_gemm_descriptor), size)), LIBXSMM_CAPACITY_REGISTRY); libxsmm_code_pointer* dst_entry = registry + i; #if !defined(NDEBUG) @@ -1617,9 +1617,9 @@ LIBXSMM_API_INLINE libxsmm_code_pointer internal_find_code(libxsmm_descriptor* d #endif { #if defined(LIBXSMM_PAD_DESC) - unsigned int i = LIBXSMM_CONCATENATE(libxsmm_crc32_b, LIBXSMM_HASH_SIZE)(desc, LIBXSMM_HASH_SEED); + unsigned int i = LIBXSMM_CONCATENATE(libxsmm_crc32_b, LIBXSMM_HASH_SIZE)(LIBXSMM_HASH_SEED, desc); #else - unsigned int i = libxsmm_crc32(desc, LIBXSMM_MIN(size, LIBXSMM_HASH_SIZE), LIBXSMM_HASH_SEED); + unsigned int i = libxsmm_crc32(LIBXSMM_HASH_SEED, desc, LIBXSMM_MIN(size, LIBXSMM_HASH_SIZE)); #endif unsigned int i0 = i = LIBXSMM_HASH_MOD(i, LIBXSMM_CAPACITY_REGISTRY), mode = 0, diff = 1; LIBXSMM_ASSERT(NULL != internal_registry); diff --git a/src/libxsmm_malloc.c b/src/libxsmm_malloc.c index 928cfc0f62..db881a6aa8 100644 --- a/src/libxsmm_malloc.c +++ b/src/libxsmm_malloc.c @@ -430,9 +430,8 @@ LIBXSMM_API_INLINE internal_malloc_info_type* internal_malloc_info(const void* m #if defined(LIBXSMM_MALLOC_NOCRC) return result; #else /* calculate checksum over info */ - return (NULL != result && result->hash == libxsmm_crc32( - result, ((const char*)&result->hash) - ((const char*)result), - LIBXSMM_MALLOC_SEED)) ? result : NULL; + return (NULL != result && result->hash == libxsmm_crc32(LIBXSMM_MALLOC_SEED, result, + ((const char*)&result->hash) - ((const char*)result)) ? result : NULL); #endif } @@ -811,8 +810,9 @@ LIBXSMM_API_INTERN int libxsmm_xmalloc(void** memory, size_t size, size_t alignm info->size = size; info->flags = flags; #if !defined(LIBXSMM_MALLOC_NOCRC) /* calculate checksum over info */ - info->hash = libxsmm_crc32(info, /* info size minus actual hash value */ - (unsigned int)(((char*)&info->hash) - ((char*)info)), LIBXSMM_MALLOC_SEED); + info->hash = libxsmm_crc32(LIBXSMM_MALLOC_SEED, info, + /* info size minus actual hash value */ + (unsigned int)(((char*)&info->hash) - ((char*)info))); #endif *memory = aligned; } @@ -1048,8 +1048,9 @@ LIBXSMM_API_INTERN int libxsmm_malloc_attrib(void** memory, int flags, const cha info->pointer = info->reloc; info->reloc = NULL; # if !defined(LIBXSMM_MALLOC_NOCRC) /* update checksum */ - info->hash = libxsmm_crc32(info, /* info size minus actual hash value */ - (unsigned int)(((char*)&info->hash) - ((char*)info)), LIBXSMM_MALLOC_SEED); + info->hash = libxsmm_crc32(LIBXSMM_MALLOC_SEED, info, + /* info size minus actual hash value */ + (unsigned int)(((char*)&info->hash) - ((char*)info))); # endif /* treat memory protection errors as soft error; ignore return value */ munmap(buffer, alloc_size); #endif @@ -1058,8 +1059,9 @@ LIBXSMM_API_INTERN int libxsmm_malloc_attrib(void** memory, int flags, const cha else { /* malloc-based fall-back */ int mprotect_result; # if !defined(LIBXSMM_MALLOC_NOCRC) && defined(LIBXSMM_VTUNE) /* update checksum */ - info->hash = libxsmm_crc32(info, /* info size minus actual hash value */ - (unsigned int)(((char*)&info->hash) - ((char*)info)), LIBXSMM_MALLOC_SEED); + info->hash = libxsmm_crc32(LIBXSMM_MALLOC_SEED, info, + /* info size minus actual hash value */ + (unsigned int)(((char*)&info->hash) - ((char*)info))); # endif /* treat memory protection errors as soft error; ignore return value */ mprotect_result = mprotect(buffer, alloc_size/*entire memory region*/, PROT_READ | PROT_EXEC); if (EXIT_SUCCESS != mprotect_result) { @@ -1117,7 +1119,7 @@ LIBXSMM_API_INLINE const void* internal_malloc_site(const char* site) if (NULL != site) { #if !defined(LIBXSMM_STRING_POOLING) if ((LIBXSMM_MALLOC_SCRATCH_INTERNAL) != site) { - const uintptr_t hash = libxsmm_crc32(site, strlen(site), LIBXSMM_MALLOC_SEED); + const uintptr_t hash = libxsmm_crc32(LIBXSMM_MALLOC_SEED, site, strlen(site)); result = (const void*)((LIBXSMM_MALLOC_SCRATCH_INTERNAL_SITE) != hash ? hash : (hash - 1)); LIBXSMM_ASSERT((LIBXSMM_MALLOC_SCRATCH_INTERNAL) != result); } diff --git a/src/libxsmm_math.c b/src/libxsmm_math.c index 77e6c19503..caa6d27752 100644 --- a/src/libxsmm_math.c +++ b/src/libxsmm_math.c @@ -402,7 +402,7 @@ LIBXSMM_API unsigned int libxsmm_diff_npot(const void* a, const void* bn, unsign LIBXSMM_API unsigned int libxsmm_hash(const void* data, unsigned int size, unsigned int seed) { LIBXSMM_INIT - return libxsmm_crc32(data, size, seed); + return libxsmm_crc32(seed, data, size); } diff --git a/tests/hash.c b/tests/hash.c index afdb5ac7e2..05993baf82 100644 --- a/tests/hash.c +++ b/tests/hash.c @@ -34,6 +34,10 @@ # include #endif +#if !defined(ELEM_TYPE) +# define ELEM_TYPE int +#endif + /** * This test case is NOT an example of how to use LIBXSMM @@ -42,33 +46,22 @@ */ int main(void) { - const unsigned int seed = 1975; - unsigned int size = 2507, i, h1, h2; + const unsigned int seed = 1975, size = 2507; + const unsigned int n512 = 512 / (8 * sizeof(ELEM_TYPE)); + unsigned int s = LIBXSMM_UP(size, n512), i, h1, h2; int result = EXIT_SUCCESS; const int* value; - int *const data = (int*)libxsmm_malloc(sizeof(int) * size); - if (NULL == data) size = 0; - for (i = 0; i < size; ++i) data[i] = (rand() - ((RAND_MAX) >> 1)); - - h1 = libxsmm_crc32(data, sizeof(int) * size, seed); - h2 = libxsmm_crc32_sw(data, sizeof(int) * size, seed); - if (h1 != h2) { -#if defined(_DEBUG) - fprintf(stderr, "(crc32=%u) != (crc32_sw=%u)\n", h1, h2); -#endif - result = EXIT_FAILURE; - } + ELEM_TYPE *const data = (ELEM_TYPE*)libxsmm_malloc(sizeof(ELEM_TYPE) * s); + if (NULL == data) s = 0; + for (i = 0; i < s; ++i) data[i] = (rand() - ((RAND_MAX) >> 1)); - size >>= 4; - value = data; - h1 = h2 = seed; - for (i = 0; i < size; ++i) { - h1 = libxsmm_crc32_u512(value, h1); - h2 = libxsmm_crc32_u512_sw(value, h2); - value += 16; + h1 = libxsmm_crc32(seed, data, sizeof(ELEM_TYPE) * s); + h2 = seed; value = data; + for (i = 0; i < s; i += n512) { + h2 = libxsmm_crc32_u512(h2, value + i); } - if (h1 != h2 || h1 != libxsmm_crc32(data, sizeof(int) * 16 * size, seed)) { + if (h1 != h2) { #if defined(_DEBUG) fprintf(stderr, "(crc32=%u) != (crc32_sw=%u)\n", h1, h2); #endif diff --git a/version.txt b/version.txt index 9beaddb16d..6e04678552 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -master-1.10-972 +master-1.10-973