Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement special conversions for 64-bit integers
This change speeds up the conversion of small integers.

```
Comparison:
big_n9
  master:            4003688.9 i/s
  bigdecimal 3.0.0:  1270551.0 i/s - 3.15x slower

big_n19
  master:            5410096.4 i/s
  bigdecimal 3.0.0:  1000250.3 i/s - 5.41x slower
```
- Loading branch information
Showing
7 changed files
with
378 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
#ifndef BIGDECIMAL_BITS_H | ||
#define BIGDECIMAL_BITS_H | ||
|
||
#include "feature.h" | ||
#include "static_assert.h" | ||
|
||
#if defined(HAVE_X86INTRIN_H) | ||
# include <x86intrin.h> /* for _lzcnt_u64 */ | ||
#elif defined(_MSC_VER) && _MSC_VER >= 1310 | ||
# include <intrin.h> /* for the following intrinsics */ | ||
#endif | ||
|
||
#if defined(_MSC_VER) && defined(__AVX2__) | ||
# pragma intrinsic(__lzcnt) | ||
# pragma intrinsic(__lzcnt64) | ||
#endif | ||
|
||
/*
 * MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max)
 *
 * Evaluates to non-zero when the product a * b would fall outside the
 * range [min, max], and to zero otherwise.  The product itself is never
 * computed; the test is done with comparisons and divisions only:
 *
 *   a == 0          -> the product is 0, never an overflow
 *   a == -1         -> the product is -b, which exceeds max iff b < -max
 *                      (handled apart from the general case, presumably
 *                      because (min) / -1 can itself overflow)
 *   a > 0,  b > 0   -> overflow iff b > max / a
 *   a > 0,  b <= 0  -> overflow iff b < min / a
 *   a < -1, b > 0   -> overflow iff b > min / a
 *   a < -1, b <= 0  -> overflow iff b < max / a
 *
 * Every argument is expanded more than once, so callers must not pass
 * expressions with side effects.
 */
#define MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \ | ||
(a) == 0 ? 0 : \ | ||
(a) == -1 ? (b) < -(max) : \ | ||
(a) > 0 ? \ | ||
((b) > 0 ? (max) / (a) < (b) : (min) / (a) > (b)) : \ | ||
((b) > 0 ? (min) / (a) < (b) : (max) / (a) > (b))) | ||
|
||
/*
 * bit_length(x)
 *
 * Yields, as an unsigned int, the operand width minus the number of
 * leading zero bits of x, i.e. the position of the highest set bit
 * counted from 1 (0 when x is 0).
 *
 * The counter is selected from sizeof(x): operands no wider than int32_t
 * go through nlz_int32(), operands no wider than int64_t through
 * nlz_int64(), and, only when HAVE_UINT128_T is defined, anything wider
 * through nlz_int128().  Without HAVE_UINT128_T every operand wider than
 * int32_t is converted to uint64_t.
 *
 * x is converted to the unsigned type of the chosen width before counting,
 * and it appears in every arm of the expansion, so it should be free of
 * side effects.
 */
#ifdef HAVE_UINT128_T | ||
# define bit_length(x) \ | ||
(unsigned int) \ | ||
(sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \ | ||
sizeof(x) <= sizeof(int64_t) ? 64 - nlz_int64((uint64_t)(x)) : \ | ||
128 - nlz_int128((uint128_t)(x))) | ||
#else | ||
# define bit_length(x) \ | ||
(unsigned int) \ | ||
(sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \ | ||
64 - nlz_int64((uint64_t)(x))) | ||
#endif | ||
|
||
/* Forward declarations of the leading-zero counters defined further down
 * in this header.  The 128-bit variant is declared only when
 * HAVE_UINT128_T is defined. */
static inline unsigned nlz_int32(uint32_t x); | ||
static inline unsigned nlz_int64(uint64_t x); | ||
#ifdef HAVE_UINT128_T | ||
static inline unsigned nlz_int128(uint128_t x); | ||
#endif | ||
|
||
static inline unsigned int | ||
nlz_int32(uint32_t x) | ||
{ | ||
#if defined(_MSC_VER) && defined(__AVX2__) | ||
/* Note: It seems there is no such thing like __LZCNT__ predefined in MSVC. | ||
* AMD CPUs have had this instruction for decades (since K10) but for | ||
* Intel, Haswell is the oldest one. We need to use __AVX2__ for maximum | ||
* safety. */ | ||
return (unsigned int)__lzcnt(x); | ||
|
||
#elif defined(__x86_64__) && defined(__LZCNT__) /* && ! defined(MJIT_HEADER) */ | ||
return (unsigned int)_lzcnt_u32(x); | ||
|
||
#elif defined(_MSC_VER) && _MSC_VER >= 1400 /* &&! defined(__AVX2__) */ | ||
unsigned long r; | ||
return _BitScanReverse(&r, x) ? (31 - (int)r) : 32; | ||
|
||
#elif __has_builtin(__builtin_clz) | ||
STATIC_ASSERT(sizeof_int, sizeof(int) * CHAR_BIT == 32); | ||
return x ? (unsigned int)__builtin_clz(x) : 32; | ||
|
||
#else | ||
uint32_t y; | ||
unsigned n = 32; | ||
y = x >> 16; if (y) {n -= 16; x = y;} | ||
y = x >> 8; if (y) {n -= 8; x = y;} | ||
y = x >> 4; if (y) {n -= 4; x = y;} | ||
y = x >> 2; if (y) {n -= 2; x = y;} | ||
y = x >> 1; if (y) {return n - 2;} | ||
return (unsigned int)(n - x); | ||
#endif | ||
} | ||
|
||
/*
 * Count the leading zero bits of a 64-bit value.
 *
 * Returns the number of consecutive zero bits starting at the most
 * significant bit of x: 0 when bit 63 is set, 64 when x is 0.
 *
 * One implementation is selected at preprocessing time, from a hardware
 * LZCNT intrinsic down to a portable shift-based fallback.
 */
static inline unsigned int
nlz_int64(uint64_t x)
{
#if defined(_MSC_VER) && defined(__AVX2__)
    return (unsigned int)__lzcnt64(x);

#elif defined(__x86_64__) && defined(__LZCNT__) && defined(HAVE_X86INTRIN_H)
    /* _lzcnt_u64 is declared by <x86intrin.h>, which this header includes
     * only when HAVE_X86INTRIN_H is defined; without that guard the
     * intrinsic would be used undeclared. */
    return (unsigned int)_lzcnt_u64(x);

#elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 /* && ! defined(__AVX2__) */
    /* r receives the index of the highest set bit; the call returns zero
     * when x has no set bit at all. */
    unsigned long r;
    return _BitScanReverse64(&r, x) ? (63u - (unsigned int)r) : 64;

#elif __has_builtin(__builtin_clzl)
    if (x == 0) {
        /* The __builtin_clz family is undefined for a zero argument. */
        return 64;
    }
    if (sizeof(long) * CHAR_BIT == 64) {
        return (unsigned int)__builtin_clzl((unsigned long)x);
    }
    /* unsigned long long is at least 64 bits wide.  Subtracting whatever it
     * has beyond 64 bits keeps the result correct for any width, so no
     * unreachable branch is needed. */
    return (unsigned int)__builtin_clzll((unsigned long long)x)
         - (unsigned int)(sizeof(unsigned long long) * CHAR_BIT - 64);

#else
    /* Portable fallback: halve the search window at every step.  Whenever
     * the upper half holds a set bit, drop its width from n and continue
     * with that half. */
    uint64_t y;
    unsigned int n = 64;
    y = x >> 32; if (y) {n -= 32; x = y;}
    y = x >> 16; if (y) {n -= 16; x = y;}
    y = x >>  8; if (y) {n -=  8; x = y;}
    y = x >>  4; if (y) {n -=  4; x = y;}
    y = x >>  2; if (y) {n -=  2; x = y;}
    /* x is now in 0..3: two significant bits left when bit 1 is set,
     * otherwise x itself (0 or 1) is the number of significant bits. */
    y = x >>  1; if (y) {return n - 2;}
    return (unsigned int)(n - x);

#endif
}
|
||
#ifdef HAVE_UINT128_T
/*
 * Count the leading zero bits of a 128-bit value by delegating to
 * nlz_int64() on one of its 64-bit halves.
 *
 * Returns 128 for x == 0.  When the upper half holds a set bit the answer
 * is the leading-zero count of that half; otherwise it is 64 plus the
 * leading-zero count of the lower half.
 */
static inline unsigned int
nlz_int128(uint128_t x)
{
    const uint64_t upper = (uint64_t)(x >> 64);

    if (x == 0) {
        return 128;
    }
    if (upper != 0) {
        return (unsigned int)nlz_int64(upper);
    }
    /* The upper half is empty, so narrowing x to 64 bits loses nothing. */
    return (unsigned int)nlz_int64((uint64_t)x) + 64;
}
#endif
|
||
#endif /* BIGDECIMAL_BITS_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#ifndef BIGDECIMAL_HAS_FEATURE_H | ||
#define BIGDECIMAL_HAS_FEATURE_H | ||
|
||
/* ======== __has_feature ======== */ | ||
|
||
/* Fallback for compilers that do not provide __has_feature: every feature
 * query then evaluates to 0. */
#ifndef __has_feature | ||
# define __has_feature(_) 0 | ||
#endif | ||
|
||
/* ======== __has_extension ======== */ | ||
|
||
/* Fallback for compilers that do not provide __has_extension: extension
 * queries are answered by __has_feature instead. */
#ifndef __has_extension | ||
# define __has_extension __has_feature | ||
#endif | ||
|
||
/* ======== __has_builtin ======== */ | ||
|
||
/*
 * BIGDECIMAL_HAS_BUILTIN(name): preprocessor-time test for a compiler
 * builtin.  The definition is chosen as follows:
 *
 *   1. If RBIMPL_HAS_BUILTIN is defined (the header that may provide it is
 *      included when HAVE_RUBY_INTERNAL_HAS_BUILTIN_H is defined), forward
 *      to it.
 *   2. Otherwise, if the compiler's own __has_builtin exists and is not
 *      excluded by the checks below, forward to __has_builtin.
 *   3. Otherwise, on __GNUC__ compilers, paste the builtin's name onto
 *      BIGDECIMAL_HAS_BUILTIN_ and look the answer up in a fixed table.
 *   4. Otherwise, on _MSC_VER, always answer 0.
 *   5. Otherwise, paste as in (3), with the table entries taken from the
 *      HAVE_BUILTIN___BUILTIN_* macros (presumably set by the build
 *      configuration -- confirm against the extension's configure script).
 *
 * The tables in (3) and (5) only contain entries for __builtin_clz and
 * __builtin_clzl; asking about any other builtin through those variants
 * expands to an undefined identifier, which an #if treats as 0.
 */
#ifdef HAVE_RUBY_INTERNAL_HAS_BUILTIN_H | ||
# include <ruby/internal/has/builtin.h> | ||
#endif | ||
|
||
#ifdef RBIMPL_HAS_BUILTIN | ||
# define BIGDECIMAL_HAS_BUILTIN(...) RBIMPL_HAS_BUILTIN(__VA_ARGS__) | ||
|
||
#else | ||
# /* The following section is copied from CRuby's builtin.h */ | ||
# | ||
/* Decide whether the compiler-provided __has_builtin can be relied on;
 * HAVE___HAS_BUILTIN is defined only when it can. */
# ifdef __has_builtin | ||
# if defined(__INTEL_COMPILER) | ||
# /* :TODO: Intel C Compiler has __has_builtin (since 19.1 maybe?), and is | ||
# * reportedly broken. We have to skip them. However the situation can | ||
# * change. They might improve someday. We need to revisit here later. */ | ||
# elif defined(__GNUC__) && ! __has_builtin(__builtin_alloca) | ||
# /* FreeBSD's <sys/cdefs.h> defines its own *broken* version of | ||
# * __has_builtin. Cygwin copied that content to be a victim of the | ||
# * broken-ness. We don't take them into account. */ | ||
# else | ||
# define HAVE___HAS_BUILTIN 1 | ||
# endif | ||
# endif | ||
# | ||
# if defined(HAVE___HAS_BUILTIN) | ||
# define BIGDECIMAL_HAS_BUILTIN(_) __has_builtin(_) | ||
# | ||
# elif defined(__GNUC__) | ||
/* No usable __has_builtin: answer from a table keyed by the pasted builtin
 * name.  Both entries are 1 when the version test below holds, else 0. */
# define BIGDECIMAL_HAS_BUILTIN(_) BIGDECIMAL_HAS_BUILTIN_ ## _ | ||
# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 6)) | ||
# define BIGDECIMAL_HAS_BUILTIN___builtin_clz 1 | ||
# define BIGDECIMAL_HAS_BUILTIN___builtin_clzl 1 | ||
# else | ||
# define BIGDECIMAL_HAS_BUILTIN___builtin_clz 0 | ||
# define BIGDECIMAL_HAS_BUILTIN___builtin_clzl 0 | ||
# endif | ||
# elif defined(_MSC_VER) | ||
# define BIGDECIMAL_HAS_BUILTIN(_) 0 | ||
# | ||
# else | ||
/* Any other compiler: same pasting scheme, with the answers delegated to
 * the HAVE_BUILTIN___BUILTIN_* macros. */
# define BIGDECIMAL_HAS_BUILTIN(_) BIGDECIMAL_HAS_BUILTIN_ ## _ | ||
# define BIGDECIMAL_HAS_BUILTIN___builtin_clz HAVE_BUILTIN___BUILTIN_CLZ | ||
# define BIGDECIMAL_HAS_BUILTIN___builtin_clzl HAVE_BUILTIN___BUILTIN_CLZL | ||
# endif | ||
#endif /* RBIMPL_HAS_BUILTIN */ | ||
|
||
/* When the compiler does not define __has_builtin at all, provide it in
 * terms of BIGDECIMAL_HAS_BUILTIN so that code can write
 * __has_builtin(...) unconditionally.  A compiler-defined __has_builtin is
 * left untouched. */
#ifndef __has_builtin | ||
# define __has_builtin(...) BIGDECIMAL_HAS_BUILTIN(__VA_ARGS__) | ||
#endif | ||
|
||
#endif /* BIGDECIMAL_HAS_FEATURE_H */ |
Oops, something went wrong.