Implement special conversions for 64-bit integers

This change improves the conversion speed from small integers.

```
Comparison:
  big_n9
              master:  4003688.9 i/s
    bigdecimal 3.0.0:  1270551.0 i/s - 3.15x slower

  big_n19
              master:  5410096.4 i/s
    bigdecimal 3.0.0:  1000250.3 i/s - 5.41x slower
```
ruby · Jan 1, 2021 · 3429bd7 · 3429bd7
1 parent 1d30d5a
commit 3429bd7
Show file tree

Hide file tree

Showing 7 changed files with 378 additions and 7 deletions.
diff --git a/bigdecimal.gemspec b/bigdecimal.gemspec
@@ -19,6 +19,9 @@ Gem::Specification.new do |s|
     bigdecimal.gemspec
     ext/bigdecimal/bigdecimal.c
     ext/bigdecimal/bigdecimal.h
+    ext/bigdecimal/bits.h
+    ext/bigdecimal/feature.h
+    ext/bigdecimal/static_assert.h
     lib/bigdecimal.rb
     lib/bigdecimal/jacobian.rb
     lib/bigdecimal/ludcmp.rb

diff --git a/ext/bigdecimal/bigdecimal.c b/ext/bigdecimal/bigdecimal.c
@@ -30,18 +30,19 @@
 #include <ieeefp.h>
 #endif
 
+#include "bits.h"
+#include "static_assert.h"
+
 /* #define ENABLE_NUMERIC_STRING */
 
-#define MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \
-    (a) == 0 ? 0 : \
-    (a) == -1 ? (b) < -(max) : \
-    (a) > 0 ? \
-      ((b) > 0 ? (max) / (a) < (b) : (min) / (a) > (b)) : \
-      ((b) > 0 ? (min) / (a) < (b) : (max) / (a) > (b)))
 #define SIGNED_VALUE_MAX INTPTR_MAX
 #define SIGNED_VALUE_MIN INTPTR_MIN
 #define MUL_OVERFLOW_SIGNED_VALUE_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, SIGNED_VALUE_MIN, SIGNED_VALUE_MAX)
 
+#define numberof(array) ((int)(sizeof(array) / sizeof((array)[0])))
+#define roomof(x, y) (((x) + (y) - 1) / (y))
+#define type_roomof(x, y) roomof(sizeof(x), sizeof(y))
+
 VALUE rb_cBigDecimal;
 VALUE rb_mBigMath;
 
@@ -80,6 +81,8 @@ static ID id_half;
 #define DBLE_FIG rmpd_double_figures()    /* figure of double */
 #endif
 
+#define LOG10_2 0.3010299956639812
+
 #ifndef RRATIONAL_ZERO_P
 # define RRATIONAL_ZERO_P(x) (FIXNUM_P(rb_rational_num(x)) && \
 			      FIX2LONG(rb_rational_num(x)) == 0)
@@ -2754,12 +2757,80 @@ check_exception(VALUE bd)
 }
 
 static VALUE
-rb_inum_convert_to_BigDecimal(VALUE val, RB_UNUSED_VAR(size_t digs), int raise_exception)
+rb_uint64_convert_to_BigDecimal(uint64_t uval, RB_UNUSED_VAR(size_t digs), int raise_exception)
+{
+    /*
+     * Build a non-negative BigDecimal directly from an unsigned 64-bit
+     * integer.
+     *
+     * uval is split into base-BASE words that are stored most significant
+     * first in vp->frac.  Every word of the integer is kept, so the exponent
+     * equals the word count.  digs and raise_exception are accepted only for
+     * signature parity with the other converters; neither is consulted here.
+     *
+     * Returns the result of BigDecimal_wrap_struct() for the new Real.
+     */
+    VALUE obj = TypedData_Wrap_Struct(rb_cBigDecimal, &BigDecimal_data_type, 0);
+    Real *vp;
+
+    if (uval == 0) {
+        vp = VpAllocReal(1);
+        vp->MaxPrec = 1;
+        vp->Prec = 1;
+        vp->exponent = 1;
+        VpSetZero(vp, 1);
+        vp->frac[0] = 0;
+    }
+    else if (uval < BASE) {
+        /* Fits in a single word: no division needed. */
+        vp = VpAllocReal(1);
+        vp->MaxPrec = 1;
+        vp->Prec = 1;
+        vp->exponent = 1;
+        VpSetSign(vp, 1);
+        vp->frac[0] = (BDIGIT)uval;
+    }
+    else {
+        /*
+         * Count the words exactly by repeated division rather than
+         * estimating them as ceil(LOG10_2 * bit_length(uval)).  That estimate
+         * is only an upper bound on the decimal digit count: a 60-bit value
+         * below 10**18 has 18 digits but is estimated at 19, which (e.g. with
+         * 9-digit words) allocates one word too many and leaves a zero in
+         * frac[0], producing an unnormalized value with an inflated
+         * exponent.
+         */
+        size_t len = 0;
+        uint64_t rest;
+        size_t i;
+
+        for (rest = uval; rest > 0; rest /= BASE) {
+            ++len;
+        }
+
+        vp = VpAllocReal(len);
+        vp->MaxPrec = len;
+        vp->Prec = len;
+        vp->exponent = len;
+        VpSetSign(vp, 1);
+
+        /* Fill from the least significant word backwards. */
+        for (i = len; i > 0; --i) {
+            vp->frac[i - 1] = (BDIGIT)(uval % BASE);
+            uval /= BASE;
+        }
+    }
+
+    return BigDecimal_wrap_struct(obj, vp);
+}
+
+/*
+ * Build a BigDecimal from a signed 64-bit integer.
+ *
+ * The magnitude is converted by rb_uint64_convert_to_BigDecimal(); for a
+ * negative input the sign of the resulting Real is then flipped to -1.
+ * digs and raise_exception are forwarded unchanged.
+ */
+static VALUE
+rb_int64_convert_to_BigDecimal(int64_t ival, size_t digs, int raise_exception)
+{
+    const int negative = (ival < 0);
+    uint64_t magnitude;
+    VALUE result;
+
+    if (negative) {
+        /* Add 1 before negating so the negation is representable even for
+         * the most negative int64_t, then add the 1 back as unsigned. */
+        magnitude = ((uint64_t)-(ival + 1)) + 1;
+    }
+    else {
+        magnitude = (uint64_t)ival;
+    }
+
+    result = rb_uint64_convert_to_BigDecimal(magnitude, digs, raise_exception);
+    if (negative) {
+        Real *real;
+        TypedData_Get_Struct(result, Real, &BigDecimal_data_type, real);
+        VpSetSign(real, -1);
+    }
+    return result;
+}
+
+/*
+ * Generic Integer conversion: obtains a Real for val through
+ * GetVpValue(val, 1) and returns check_exception() applied to the Real's
+ * wrapping object (vp->obj).
+ *
+ * digs and raise_exception are not used by this function.
+ * NOTE(review): the meaning of GetVpValue's second argument is not visible
+ * here -- presumably "must succeed / raise on failure"; confirm.
+ */
+static VALUE
+rb_big_convert_to_BigDecimal(VALUE val, RB_UNUSED_VAR(size_t digs), int raise_exception)
 {
     Real *vp = GetVpValue(val, 1);
     return check_exception(vp->obj);
 }
 
+/*
+ * Convert a Ruby Integer to BigDecimal.
+ *
+ * Fixnums take the 64-bit integer path; every other Integer is handed to
+ * rb_big_convert_to_BigDecimal().  digs and raise_exception are forwarded
+ * unchanged to whichever converter is chosen.
+ */
+static VALUE
+rb_inum_convert_to_BigDecimal(VALUE val, RB_UNUSED_VAR(size_t digs), int raise_exception)
+{
+    assert(RB_INTEGER_TYPE_P(val));
+
+    if (!FIXNUM_P(val)) {
+        return rb_big_convert_to_BigDecimal(val, digs, raise_exception);
+    }
+    return rb_int64_convert_to_BigDecimal(FIX2LONG(val), digs, raise_exception);
+}
+
 static VALUE
 rb_float_convert_to_BigDecimal(VALUE val, size_t digs, int raise_exception)
 {

diff --git a/ext/bigdecimal/bits.h b/ext/bigdecimal/bits.h
@@ -0,0 +1,137 @@
+#ifndef BIGDECIMAL_BITS_H
+#define BIGDECIMAL_BITS_H
+
+#include "feature.h"
+#include "static_assert.h"
+
+#if defined(HAVE_X86INTRIN_H)
+# include <x86intrin.h>         /* for _lzcnt_u64 */
+#elif defined(_MSC_VER) && _MSC_VER >= 1310
+# include <intrin.h>            /* for the following intrinsics */
+#endif
+
+#if defined(_MSC_VER) && defined(__AVX2__)
+# pragma intrinsic(__lzcnt)
+# pragma intrinsic(__lzcnt64)
+#endif
+
+/*
+ * Evaluates to nonzero when the product a * b would fall outside the range
+ * [min, max].  The check is done with comparisons and divisions of the
+ * bounds, so the multiplication itself is never performed.
+ *
+ * a == 0 never overflows; a == -1 is tested as b < -(max) instead of going
+ * through a division by a (presumably to avoid evaluating min / -1 --
+ * confirm).  The remaining cases pick the bound to divide according to the
+ * signs of a and b.
+ *
+ * Both a and b appear several times in the expansion, so arguments with
+ * side effects must not be passed.
+ */
+#define MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \
+    (a) == 0 ? 0 : \
+    (a) == -1 ? (b) < -(max) : \
+    (a) > 0 ? \
+      ((b) > 0 ? (max) / (a) < (b) : (min) / (a) > (b)) : \
+      ((b) > 0 ? (min) / (a) < (b) : (max) / (a) > (b)))
+
+/*
+ * bit_length(x): the width of the selected integer type minus the number of
+ * leading zero bits of x, as an unsigned int.
+ *
+ * The helper is chosen by sizeof(x): operands no larger than int32_t use
+ * nlz_int32, operands no larger than int64_t use nlz_int64, and (only when
+ * HAVE_UINT128_T is defined) anything larger uses nlz_int128.  x is cast to
+ * the unsigned type of the chosen width before counting.
+ *
+ * Note that the expansion is a cast followed by a parenthesized expression
+ * and is not itself wrapped in parentheses.
+ */
+#ifdef HAVE_UINT128_T
+# define bit_length(x) \
+    (unsigned int) \
+    (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \
+     sizeof(x) <= sizeof(int64_t) ? 64 - nlz_int64((uint64_t)(x)) : \
+                                   128 - nlz_int128((uint128_t)(x)))
+#else
+# define bit_length(x) \
+    (unsigned int) \
+    (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \
+                                    64 - nlz_int64((uint64_t)(x)))
+#endif
+
+static inline unsigned nlz_int32(uint32_t x);
+static inline unsigned nlz_int64(uint64_t x);
+#ifdef HAVE_UINT128_T
+static inline unsigned nlz_int128(uint128_t x);
+#endif
+
+/*
+ * Number of leading zero bits in the 32-bit value x.
+ *
+ * The implementation is picked at compile time, in this order: the MSVC
+ * __lzcnt intrinsic (when __AVX2__ is defined), _lzcnt_u32 on x86-64 with
+ * __LZCNT__, MSVC's _BitScanReverse, __builtin_clz, and finally a portable
+ * binary search.  The _BitScanReverse, __builtin_clz and portable branches
+ * visibly return 32 for x == 0; the two lzcnt branches are presumed to do
+ * the same (verify against the intrinsic documentation).
+ */
+static inline unsigned int
+nlz_int32(uint32_t x)
+{
+#if defined(_MSC_VER) && defined(__AVX2__)
+    /* Note: It seems there is no such thing like __LZCNT__ predefined in MSVC.
+     * AMD  CPUs have  had this  instruction for  decades (since  K10) but  for
+     * Intel, Haswell is  the oldest one.  We need to  use __AVX2__ for maximum
+     * safety. */
+    return (unsigned int)__lzcnt(x);
+
+#elif defined(__x86_64__) && defined(__LZCNT__) /* && ! defined(MJIT_HEADER) */
+    return (unsigned int)_lzcnt_u32(x);
+
+#elif defined(_MSC_VER) && _MSC_VER >= 1400 /* &&! defined(__AVX2__) */
+    /* r receives the index of the highest set bit; a zero return value from
+     * _BitScanReverse is mapped to 32. */
+    unsigned long r;
+    return _BitScanReverse(&r, x) ? (31 - (int)r) : 32;
+
+#elif __has_builtin(__builtin_clz)
+    /* __builtin_clz operates on unsigned int, so int must be 32 bits wide.
+     * x == 0 is answered here instead of being passed to the builtin. */
+    STATIC_ASSERT(sizeof_int, sizeof(int) * CHAR_BIT == 32);
+    return x ? (unsigned int)__builtin_clz(x) : 32;
+
+#else
+    /* Portable fallback: binary search for the highest set bit.  Each step
+     * tests the upper half of the remaining window; when it is nonzero, n is
+     * reduced by the half-width and the search continues in that half. */
+    uint32_t y;
+    unsigned n = 32;
+    y = x >> 16; if (y) {n -= 16; x = y;}
+    y = x >>  8; if (y) {n -=  8; x = y;}
+    y = x >>  4; if (y) {n -=  4; x = y;}
+    y = x >>  2; if (y) {n -=  2; x = y;}
+    y = x >>  1; if (y) {return n - 2;}
+    /* x is now 0 or 1, so this subtracts the last remaining bit, if any. */
+    return (unsigned int)(n - x);
+#endif
+}
+
+/*
+ * Number of leading zero bits in the 64-bit value x.
+ *
+ * The implementation is picked at compile time, in this order: the MSVC
+ * __lzcnt64 intrinsic (when __AVX2__ is defined), _lzcnt_u64 on x86-64 with
+ * __LZCNT__, _BitScanReverse64 on 64-bit MSVC, the __builtin_clzl /
+ * __builtin_clzll builtins, and finally a portable binary search.  The
+ * _BitScanReverse64, builtin and portable branches visibly return 64 for
+ * x == 0; the two lzcnt branches are presumed to do the same (verify against
+ * the intrinsic documentation).
+ */
+static inline unsigned int
+nlz_int64(uint64_t x)
+{
+#if defined(_MSC_VER) && defined(__AVX2__)
+    return (unsigned int)__lzcnt64(x);
+
+#elif defined(__x86_64__) && defined(__LZCNT__) /* && ! defined(MJIT_HEADER) */
+    return (unsigned int)_lzcnt_u64(x);
+
+#elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 /* &&! defined(__AVX2__) */
+    /* r receives the index of the highest set bit; a zero return value from
+     * _BitScanReverse64 is mapped to 64. */
+    unsigned long r;
+    return _BitScanReverse64(&r, x) ? (63u - (unsigned int)r) : 64;
+
+#elif __has_builtin(__builtin_clzl)
+    /* x == 0 is answered first instead of being passed to a builtin; after
+     * that, whichever of long / long long is 64 bits wide selects the
+     * builtin.  The sizeof tests are compile-time constants. */
+    if (x == 0) {
+        return 64;
+    }
+    else if (sizeof(long) * CHAR_BIT == 64) {
+        return (unsigned int)__builtin_clzl((unsigned long)x);
+    }
+    else if (sizeof(long long) * CHAR_BIT == 64) {
+        return (unsigned int)__builtin_clzll((unsigned long long)x);
+    }
+    else {
+        /* :FIXME: Is there a way to make this branch a compile-time error? */
+        __builtin_unreachable();
+    }
+
+#else
+    /* Portable fallback: binary search for the highest set bit.  Each step
+     * tests the upper half of the remaining window; when it is nonzero, n is
+     * reduced by the half-width and the search continues in that half. */
+    uint64_t y;
+    unsigned int n = 64;
+    y = x >> 32; if (y) {n -= 32; x = y;}
+    y = x >> 16; if (y) {n -= 16; x = y;}
+    y = x >>  8; if (y) {n -=  8; x = y;}
+    y = x >>  4; if (y) {n -=  4; x = y;}
+    y = x >>  2; if (y) {n -=  2; x = y;}
+    y = x >>  1; if (y) {return n - 2;}
+    /* x is now 0 or 1, so this subtracts the last remaining bit, if any. */
+    return (unsigned int)(n - x);
+
+#endif
+}
+
+#ifdef HAVE_UINT128_T
+/*
+ * Number of leading zero bits in the 128-bit value x, computed from its two
+ * 64-bit halves with nlz_int64().  Returns 128 when x is zero.
+ */
+static inline unsigned int
+nlz_int128(uint128_t x)
+{
+    const uint64_t upper = (uint64_t)(x >> 64);
+
+    if (upper != 0) {
+        /* The highest set bit lies in the upper half. */
+        return (unsigned int)nlz_int64(upper);
+    }
+    if (x != 0) {
+        /* Upper half is empty: count within the lower half, plus 64. */
+        return (unsigned int)nlz_int64(x) + 64;
+    }
+    return 128;
+}
+#endif
+
+#endif /* BIGDECIMAL_BITS_H */
diff --git a/ext/bigdecimal/extconf.rb b/ext/bigdecimal/extconf.rb
@@ -16,6 +16,20 @@ def check_bigdecimal_version(gemspec_path)
   message "#{bigdecimal_version}\n"
 end
 
+def have_builtin_func(name, check_expr, opt = "", &b)
+  checking_for checking_message(name.funcall_style, nil, opt) do
+    if try_compile(<<SRC, opt, &b)
+int foo;
+int main() { #{check_expr}; return 0; }
+SRC
+      $defs.push(format("-DHAVE_BUILTIN_%s", name.tr_cpp))
+      true
+    else
+      false
+    end
+  end
+end
+
 gemspec_name = gemspec_path = nil
 unless ['', '../../'].any? {|dir|
          gemspec_name = "#{dir}bigdecimal.gemspec"
@@ -28,13 +42,20 @@ def check_bigdecimal_version(gemspec_path)
 
 check_bigdecimal_version(gemspec_path)
 
+have_builtin_func("__builtin_clz", "__builtin_clz(0)")
+have_builtin_func("__builtin_clzl", "__builtin_clzl(0)")
+
 have_header("stdbool.h")
+have_header("x86intrin.h")
 
 have_func("labs", "stdlib.h")
 have_func("llabs", "stdlib.h")
 have_func("finite", "math.h")
 have_func("isfinite", "math.h")
 
+have_header("ruby/internal/has/builtin.h")
+have_header("ruby/internal/static_assert.h")
+
 have_type("struct RRational", "ruby.h")
 have_func("rb_rational_num", "ruby.h")
 have_func("rb_rational_den", "ruby.h")

diff --git a/ext/bigdecimal/feature.h b/ext/bigdecimal/feature.h
@@ -0,0 +1,68 @@
+#ifndef BIGDECIMAL_HAS_FEATURE_H
+#define BIGDECIMAL_HAS_FEATURE_H
+
+/* ======== __has_feature ======== */
+
+#ifndef __has_feature
+# define __has_feature(_) 0
+#endif
+
+/* ======== __has_extension ======== */
+
+#ifndef __has_extension
+# define __has_extension __has_feature
+#endif
+
+/* ======== __has_builtin ======== */
+
+#ifdef HAVE_RUBY_INTERNAL_HAS_BUILTIN_H
+# include <ruby/internal/has/builtin.h>
+#endif
+
+/* Select the implementation of BIGDECIMAL_HAS_BUILTIN: reuse Ruby's
+ * RBIMPL_HAS_BUILTIN when it is defined, otherwise fall back to the local
+ * detection below. */
+#ifdef RBIMPL_HAS_BUILTIN
+# define BIGDECIMAL_HAS_BUILTIN(...) RBIMPL_HAS_BUILTIN(__VA_ARGS__)
+
+#else
+# /* The following section is copied from CRuby's builtin.h */
+#
+/* Step 1: decide whether the compiler's own __has_builtin can be trusted.
+ * HAVE___HAS_BUILTIN is defined only when it exists and neither of the two
+ * exclusions below applies. */
+# ifdef __has_builtin
+#  if defined(__INTEL_COMPILER)
+#  /* :TODO: Intel  C Compiler  has __has_builtin (since  19.1 maybe?),  and is
+#   * reportedly  broken.  We  have to  skip them.   However the  situation can
+#   * change.  They might improve someday.  We need to revisit here later. */
+#  elif defined(__GNUC__) && ! __has_builtin(__builtin_alloca)
+#  /* FreeBSD's   <sys/cdefs.h>   defines   its   own   *broken*   version   of
+#   * __has_builtin.   Cygwin  copied  that  content  to be  a  victim  of  the
+#   * broken-ness.  We don't take them into account. */
+#  else
+#   define HAVE___HAS_BUILTIN 1
+#  endif
+# endif
+#
+/* Step 2: define BIGDECIMAL_HAS_BUILTIN according to what is available. */
+# if defined(HAVE___HAS_BUILTIN)
+#  define BIGDECIMAL_HAS_BUILTIN(_) __has_builtin(_)
+#
+# elif defined(__GNUC__)
+/* Token-pasting lookup: BIGDECIMAL_HAS_BUILTIN(name) expands to
+ * BIGDECIMAL_HAS_BUILTIN_name.  Only the clz / clzl entries are provided,
+ * keyed on the GCC version test below. */
+#  define BIGDECIMAL_HAS_BUILTIN(_) BIGDECIMAL_HAS_BUILTIN_ ## _
+#  if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 6))
+#   define BIGDECIMAL_HAS_BUILTIN___builtin_clz  1
+#   define BIGDECIMAL_HAS_BUILTIN___builtin_clzl 1
+#  else
+#   define BIGDECIMAL_HAS_BUILTIN___builtin_clz  0
+#   define BIGDECIMAL_HAS_BUILTIN___builtin_clzl 0
+#  endif
+# elif defined(_MSC_VER)
+#  define BIGDECIMAL_HAS_BUILTIN(_) 0
+#
+# else
+/* Any other compiler: same lookup scheme, with the answers taken from the
+ * HAVE_BUILTIN_* macros (presumably supplied by the build-time probes in
+ * extconf.rb -- confirm the generated macro names match). */
+#  define BIGDECIMAL_HAS_BUILTIN(_) BIGDECIMAL_HAS_BUILTIN_ ## _
+#  define BIGDECIMAL_HAS_BUILTIN___builtin_clz   HAVE_BUILTIN___BUILTIN_CLZ
+#  define BIGDECIMAL_HAS_BUILTIN___builtin_clzl  HAVE_BUILTIN___BUILTIN_CLZL
+# endif
+#endif /* RBIMPL_HAS_BUILTIN */
+
+/* Provide __has_builtin for compilers that lack it, so that code can write
+ * __has_builtin(...) directly.
+ * NOTE(review): when the compiler defines __has_builtin but it was rejected
+ * above (the Intel / FreeBSD cases), this leaves the compiler's own
+ * definition in place; code using __has_builtin directly then bypasses
+ * BIGDECIMAL_HAS_BUILTIN. */
+#ifndef __has_builtin
+# define __has_builtin(...) BIGDECIMAL_HAS_BUILTIN(__VA_ARGS__)
+#endif
+
+#endif /* BIGDECIMAL_HAS_FEATURE_H */