Skip to content

Commit

Permalink
Use faster any_hash logic in rb_hash
Browse files Browse the repository at this point in the history
From the documentation of rb_obj_hash:

> Certain core classes such as Integer use built-in hash calculations and
> do not call the #hash method when used as a hash key.

So if you override, say, Integer#hash, your override won't be called by
rb_hash_aref and similar functions. This avoids method lookups in many common cases.

This commit applies the same optimization to rb_hash, a function used
internally and in the C API to get the hash value of an object. Usually
this is used to build the hash of an object based on its elements.
Previously it would always do a method lookup for 'hash'.

This is primarily intended to speed up hashing of Arrays and Hashes,
which call rb_hash for each element.

    compare-ruby: ruby 3.0.1p64 (2021-04-05 revision 0fb782e) [x86_64-linux]
    built-ruby: ruby 3.1.0dev (2021-09-29T02:13:24Z fast_hash d670bf88b2) [x86_64-linux]
    # Iteration per second (i/s)

    |                 |compare-ruby|built-ruby|
    |:----------------|-----------:|---------:|
    |hash_aref_array  |       1.008|     1.769|
    |                 |           -|     1.76x|
  • Loading branch information
jhawthorn authored and tenderlove committed Sep 30, 2021
1 parent 529fc20 commit bb488a1
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 29 deletions.
5 changes: 5 additions & 0 deletions benchmark/hash_aref_array.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Benchmark: repeated Hash#[] lookups where every key is an Array.
h = {}
# Ten keys, each an array of ten consecutive integers: [0..9], [10..19], ..., [90..99].
arrays = (0..99).each_slice(10).to_a
# Debug aid, left disabled: prints the generated keys to stderr.
#STDERR.puts arrays.inspect
# Store each array under itself so that every lookup below finds its key.
arrays.each { |s| h[s] = s }
# Measured section: 200_000 passes over the ten keys (2,000,000 lookups in total).
# The looked-up values are discarded; only the lookup work matters.
200_000.times { arrays.each { |s| h[s] } }
59 changes: 30 additions & 29 deletions hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,33 +122,6 @@ hash_recursive(VALUE obj, VALUE arg, int recurse)
return rb_funcallv(obj, id_hash, 0, 0);
}

/*
 * Returns the hash value of obj as a Fixnum VALUE.
 *
 * The value is first requested through rb_check_funcall_basic_kw() with
 * id_hash and rb_mKernel; when that yields Qundef, hash_recursive is run
 * through rb_exec_recursive_outer() instead.  Whatever comes back is then
 * reduced until it is a Fixnum:
 *   - a Bignum has one unsigned long packed out of it and is folded by sign
 *     (OR with FIXNUM_MIN when negative, AND with FIXNUM_MAX otherwise);
 *   - every pass, Bignum or not, ends by sending the value through
 *     rb_to_int().
 */
VALUE
rb_hash(VALUE obj)
{
    VALUE result = rb_check_funcall_basic_kw(obj, id_hash, rb_mKernel, 0, 0, 0);

    if (result == Qundef) {
        result = rb_exec_recursive_outer(hash_recursive, obj, 0);
    }

    /* The step expression runs after every pass, including the `continue`
     * path, so rb_to_int() is applied exactly once per iteration. */
    for (; !FIXNUM_P(result); result = rb_to_int(result)) {
        unsigned long word;
        int sign;

        if (!RB_BIGNUM_TYPE_P(result)) {
            continue;
        }

        sign = rb_integer_pack(result, &word, 1, sizeof(word), 0,
                               INTEGER_PACK_NATIVE_BYTE_ORDER);
        result = (sign < 0) ? LONG2FIX(word | FIXNUM_MIN)
                            : LONG2FIX(word & FIXNUM_MAX);
    }

    return result;
}

static long rb_objid_hash(st_index_t index);

static st_index_t
Expand Down Expand Up @@ -216,8 +189,29 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
/*
 * Produces an st_index_t hash for obj; passed to any_hash() as its
 * other_func callback by the callers visible in this file.
 *
 * NOTE(review): this listing is a diff rendering whose +/- markers were
 * lost.  The first two statements (delegate to rb_hash(), then return)
 * appear to be the body this commit removes; everything after them appears
 * to be the replacement.  Read literally, the early return makes the rest
 * unreachable -- confirm against the real hash.c before relying on it.
 *
 * Replacement body, as written:
 *   1. Requests a value via rb_check_funcall_basic_kw() with id_hash and
 *      rb_mKernel; a Qundef result falls back to hash_recursive run through
 *      rb_exec_recursive_outer().
 *   2. Loops until the value is a Fixnum, folding a Bignum down to one
 *      unsigned long by sign and passing the value through rb_to_int().
 *   3. Returns the Fixnum unboxed with FIX2LONG().
 */
static st_index_t
obj_any_hash(VALUE obj)
{
/* Presumably the pre-commit body (see NOTE above). */
obj = rb_hash(obj);
return FIX2LONG(obj);
/* Presumably the post-commit body starts here. */
VALUE hval = rb_check_funcall_basic_kw(obj, id_hash, rb_mKernel, 0, 0, 0);

/* Qundef: no value was produced above, so take the recursion-guarded path. */
if (hval == Qundef) {
hval = rb_exec_recursive_outer(hash_recursive, obj, 0);
}

while (!FIXNUM_P(hval)) {
if (RB_TYPE_P(hval, T_BIGNUM)) {
int sign;
unsigned long ul;
/* Pack a single unsigned long out of the Bignum in native byte order. */
sign = rb_integer_pack(hval, &ul, 1, sizeof(ul), 0,
INTEGER_PACK_NATIVE_BYTE_ORDER);
/* Fold by sign: OR with FIXNUM_MIN if negative, AND with FIXNUM_MAX otherwise. */
if (sign < 0) {
hval = LONG2FIX(ul | FIXNUM_MIN);
}
else {
hval = LONG2FIX(ul & FIXNUM_MAX);
}
}
/* Runs on every pass, including right after the Bignum fold above. */
hval = rb_to_int(hval);
}

return FIX2LONG(hval);
}

static st_index_t
Expand All @@ -226,6 +220,13 @@ rb_any_hash(VALUE a)
return any_hash(a, obj_any_hash);
}

/*
 * Returns the hash value of obj boxed as a Fixnum VALUE.
 *
 * The computation is delegated to any_hash(), with obj_any_hash supplied as
 * the callback; the integer it returns is wrapped with LONG2FIX().
 */
VALUE
rb_hash(VALUE obj)
{
    VALUE boxed = LONG2FIX(any_hash(obj, obj_any_hash));

    return boxed;
}


/* Here is a hash function for 64-bit key. It is about 5 times faster
(2 times faster when uint128 type is absent) on Haswell than
tailored Spooky or City hash function can be. */
Expand Down

0 comments on commit bb488a1

Please sign in to comment.