Skip to content

Commit

Permalink
Stop making a redundant hash copy in Hash#dup (#2489)
Browse files Browse the repository at this point in the history
* Stop making a redundant hash copy in Hash#dup

It was making a copy of the hash without rehashing, then created an
extra copy of the hash to do the rehashing.  Since rehashing creates
a new copy already, this change just uses that rehashing to make
the copy.

[Bug #16121]

* Remove redundant Check_Type after to_hash

* Fix freeing and clearing destination hash in Hash#initialize_copy

The code was assuming the state of the destination hash based on the
source hash for clearing any existing table on it. If these don't match,
then that can cause the old table to be leaked. This can be seen by
compiling hash.c with `#define HASH_DEBUG 1` and running the following
script, which will crash from a debug assertion.

```ruby
h = 9.times.map { |i| [i, i] }.to_h
h.send(:initialize_copy, {})
```

* Remove dead code paths in rb_hash_initialize_copy

Given that `RHASH_ST_TABLE_P(h)` is defined as `(!RHASH_AR_TABLE_P(h))`
it shouldn't be possible for a hash to be neither of these, so there
is no need for the removed `else if` blocks.

* Share implementation between Hash#replace and Hash#initialize_copy

This also fixes key rehashing for small hashes backed by an array
table for Hash#replace.  This used to be done consistently in ruby
2.5.x, but stopped being done for small arrays in ruby 2.6.x.

This also bring optimization improvements that were done for
Hash#initialize_copy to Hash#replace.

* Add the Hash#dup benchmark
  • Loading branch information
dylanahsmith authored and ko1 committed Oct 21, 2019
1 parent 8b8b9c1 commit b970259
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 55 deletions.
8 changes: 8 additions & 0 deletions benchmark/hash_dup.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
prelude: |
small_hash = { a: 1 }
larger_hash = 20.times.map { |i| [('a'.ord + i).chr.to_sym, i] }.to_h
benchmark:
dup_small: small_hash.dup
dup_larger: larger_hash.dup
loop_count: 10000
76 changes: 21 additions & 55 deletions hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -2808,49 +2808,6 @@ rb_hash_aset(VALUE hash, VALUE key, VALUE val)
return val;
}

static int
replace_i(VALUE key, VALUE val, VALUE hash)
{
rb_hash_aset(hash, key, val);

return ST_CONTINUE;
}

/* :nodoc: */
static VALUE
rb_hash_initialize_copy(VALUE hash, VALUE hash2)
{
rb_hash_modify_check(hash);
hash2 = to_hash(hash2);

Check_Type(hash2, T_HASH);

if (hash == hash2) return hash;

if (RHASH_AR_TABLE_P(hash2)) {
if (RHASH_AR_TABLE_P(hash)) ar_free_and_clear_table(hash);
ar_copy(hash, hash2);
if (RHASH_AR_TABLE_SIZE(hash))
rb_hash_rehash(hash);
}
else if (RHASH_ST_TABLE_P(hash2)) {
if (RHASH_ST_TABLE_P(hash)) st_free_table(RHASH_ST_TABLE(hash));
RHASH_ST_TABLE_SET(hash, st_copy(RHASH_ST_TABLE(hash2)));
if (RHASH_ST_TABLE(hash)->num_entries)
rb_hash_rehash(hash);
}
else if (RHASH_AR_TABLE_P(hash)) {
ar_clear(hash);
}
else if (RHASH_ST_TABLE_P(hash)) {
st_clear(RHASH_ST_TABLE(hash));
}

COPY_DEFAULT(hash, hash2);

return hash;
}

/*
* call-seq:
* hsh.replace(other_hash) -> hsh
Expand All @@ -2868,26 +2825,35 @@ rb_hash_replace(VALUE hash, VALUE hash2)
{
rb_hash_modify_check(hash);
if (hash == hash2) return hash;
if (RHASH_ITER_LEV(hash) > 0) {
rb_raise(rb_eRuntimeError, "can't replace hash during iteration");
}
hash2 = to_hash(hash2);

COPY_DEFAULT(hash, hash2);

rb_hash_clear(hash);

if (RHASH_AR_TABLE_P(hash)) {
if (RHASH_AR_TABLE_P(hash2)) {
ar_copy(hash, hash2);
}
else {
goto st_to_st;
}
if (RHASH_AR_TABLE_P(hash2)) {
ar_clear(hash);
}
else {
ar_free_and_clear_table(hash);
RHASH_ST_TABLE_SET(hash, st_init_table_with_size(RHASH_TYPE(hash2), RHASH_SIZE(hash2)));
}
}
else {
if (RHASH_AR_TABLE_P(hash2)) ar_force_convert_table(hash2, __FILE__, __LINE__);
st_to_st:
if (RHASH_AR_TABLE_P(hash2)) {
st_free_table(RHASH_ST_TABLE(hash));
RHASH_ST_CLEAR(hash);
}
else {
st_clear(RHASH_ST_TABLE(hash));
RHASH_TBL_RAW(hash)->type = RHASH_ST_TABLE(hash2)->type;
rb_hash_foreach(hash2, replace_i, hash);
}
}
rb_hash_foreach(hash2, rb_hash_rehash_i, (VALUE)hash);

rb_gc_writebarrier_remember(hash);

return hash;
}
Expand Down Expand Up @@ -6143,7 +6109,7 @@ Init_Hash(void)
rb_define_singleton_method(rb_cHash, "[]", rb_hash_s_create, -1);
rb_define_singleton_method(rb_cHash, "try_convert", rb_hash_s_try_convert, 1);
rb_define_method(rb_cHash, "initialize", rb_hash_initialize, -1);
rb_define_method(rb_cHash, "initialize_copy", rb_hash_initialize_copy, 1);
rb_define_method(rb_cHash, "initialize_copy", rb_hash_replace, 1);
rb_define_method(rb_cHash, "rehash", rb_hash_rehash, 0);

rb_define_method(rb_cHash, "to_hash", rb_hash_to_hash, 0);
Expand Down

0 comments on commit b970259

Please sign in to comment.