Skip to content

Commit f2d8db3

Browse files
committed
Reduce memory usage of Hash object
## Implementation Summary * Change entry list from segmented list to flat array. * Change value of hash bucket from pointer to entry to index of entry list, and represent it by variable length bits according to capacity of hash buckets. * Store management information about entry list and hash table to `struct RHash` as much as possible. ## Benchmark Summary Only the results of typical situations on 64-bit Word-boxing are present here. For more detailed information, including consideration, see below (although most of the body is written in Japanese). * https://shuujii.github.io/mruby-hash-benchmark ### Memory Usage Lower value is better. | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 344B | 256B | 0.74419x | | 40 | 1,464B | 840B | 0.57377x | | 200 | 8,056B | 3,784B | 0.46971x | | 500 | 17,169B | 9,944B | 0.57949x | ### Performance Higher value is better. #### `mrb_hash_set` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 1.41847M i/s | 1.36004M i/s | 0.95881x | | 40 | 0.39224M i/s | 0.31888M i/s | 0.81296x | | 200 | 0.03780M i/s | 0.04290M i/s | 1.13494x | | 500 | 0.01225M i/s | 0.01314M i/s | 1.07275x | #### `mrb_hash_get` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 26.05920M i/s | 30.19543M i/s | 1.15872x | | 40 | 44.26420M i/s | 32.75781M i/s | 0.74005x | | 200 | 44.55171M i/s | 31.56926M i/s | 0.70860x | | 500 | 39.19250M i/s | 29.73806M i/s | 0.75877x | #### `mrb_hash_each` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 25.11964M i/s | 30.34167M i/s | 1.20789x | | 40 | 11.74253M i/s | 13.25539M i/s | 1.12884x | | 200 | 2.01133M i/s | 2.97214M i/s | 1.47770x | | 500 | 0.87411M i/s | 1.21178M i/s | 1.38631x | #### `Hash#[]=` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 0.50095M i/s | 0.56490M i/s | 1.12764x | | 40 | 0.19132M i/s | 0.18392M i/s | 0.96129x | | 200 | 0.03624M i/s | 0.03256M i/s | 0.89860x | | 500 | 0.01527M i/s | 0.01236M i/s | 0.80935x | #### `Hash#[]` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 11.53211M i/s | 12.78806M i/s | 1.10891x | | 40 | 15.26920M i/s | 13.37529M i/s | 0.87596x | | 200 | 15.28550M i/s | 13.36410M i/s | 0.87430x | | 500 | 14.57695M i/s | 12.75388M i/s | 0.87494x | #### `Hash#each` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 0.30462M i/s | 0.27080M i/s | 0.88898x | | 40 | 0.12912M i/s | 0.11704M i/s | 0.90642x | | 200 | 0.02638M i/s | 0.02402M i/s | 0.91071x | | 500 | 0.01066M i/s | 0.00959M i/s | 0.89953x | #### `Hash#delete` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 7.84167M i/s | 6.96419M i/s | 0.88810x | | 40 | 6.91292M i/s | 7.41427M i/s | 1.07252x | | 200 | 3.75952M i/s | 7.32080M i/s | 1.94727x | | 500 | 2.10754M i/s | 7.05963M i/s | 3.34970x | #### `Hash#shift` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 14.66444M i/s | 13.18876M i/s | 0.89937x | | 40 | 11.95124M i/s | 11.10420M i/s | 0.92913x | | 200 | 5.53681M i/s | 7.88155M i/s | 1.42348x | | 500 | 2.96728M i/s | 5.40405M i/s | 1.82121x | #### `Hash#dup` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 0.15063M i/s | 5.37889M i/s | 35.71024x | | 40 | 0.06515M i/s | 3.38196M i/s | 51.91279x | | 200 | 0.01359M i/s | 1.46538M i/s | 107.84056x | | 500 | 0.00559M i/s | 0.75411M i/s | 134.88057x | ### Binary Size Lower value is better. | File | Baseline | New | Factor | |:-----------|--------------:|--------------:|----------:| | mruby | 730,408B | 734,176B | 1.00519x | | libmruby.a | 1,068,134B | 1,072,846B | 1.00441x | ## Other Fixes The following issues have also been fixed in the parts where there was some change this time. * [Heap use-after-free in `Hash#value?`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-hash-value-md) * [Heap use-after-free in `ht_hash_equal`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-ht_hash_equal-md) * [Heap use-after-free in `ht_hash_func`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-ht_hash_func-md) * [Heap use-after-free in `mrb_hash_merge`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-mrb_hash_merge-md) * [Self-replacement does not work for `Hash#replace`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-self-replacement-does-not-work-for-hash-replace-md) * [Repeated deletes and inserts increase memory usage of `Hash`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-repeated-deletes-and-inserts-increase-memory-usage-of-hash-md) * [`Hash#rehash` does not reindex completely](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-hash-rehash-does-not-reindex-completely-md) * `mrb_hash_delete_key` does not cause an error for frozen object * `mrb_hash_new_capa` does not allocate required space first * [`mrb_os_memsize_of_hash_table` result is incorrect](#5032 (comment))
1 parent 8455344 commit f2d8db3

File tree

4 files changed

+1944
-1002
lines changed

4 files changed

+1944
-1002
lines changed

include/mruby/hash.h

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,22 @@
1414
*/
1515
MRB_BEGIN_DECL
1616

17+
/* offset of `iv` must be 3 words */
1718
struct RHash {
1819
MRB_OBJECT_HEADER;
20+
#ifdef MRB_64BIT
21+
uint32_t size;
1922
struct iv_tbl *iv;
20-
struct htable *ht;
23+
uint32_t ea_capa;
24+
uint32_t ea_n_used;
25+
#else
26+
struct iv_tbl *iv;
27+
uint32_t size;
28+
#endif
29+
union {
30+
struct hash_entry *ea;
31+
struct hash_table *ht;
32+
};
2133
};
2234

2335
#define mrb_hash_ptr(v) ((struct RHash*)(mrb_ptr(v)))
@@ -179,8 +191,9 @@ MRB_API mrb_value mrb_hash_clear(mrb_state *mrb, mrb_value hash);
179191
MRB_API mrb_int mrb_hash_size(mrb_state *mrb, mrb_value hash);
180192

181193
/*
182-
* Copies the hash.
183-
*
194+
* Copies the hash. This function does NOT copy the instance variables
195+
* (except for the default value). Use mrb_obj_dup() to copy the instance
196+
* variables as well.
184197
*
185198
* @param mrb The mruby state reference.
186199
* @param hash The target hash.
@@ -198,16 +211,24 @@ MRB_API mrb_value mrb_hash_dup(mrb_state *mrb, mrb_value hash);
198211
*/
199212
MRB_API void mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2);
200213

201-
/* RHASH_TBL allocates st_table if not available. */
202-
#define RHASH(obj) ((struct RHash*)(mrb_ptr(obj)))
203-
#define RHASH_TBL(h) (RHASH(h)->ht)
204-
#define RHASH_IFNONE(h) mrb_iv_get(mrb, (h), MRB_SYM(ifnone))
205-
#define RHASH_PROCDEFAULT(h) RHASH_IFNONE(h)
206-
207-
#define MRB_HASH_DEFAULT 1
208-
#define MRB_HASH_PROC_DEFAULT 2
209-
#define MRB_RHASH_DEFAULT_P(h) (RHASH(h)->flags & MRB_HASH_DEFAULT)
210-
#define MRB_RHASH_PROCDEFAULT_P(h) (RHASH(h)->flags & MRB_HASH_PROC_DEFAULT)
214+
#define RHASH(hash) ((struct RHash*)(mrb_ptr(hash)))
215+
#define RHASH_IFNONE(hash) mrb_iv_get(mrb, (hash), MRB_SYM(ifnone))
216+
#define RHASH_PROCDEFAULT(hash) RHASH_IFNONE(hash)
217+
218+
#define MRB_HASH_IB_BIT_BIT 5
219+
#define MRB_HASH_AR_EA_CAPA_BIT 5
220+
#define MRB_HASH_IB_BIT_SHIFT 0
221+
#define MRB_HASH_AR_EA_CAPA_SHIFT 0
222+
#define MRB_HASH_AR_EA_N_USED_SHIFT MRB_HASH_AR_EA_CAPA_BIT
223+
#define MRB_HASH_SIZE_FLAGS_SHIFT (MRB_HASH_AR_EA_CAPA_BIT * 2)
224+
#define MRB_HASH_IB_BIT_MASK ((1 << MRB_HASH_IB_BIT_BIT) - 1)
225+
#define MRB_HASH_AR_EA_CAPA_MASK ((1 << MRB_HASH_AR_EA_CAPA_BIT) - 1)
226+
#define MRB_HASH_AR_EA_N_USED_MASK (MRB_HASH_AR_EA_CAPA_MASK << MRB_HASH_AR_EA_N_USED_SHIFT)
227+
#define MRB_HASH_DEFAULT (1 << (MRB_HASH_SIZE_FLAGS_SHIFT + 0))
228+
#define MRB_HASH_PROC_DEFAULT (1 << (MRB_HASH_SIZE_FLAGS_SHIFT + 1))
229+
#define MRB_HASH_HT (1 << (MRB_HASH_SIZE_FLAGS_SHIFT + 2))
230+
#define MRB_RHASH_DEFAULT_P(hash) (RHASH(hash)->flags & MRB_HASH_DEFAULT)
231+
#define MRB_RHASH_PROCDEFAULT_P(hash) (RHASH(hash)->flags & MRB_HASH_PROC_DEFAULT)
211232

212233
/* GC functions */
213234
void mrb_gc_mark_hash(mrb_state*, struct RHash*);

mrblib/hash.rb

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -144,26 +144,6 @@ def each_value(&block)
144144
self
145145
end
146146

147-
##
148-
# Replaces the contents of <i>hsh</i> with the contents of other hash
149-
#
150-
# ISO 15.2.13.4.23
151-
def replace(hash)
152-
raise TypeError, "Hash required (#{hash.class} given)" unless Hash === hash
153-
self.clear
154-
hash.each_key{|k|
155-
self[k] = hash[k]
156-
}
157-
if hash.default_proc
158-
self.default_proc = hash.default_proc
159-
else
160-
self.default = hash.default
161-
end
162-
self
163-
end
164-
# ISO 15.2.13.4.17
165-
alias initialize_copy replace
166-
167147
##
168148
# Return a hash which contains the content of
169149
# +self+ and +other+. If a block is given

0 commit comments

Comments
 (0)