Skip to content

Commit 56cc3e9

Browse files
eightbitraptor authored and tenderlove committed
Move String RVALUES between pools
And re-embed any strings that can now fit inside the slot they've been moved to
1 parent f8502a2 commit 56cc3e9

File tree

5 files changed

+194
-31
lines changed

5 files changed

+194
-31
lines changed

gc.c

Lines changed: 75 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -837,6 +837,8 @@ typedef struct rb_objspace {
837837
struct {
838838
size_t considered_count_table[T_MASK];
839839
size_t moved_count_table[T_MASK];
840+
size_t moved_up_count_table[T_MASK];
841+
size_t moved_down_count_table[T_MASK];
840842
size_t total_moved;
841843
} rcompactor;
842844

@@ -5091,7 +5093,7 @@ gc_setup_mark_bits(struct heap_page *page)
50915093
}
50925094

50935095
static int gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj);
5094-
static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size);
5096+
static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size);
50955097

50965098
static void
50975099
lock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
@@ -5130,6 +5132,7 @@ unlock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
51305132
static bool
51315133
try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page, VALUE src)
51325134
{
5135+
struct heap_page *src_page = GET_HEAP_PAGE(src);
51335136
if (!free_page) {
51345137
return false;
51355138
}
@@ -5150,12 +5153,16 @@ try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page,
51505153
free_page->freelist = RANY(dest)->as.free.next;
51515154

51525155
GC_ASSERT(RB_BUILTIN_TYPE(dest) == T_NONE);
5153-
GC_ASSERT(free_page->slot_size == GET_HEAP_PAGE(src)->slot_size);
51545156

5157+
if (src_page->slot_size > free_page->slot_size) {
5158+
objspace->rcompactor.moved_down_count_table[BUILTIN_TYPE(src)]++;
5159+
} else if (free_page->slot_size > src_page->slot_size) {
5160+
objspace->rcompactor.moved_up_count_table[BUILTIN_TYPE(src)]++;
5161+
}
51555162
objspace->rcompactor.moved_count_table[BUILTIN_TYPE(src)]++;
51565163
objspace->rcompactor.total_moved++;
51575164

5158-
gc_move(objspace, src, dest, free_page->slot_size);
5165+
gc_move(objspace, src, dest, src_page->slot_size, free_page->slot_size);
51595166
gc_pin(objspace, src);
51605167
free_page->free_slots--;
51615168
}
@@ -5907,7 +5914,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_
59075914

59085915
object = rb_gc_location(forwarding_object);
59095916

5910-
gc_move(objspace, object, forwarding_object, page->slot_size);
5917+
gc_move(objspace, object, forwarding_object, GET_HEAP_PAGE(object)->slot_size, page->slot_size);
59115918
/* forwarding_object is now our actual object, and "object"
59125919
* is the free slot for the original page */
59135920
struct heap_page *orig_page = GET_HEAP_PAGE(object);
@@ -5976,6 +5983,8 @@ gc_compact_start(rb_objspace_t *objspace)
59765983

59775984
memset(objspace->rcompactor.considered_count_table, 0, T_MASK * sizeof(size_t));
59785985
memset(objspace->rcompactor.moved_count_table, 0, T_MASK * sizeof(size_t));
5986+
memset(objspace->rcompactor.moved_up_count_table, 0, T_MASK * sizeof(size_t));
5987+
memset(objspace->rcompactor.moved_down_count_table, 0, T_MASK * sizeof(size_t));
59795988

59805989
/* Set up read barrier for pages containing MOVED objects */
59815990
install_handlers();
@@ -8224,14 +8233,34 @@ gc_compact_heap_cursors_met_p(rb_heap_t *heap)
82248233
return heap->sweeping_page == heap->compact_cursor;
82258234
}
82268235

8236+
static rb_size_pool_t *
8237+
gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, VALUE src)
8238+
{
8239+
size_t obj_size;
8240+
8241+
switch (BUILTIN_TYPE(src)) {
8242+
case T_STRING:
8243+
obj_size = rb_str_size_as_embedded(src);
8244+
if (rb_gc_size_allocatable_p(obj_size)){
8245+
return &size_pools[size_pool_idx_for_size(obj_size)];
8246+
}
8247+
else {
8248+
GC_ASSERT(!STR_EMBED_P(src));
8249+
return &size_pools[0];
8250+
}
8251+
default:
8252+
return src_pool;
8253+
}
8254+
}
8255+
82278256
static bool
8228-
gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
8257+
gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, rb_size_pool_t *size_pool, VALUE src)
82298258
{
82308259
GC_ASSERT(BUILTIN_TYPE(src) != T_MOVED);
8231-
rb_heap_t *dheap = heap;
8260+
rb_heap_t *dheap = SIZE_POOL_EDEN_HEAP(gc_compact_destination_pool(objspace, size_pool, src));
82328261

82338262
if (gc_compact_heap_cursors_met_p(dheap)) {
8234-
return false;
8263+
return dheap != heap;
82358264
}
82368265
while (!try_move(objspace, dheap, dheap->free_pages, src)) {
82378266
struct gc_sweep_context ctx = {
@@ -8254,7 +8283,7 @@ gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
82548283
}
82558284

82568285
static bool
8257-
gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
8286+
gc_compact_plane(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
82588287
short slot_size = page->slot_size;
82598288
short slot_bits = slot_size / BASE_SLOT_SIZE;
82608289
GC_ASSERT(slot_bits > 0);
@@ -8266,7 +8295,7 @@ gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t b
82668295
if (bitset & 1) {
82678296
objspace->rcompactor.considered_count_table[BUILTIN_TYPE(vp)]++;
82688297

8269-
if (!gc_compact_move(objspace, heap, vp)) {
8298+
if (!gc_compact_move(objspace, heap, size_pool, vp)) {
82708299
//the cursors met. bubble up
82718300
return false;
82728301
}
@@ -8295,15 +8324,15 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
82958324
bitset = (mark_bits[0] & ~pin_bits[0]);
82968325
bitset >>= NUM_IN_PAGE(p);
82978326
if (bitset) {
8298-
if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
8327+
if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
82998328
return false;
83008329
}
83018330
p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
83028331

83038332
for (int j = 1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
83048333
bitset = (mark_bits[j] & ~pin_bits[j]);
83058334
if (bitset) {
8306-
if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
8335+
if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
83078336
return false;
83088337
}
83098338
p += BITS_BITLENGTH * BASE_SLOT_SIZE;
@@ -8347,7 +8376,6 @@ gc_sweep_compact(rb_objspace_t *objspace)
83478376
struct heap_page *start_page = heap->compact_cursor;
83488377

83498378
if (!gc_compact_page(objspace, size_pool, heap, start_page)) {
8350-
GC_ASSERT(heap->sweeping_page == heap->compact_cursor);
83518379
lock_page_body(objspace, GET_PAGE_BODY(start_page->start));
83528380

83538381
continue;
@@ -9626,7 +9654,7 @@ gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj)
96269654
}
96279655

96289656
static VALUE
9629-
gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
9657+
gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size)
96309658
{
96319659
int marked;
96329660
int wb_unprotected;
@@ -9676,8 +9704,8 @@ gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
96769704
}
96779705

96789706
/* Move the object */
9679-
memcpy(dest, src, slot_size);
9680-
memset(src, 0, slot_size);
9707+
memcpy(dest, src, MIN(src_slot_size, slot_size));
9708+
memset(src, 0, src_slot_size);
96819709

96829710
/* Set bits for object in new location */
96839711
if (marking) {
@@ -10271,23 +10299,31 @@ gc_update_object_references(rb_objspace_t *objspace, VALUE obj)
1027110299
break;
1027210300

1027310301
case T_STRING:
10274-
if (STR_SHARED_P(obj)) {
10302+
{
1027510303
#if USE_RVARGC
10276-
VALUE orig_shared = any->as.string.as.heap.aux.shared;
1027710304
#endif
10278-
UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
10305+
10306+
if (STR_SHARED_P(obj)) {
1027910307
#if USE_RVARGC
10280-
VALUE shared = any->as.string.as.heap.aux.shared;
10281-
if (STR_EMBED_P(shared)) {
10282-
size_t offset = (size_t)any->as.string.as.heap.ptr - (size_t)RSTRING(orig_shared)->as.embed.ary;
10283-
GC_ASSERT(any->as.string.as.heap.ptr >= RSTRING(orig_shared)->as.embed.ary);
10284-
GC_ASSERT(offset <= (size_t)RSTRING(shared)->as.embed.len);
10285-
any->as.string.as.heap.ptr = RSTRING(shared)->as.embed.ary + offset;
10286-
}
10308+
VALUE old_root = any->as.string.as.heap.aux.shared;
1028710309
#endif
10288-
}
10289-
break;
10310+
UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
10311+
#if USE_RVARGC
10312+
VALUE new_root = any->as.string.as.heap.aux.shared;
10313+
rb_str_update_shared_ary(obj, old_root, new_root);
10314+
10315+
// if, after the move, the string is not embedded and can fit in the
10316+
// slot it's been placed in, then re-embed it
10317+
if ((size_t)GET_HEAP_PAGE(obj)->slot_size >= rb_str_size_as_embedded(obj)) {
10318+
if (!STR_EMBED_P(obj) && rb_str_reembeddable_p(obj)) {
10319+
rb_str_make_embedded(obj);
10320+
}
10321+
}
10322+
#endif
10323+
}
1029010324

10325+
break;
10326+
}
1029110327
case T_DATA:
1029210328
/* Call the compaction callback, if it exists */
1029310329
{
@@ -10479,6 +10515,8 @@ gc_compact_stats(VALUE self)
1047910515
VALUE h = rb_hash_new();
1048010516
VALUE considered = rb_hash_new();
1048110517
VALUE moved = rb_hash_new();
10518+
VALUE moved_up = rb_hash_new();
10519+
VALUE moved_down = rb_hash_new();
1048210520

1048310521
for (i=0; i<T_MASK; i++) {
1048410522
if (objspace->rcompactor.considered_count_table[i]) {
@@ -10488,10 +10526,20 @@ gc_compact_stats(VALUE self)
1048810526
if (objspace->rcompactor.moved_count_table[i]) {
1048910527
rb_hash_aset(moved, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_count_table[i]));
1049010528
}
10529+
10530+
if (objspace->rcompactor.moved_up_count_table[i]) {
10531+
rb_hash_aset(moved_up, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_up_count_table[i]));
10532+
}
10533+
10534+
if (objspace->rcompactor.moved_down_count_table[i]) {
10535+
rb_hash_aset(moved_down, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_down_count_table[i]));
10536+
}
1049110537
}
1049210538

1049310539
rb_hash_aset(h, ID2SYM(rb_intern("considered")), considered);
1049410540
rb_hash_aset(h, ID2SYM(rb_intern("moved")), moved);
10541+
rb_hash_aset(h, ID2SYM(rb_intern("moved_up")), moved_up);
10542+
rb_hash_aset(h, ID2SYM(rb_intern("moved_down")), moved_down);
1049510543

1049610544
return h;
1049710545
}

include/ruby/internal/core/rstring.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -556,6 +556,9 @@ RSTRING_LENINT(VALUE str)
556556
return rb_long2int(RSTRING_LEN(str));
557557
}
558558

559+
bool
560+
rb_str_shared_root_p(VALUE str);
561+
559562
/**
560563
* Convenient macro to obtain the contents and length at once.
561564
*

internal/string.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,10 @@ void rb_str_tmp_frozen_release(VALUE str, VALUE tmp);
5959
VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc);
6060
VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE);
6161
VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE);
62+
void rb_str_make_embedded(VALUE);
63+
size_t rb_str_size_as_embedded(VALUE);
64+
bool rb_str_reembeddable_p(VALUE);
65+
void rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root);
6266
RUBY_SYMBOL_EXPORT_END
6367

6468
MJIT_SYMBOL_EXPORT_BEGIN

string.c

Lines changed: 73 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -221,17 +221,51 @@ str_embed_capa(VALUE str)
221221
#endif
222222
}
223223

224+
bool
225+
rb_str_reembeddable_p(VALUE str)
226+
{
227+
return !FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
228+
}
229+
224230
static inline size_t
225-
str_embed_size(long capa)
231+
rb_str_embed_size(long capa)
226232
{
227233
return offsetof(struct RString, as.embed.ary) + capa;
228234
}
229235

236+
bool
237+
rb_str_shared_root_p(VALUE str)
238+
{
239+
return FL_TEST_RAW(str, STR_SHARED_ROOT);
240+
}
241+
242+
size_t
243+
rb_str_size_as_embedded(VALUE str)
244+
{
245+
size_t real_size;
246+
#if USE_RVARGC
247+
if (STR_EMBED_P(str)) {
248+
real_size = rb_str_embed_size(RSTRING(str)->as.embed.len) + TERM_LEN(str);
249+
}
250+
/* if the string is not currently embedded, but it can be embedded, how
251+
* much space would it require */
252+
else if (rb_str_reembeddable_p(str)) {
253+
real_size = rb_str_embed_size(RSTRING(str)->as.heap.len) + TERM_LEN(str);
254+
}
255+
else {
256+
#endif
257+
real_size = sizeof(struct RString);
258+
#if USE_RVARGC
259+
}
260+
#endif
261+
return real_size;
262+
}
263+
230264
static inline bool
231265
STR_EMBEDDABLE_P(long len, long termlen)
232266
{
233267
#if USE_RVARGC
234-
return rb_gc_size_allocatable_p(str_embed_size(len + termlen));
268+
return rb_gc_size_allocatable_p(rb_str_embed_size(len + termlen));
235269
#else
236270
return len <= RSTRING_EMBED_LEN_MAX + 1 - termlen;
237271
#endif
@@ -264,6 +298,41 @@ rb_str_make_independent(VALUE str)
264298
}
265299
}
266300

301+
void
302+
rb_str_make_embedded(VALUE str) {
303+
RUBY_ASSERT(rb_str_reembeddable_p(str));
304+
RUBY_ASSERT(!STR_EMBED_P(str));
305+
306+
char *buf = RSTRING_PTR(str);
307+
long len = RSTRING_LEN(str);
308+
309+
STR_SET_EMBED(str);
310+
STR_SET_EMBED_LEN(str, len);
311+
312+
memmove(RSTRING_PTR(str), buf, len);
313+
ruby_xfree(buf);
314+
}
315+
316+
void
317+
rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root)
318+
{
319+
// if the root location hasn't changed, we don't need to update
320+
if (new_root == old_root) {
321+
return;
322+
}
323+
324+
// if the root string isn't embedded, we don't need to touch the pointer.
325+
// it already points to the same shared buffer
326+
if (!STR_EMBED_P(new_root)) {
327+
return;
328+
}
329+
330+
size_t offset = (size_t)((uintptr_t)RSTRING(str)->as.heap.ptr - (uintptr_t)RSTRING(old_root)->as.embed.ary);
331+
332+
RUBY_ASSERT(RSTRING(str)->as.heap.ptr >= RSTRING(old_root)->as.embed.ary);
333+
RSTRING(str)->as.heap.ptr = RSTRING(new_root)->as.embed.ary + offset;
334+
}
335+
267336
void
268337
rb_debug_rstring_null_ptr(const char *func)
269338
{
@@ -849,7 +918,7 @@ str_alloc(VALUE klass, size_t size)
849918
static inline VALUE
850919
str_alloc_embed(VALUE klass, size_t capa)
851920
{
852-
size_t size = str_embed_size(capa);
921+
size_t size = rb_str_embed_size(capa);
853922
assert(rb_gc_size_allocatable_p(size));
854923
#if !USE_RVARGC
855924
assert(size <= sizeof(struct RString));
@@ -1693,7 +1762,7 @@ ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size)
16931762
static inline VALUE
16941763
ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa)
16951764
{
1696-
size_t size = str_embed_size(capa);
1765+
size_t size = rb_str_embed_size(capa);
16971766
assert(rb_gc_size_allocatable_p(size));
16981767
#if !USE_RVARGC
16991768
assert(size <= sizeof(struct RString));

0 commit comments

Comments
 (0)