Skip to content

Commit

Permalink
Add pushtoarraykwsplat instruction to avoid unnecessary array allocation
Browse files Browse the repository at this point in the history
This is designed to replace the newarraykwsplat instruction, which is
no longer used in the parse.y compiler after this commit.  This avoids
an unnecessary array allocation in the case where ARGSCAT is followed
by LIST with keyword:

```ruby
a = []
kw = {}
[*a, 1, **kw]
```

Previous Instructions:

```
0000 newarray                               0                         (   1)[Li]
0002 setlocal_WC_0                          a@0
0004 newhash                                0                         (   2)[Li]
0006 setlocal_WC_0                          kw@1
0008 getlocal_WC_0                          a@0                       (   3)[Li]
0010 splatarray                             true
0012 putobject_INT2FIX_1_
0013 putspecialobject                       1
0015 newhash                                0
0017 getlocal_WC_0                          kw@1
0019 opt_send_without_block                 <calldata!mid:core#hash_merge_kwd, argc:2, ARGS_SIMPLE>
0021 newarraykwsplat                        2
0023 concattoarray
0024 leave
```

New Instructions:

```
0000 newarray                               0                         (   1)[Li]
0002 setlocal_WC_0                          a@0
0004 newhash                                0                         (   2)[Li]
0006 setlocal_WC_0                          kw@1
0008 getlocal_WC_0                          a@0                       (   3)[Li]
0010 splatarray                             true
0012 putobject_INT2FIX_1_
0013 pushtoarray                            1
0015 putspecialobject                       1
0017 newhash                                0
0019 getlocal_WC_0                          kw@1
0021 opt_send_without_block                 <calldata!mid:core#hash_merge_kwd, argc:2, ARGS_SIMPLE>
0023 pushtoarraykwsplat
0024 leave
```

pushtoarraykwsplat is designed to be simpler than newarraykwsplat.
It does not take a variable number of arguments from the stack, it
pops the top of the stack, and appends it to the second from the top,
unless the top of the stack is an empty hash.

During this work, I found the ARGSPUSH followed by HASH with keyword
did not compile correctly, as it pushed the generated hash to the
array even if the hash was empty.  This fixes the behavior, to use
pushtoarraykwsplat instead of pushtoarray in that case:

```ruby
a = []
kw = {}
[*a, **kw]

[{}] # Before

[] # After
```

This does not remove the newarraykwsplat instruction, as it is still
referenced in the prism compiler (which should be updated similar
to this), YJIT (only in the bindings, it does not appear to be
implemented), and RJIT (in a couple comments).  After those are
updated, the newarraykwsplat instruction should be removed.
  • Loading branch information
jeremyevans committed Feb 20, 2024
1 parent 2e2e3d8 commit 77c1233
Show file tree
Hide file tree
Showing 5 changed files with 236 additions and 193 deletions.
2 changes: 1 addition & 1 deletion bootstraptest/test_insns.rb
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def m&b
[ 'intern', %q{ :"#{true}" }, ],

[ 'newarray', %q{ ["true"][0] }, ],
[ 'newarraykwsplat', %q{ [**{x:'true'}][0][:x] }, ],
[ 'pushtoarraykwsplat', %q{ [**{x:'true'}][0][:x] }, ],
[ 'duparray', %q{ [ true ][0] }, ],
[ 'expandarray', %q{ y = [ true, false, nil ]; x, = y; x }, ],
[ 'expandarray', %q{ y = [ true, false, nil ]; x, *z = y; x }, ],
Expand Down
34 changes: 23 additions & 11 deletions compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -4813,12 +4813,12 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop
* putobject [1,2,3,...,100] (<- hidden array); concattoarray;
* push z; pushtoarray 1;
*
* - If the last element is a keyword, newarraykwsplat should be emitted
* to check and remove empty keyword arguments hash from array.
* - If the last element is a keyword, pushtoarraykwsplat should be emitted
* to only push it onto the array if it is not empty
* (Note: a keyword is NODE_HASH which is not static_literal_node_p.)
*
* [1,2,3,**kw] =>
* putobject 1; putobject 2; putobject 3; push kw; newarraykwsplat
* putobject 1; putobject 2; putobject 3; newarray 3; ...; pushtoarraykwsplat kw
*/

const int max_stack_len = 0x100;
Expand Down Expand Up @@ -4873,15 +4873,22 @@ compile_array(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int pop
EXPECT_NODE("compile_array", node, NODE_LIST, -1);
}

NO_CHECK(COMPILE_(ret, "array element", RNODE_LIST(node)->nd_head, 0));
stack_len++;

if (!RNODE_LIST(node)->nd_next && keyword_node_p(RNODE_LIST(node)->nd_head)) {
/* Reached the end, and the last element is a keyword */
ADD_INSN1(ret, line_node, newarraykwsplat, INT2FIX(stack_len));
if (!first_chunk) ADD_INSN(ret, line_node, concattoarray);
/* Create array or push existing non-keyword elements onto array */
if (stack_len == 0 && first_chunk) {
ADD_INSN1(ret, line_node, newarray, INT2FIX(0));
}
else {
FLUSH_CHUNK;
}
NO_CHECK(COMPILE_(ret, "array element", RNODE_LIST(node)->nd_head, 0));
ADD_INSN(ret, line_node, pushtoarraykwsplat);
return 1;
}
else {
NO_CHECK(COMPILE_(ret, "array element", RNODE_LIST(node)->nd_head, 0));
stack_len++;
}

/* If there are many pushed elements, flush them to avoid stack overflow */
if (stack_len >= max_stack_len) FLUSH_CHUNK;
Expand Down Expand Up @@ -10426,13 +10433,18 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no
else {
CHECK(COMPILE(ret, "argspush head", RNODE_ARGSPUSH(node)->nd_head));
const NODE *body_node = RNODE_ARGSPUSH(node)->nd_body;
if (static_literal_node_p(body_node, iseq, false)) {
if (keyword_node_p(body_node)) {
CHECK(COMPILE_(ret, "array element", body_node, FALSE));
ADD_INSN(ret, node, pushtoarraykwsplat);
}
else if (static_literal_node_p(body_node, iseq, false)) {
ADD_INSN1(ret, body_node, putobject, static_literal_value(body_node, iseq));
ADD_INSN1(ret, node, pushtoarray, INT2FIX(1));
}
else {
CHECK(COMPILE_(ret, "array element", body_node, FALSE));
ADD_INSN1(ret, node, pushtoarray, INT2FIX(1));
}
ADD_INSN1(ret, node, pushtoarray, INT2FIX(1));
}
break;
}
Expand Down
14 changes: 14 additions & 0 deletions insns.def
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,20 @@ newarraykwsplat
}
}

/* push hash onto array unless the hash is empty (as empty keyword
splats should be ignored).
*/
DEFINE_INSN
pushtoarraykwsplat
()
(VALUE ary, VALUE hash)
(VALUE ary)
{
if (!RHASH_EMPTY_P(hash)) {
rb_ary_push(ary, hash);
}
}

/* dup array */
DEFINE_INSN
duparray
Expand Down
15 changes: 15 additions & 0 deletions test/ruby/test_syntax.rb
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ def test_hash_kwsplat_hash
def test_array_kwsplat_hash
kw = {}
h = {a: 1}
a = []
assert_equal([], [**{}])
assert_equal([], [**kw])
assert_equal([h], [**h])
Expand All @@ -247,6 +248,20 @@ def test_array_kwsplat_hash
assert_equal([1, kw], [1, kw])
assert_equal([1, h], [1, h])

assert_equal([], [*a, **{}])
assert_equal([], [*a, **kw])
assert_equal([h], [*a, **h])
assert_equal([{}], [*a, {}])
assert_equal([kw], [*a, kw])
assert_equal([h], [*a, h])

assert_equal([1], [1, *a, **{}])
assert_equal([1], [1, *a, **kw])
assert_equal([1, h], [1, *a, **h])
assert_equal([1, {}], [1, *a, {}])
assert_equal([1, kw], [1, *a, kw])
assert_equal([1, h], [1, *a, h])

assert_equal([], [**kw, **kw])
assert_equal([], [**kw, **{}, **kw])
assert_equal([1], [1, **kw, **{}, **kw])
Expand Down

0 comments on commit 77c1233

Please sign in to comment.