Skip to content

Commit c58970b

Browse files
authored
ZJIT: Optimize variadic cfunc Send calls into CCallVariadic (#14898)
ZJIT: Optimize variadic cfunc Send calls into CCallVariadic
1 parent 8dc5822 commit c58970b

File tree

4 files changed

+125
-38
lines changed

4 files changed

+125
-38
lines changed

test/ruby/test_zjit.rb

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,21 @@ def test = [1, 2].map(&:to_s)
525525
}
526526
end
527527

528+
def test_send_variadic_with_block
529+
assert_compiles '[[1, "a"], [2, "b"], [3, "c"]]', %q{
530+
A = [1, 2, 3]
531+
B = ["a", "b", "c"]
532+
533+
def test
534+
result = []
535+
A.zip(B) { |x, y| result << [x, y] }
536+
result
537+
end
538+
539+
test; test
540+
}, call_threshold: 2
541+
end
542+
528543
def test_send_splat
529544
assert_runs '[1, 2]', %q{
530545
def test(a, b) = [a, b]

zjit/src/codegen.rs

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -431,8 +431,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
431431
gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs),
432432
Insn::CCallWithFrame { cfunc, name, args, cme, state, blockiseq, .. } =>
433433
gen_ccall_with_frame(jit, asm, *cfunc, *name, opnds!(args), *cme, *blockiseq, &function.frame_state(*state)),
434-
Insn::CCallVariadic { cfunc, recv, args, name, cme, state, return_type: _, elidable: _ } => {
435-
gen_ccall_variadic(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, &function.frame_state(*state))
434+
Insn::CCallVariadic { cfunc, recv, args, name, cme, state, blockiseq, .. } => {
435+
gen_ccall_variadic(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, *blockiseq, &function.frame_state(*state))
436436
}
437437
Insn::GetIvar { self_val, id, ic, state: _ } => gen_getivar(jit, asm, opnd!(self_val), *id, *ic),
438438
Insn::SetGlobal { id, val, state } => no_output!(gen_setglobal(jit, asm, *id, opnd!(val), &function.frame_state(*state))),
@@ -845,26 +845,47 @@ fn gen_ccall_variadic(
845845
recv: Opnd,
846846
args: Vec<Opnd>,
847847
cme: *const rb_callable_method_entry_t,
848+
blockiseq: Option<IseqPtr>,
848849
state: &FrameState,
849850
) -> lir::Opnd {
850851
gen_incr_counter(asm, Counter::variadic_cfunc_optimized_send_count);
852+
gen_stack_overflow_check(jit, asm, state, state.stack_size());
851853

852-
gen_prepare_non_leaf_call(jit, asm, state);
854+
let args_with_recv_len = args.len() + 1;
853855

854-
let stack_growth = state.stack_size();
855-
gen_stack_overflow_check(jit, asm, state, stack_growth);
856+
// Compute the caller's stack size after consuming recv and args.
857+
// state.stack() includes recv + args, so subtract both.
858+
let caller_stack_size = state.stack_size() - args_with_recv_len;
856859

857-
gen_push_frame(asm, args.len(), state, ControlFrame {
860+
// Can't use gen_prepare_non_leaf_call() because we need to adjust the SP
861+
// to account for the receiver and arguments (like gen_ccall_with_frame does)
862+
gen_prepare_call_with_gc(asm, state, false);
863+
gen_save_sp(asm, caller_stack_size);
864+
gen_spill_stack(jit, asm, state);
865+
gen_spill_locals(jit, asm, state);
866+
867+
let block_handler_specval = if let Some(block_iseq) = blockiseq {
868+
// Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
869+
// VM_CFP_TO_CAPTURED_BLOCK then turns &cfp->self into a block handler.
870+
// rb_captured_block->code.iseq aliases with cfp->block_code.
871+
asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into());
872+
let cfp_self_addr = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF));
873+
asm.or(cfp_self_addr, Opnd::Imm(1))
874+
} else {
875+
VM_BLOCK_HANDLER_NONE.into()
876+
};
877+
878+
gen_push_frame(asm, args_with_recv_len, state, ControlFrame {
858879
recv,
859880
iseq: None,
860881
cme,
861882
frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL,
862-
specval: VM_BLOCK_HANDLER_NONE.into(),
883+
specval: block_handler_specval,
863884
pc: PC_POISON,
864885
});
865886

866887
asm_comment!(asm, "switch to new SP register");
867-
let sp_offset = (state.stack().len() - args.len() + VM_ENV_DATA_SIZE.to_usize()) * SIZEOF_VALUE;
888+
let sp_offset = (caller_stack_size + VM_ENV_DATA_SIZE.to_usize()) * SIZEOF_VALUE;
868889
let new_sp = asm.add(SP, sp_offset.into());
869890
asm.mov(SP, new_sp);
870891

zjit/src/hir.rs

Lines changed: 50 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,7 @@ pub enum Insn {
804804
state: InsnId,
805805
return_type: Type,
806806
elidable: bool,
807+
blockiseq: Option<IseqPtr>,
807808
},
808809

809810
/// Un-optimized fallback implementation (dynamic dispatch) for send-ish instructions
@@ -1920,8 +1921,8 @@ impl Function {
19201921
elidable,
19211922
blockiseq,
19221923
},
1923-
&CCallVariadic { cfunc, recv, ref args, cme, name, state, return_type, elidable } => CCallVariadic {
1924-
cfunc, recv: find!(recv), args: find_vec!(args), cme, name, state, return_type, elidable
1924+
&CCallVariadic { cfunc, recv, ref args, cme, name, state, return_type, elidable, blockiseq } => CCallVariadic {
1925+
cfunc, recv: find!(recv), args: find_vec!(args), cme, name, state, return_type, elidable, blockiseq
19251926
},
19261927
&Defined { op_type, obj, pushval, v, state } => Defined { op_type, obj, pushval, v: find!(v), state: find!(state) },
19271928
&DefinedIvar { self_val, pushval, id, state } => DefinedIvar { self_val: find!(self_val), pushval, id, state },
@@ -2989,9 +2990,23 @@ impl Function {
29892990
return Err(());
29902991
}
29912992

2992-
// Find the `argc` (arity) of the C method, which describes the parameters it expects
2993+
let ci_flags = unsafe { vm_ci_flag(call_info) };
2994+
2995+
// When seeing &block argument, fall back to dynamic dispatch for now
2996+
// TODO: Support block forwarding
2997+
if unspecializable_call_type(ci_flags) {
2998+
fun.count_complex_call_features(block, ci_flags);
2999+
fun.set_dynamic_send_reason(send_insn_id, ComplexArgPass);
3000+
return Err(());
3001+
}
3002+
3003+
let blockiseq = if blockiseq.is_null() { None } else { Some(blockiseq) };
3004+
29933005
let cfunc = unsafe { get_cme_def_body_cfunc(cme) };
3006+
// Find the `argc` (arity) of the C method, which describes the parameters it expects
29943007
let cfunc_argc = unsafe { get_mct_argc(cfunc) };
3008+
let cfunc_ptr = unsafe { get_mct_func(cfunc) }.cast();
3009+
29953010
match cfunc_argc {
29963011
0.. => {
29973012
// (self, arg0, arg1, ..., argc) form
@@ -3001,16 +3016,6 @@ impl Function {
30013016
return Err(());
30023017
}
30033018

3004-
let ci_flags = unsafe { vm_ci_flag(call_info) };
3005-
3006-
// When seeing &block argument, fall back to dynamic dispatch for now
3007-
// TODO: Support block forwarding
3008-
if unspecializable_call_type(ci_flags) {
3009-
fun.count_complex_call_features(block, ci_flags);
3010-
fun.set_dynamic_send_reason(send_insn_id, ComplexArgPass);
3011-
return Err(());
3012-
}
3013-
30143019
// Commit to the replacement. Put PatchPoint.
30153020
fun.gen_patch_points_for_optimized_ccall(block, recv_class, method_id, cme, state);
30163021
if recv_class.instance_can_have_singleton_class() {
@@ -3023,17 +3028,14 @@ impl Function {
30233028
fun.insn_types[recv.0] = fun.infer_type(recv);
30243029
}
30253030

3026-
let blockiseq = if blockiseq.is_null() { None } else { Some(blockiseq) };
3027-
30283031
// Emit a call
3029-
let cfunc = unsafe { get_mct_func(cfunc) }.cast();
30303032
let mut cfunc_args = vec![recv];
30313033
cfunc_args.append(&mut args);
30323034

30333035
let name = rust_str_to_id(&qualified_method_name(unsafe { (*cme).owner }, unsafe { (*cme).called_id }));
30343036
let ccall = fun.push_insn(block, Insn::CCallWithFrame {
30353037
cd,
3036-
cfunc,
3038+
cfunc: cfunc_ptr,
30373039
args: cfunc_args,
30383040
cme,
30393041
name,
@@ -3047,9 +3049,37 @@ impl Function {
30473049
}
30483050
// Variadic method
30493051
-1 => {
3052+
// The method gets a pointer to the first argument
30503053
// func(int argc, VALUE *argv, VALUE recv)
3051-
fun.set_dynamic_send_reason(send_insn_id, SendCfuncVariadic);
3052-
Err(())
3054+
fun.gen_patch_points_for_optimized_ccall(block, recv_class, method_id, cme, state);
3055+
3056+
if recv_class.instance_can_have_singleton_class() {
3057+
fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::NoSingletonClass { klass: recv_class }, state });
3058+
}
3059+
if let Some(profiled_type) = profiled_type {
3060+
// Guard receiver class
3061+
recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state });
3062+
fun.insn_types[recv.0] = fun.infer_type(recv);
3063+
}
3064+
3065+
if get_option!(stats) {
3066+
count_not_inlined_cfunc(fun, block, cme);
3067+
}
3068+
3069+
let ccall = fun.push_insn(block, Insn::CCallVariadic {
3070+
cfunc: cfunc_ptr,
3071+
recv,
3072+
args,
3073+
cme,
3074+
name: method_id,
3075+
state,
3076+
return_type: types::BasicObject,
3077+
elidable: false,
3078+
blockiseq
3079+
});
3080+
3081+
fun.make_equal_to(send_insn_id, ccall);
3082+
Ok(())
30533083
}
30543084
-2 => {
30553085
// (self, args_ruby_array)
@@ -3252,6 +3282,7 @@ impl Function {
32523282
state,
32533283
return_type,
32543284
elidable,
3285+
blockiseq: None,
32553286
});
32563287

32573288
fun.make_equal_to(send_insn_id, ccall);

zjit/src/hir/opt_tests.rs

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5302,26 +5302,46 @@ mod hir_opt_tests {
53025302
}
53035303

53045304
#[test]
5305-
fn test_do_not_optimize_send_variadic_with_block() {
5305+
fn test_optimize_send_variadic_with_block() {
53065306
eval(r#"
5307-
def test = [1, 2, 3].index { |x| x == 2 }
5307+
A = [1, 2, 3]
5308+
B = ["a", "b", "c"]
5309+
5310+
def test
5311+
result = []
5312+
A.zip(B) { |x, y| result << [x, y] }
5313+
result
5314+
end
5315+
53085316
test; test
53095317
"#);
53105318
assert_snapshot!(hir_string("test"), @r"
5311-
fn test@<compiled>:2:
5319+
fn test@<compiled>:6:
53125320
bb0():
53135321
EntryPoint interpreter
53145322
v1:BasicObject = LoadSelf
5315-
Jump bb2(v1)
5316-
bb1(v4:BasicObject):
5323+
v2:NilClass = Const Value(nil)
5324+
Jump bb2(v1, v2)
5325+
bb1(v5:BasicObject):
53175326
EntryPoint JIT(0)
5318-
Jump bb2(v4)
5319-
bb2(v6:BasicObject):
5320-
v10:ArrayExact[VALUE(0x1000)] = Const Value(VALUE(0x1000))
5321-
v11:ArrayExact = ArrayDup v10
5322-
v13:BasicObject = Send v11, 0x1008, :index
5327+
v6:NilClass = Const Value(nil)
5328+
Jump bb2(v5, v6)
5329+
bb2(v8:BasicObject, v9:NilClass):
5330+
v13:ArrayExact = NewArray
5331+
SetLocal l0, EP@3, v13
5332+
PatchPoint SingleRactorMode
5333+
PatchPoint StableConstantNames(0x1000, A)
5334+
v36:ArrayExact[VALUE(0x1008)] = Const Value(VALUE(0x1008))
5335+
PatchPoint SingleRactorMode
5336+
PatchPoint StableConstantNames(0x1010, B)
5337+
v39:ArrayExact[VALUE(0x1018)] = Const Value(VALUE(0x1018))
5338+
PatchPoint MethodRedefined(Array@0x1020, zip@0x1028, cme:0x1030)
5339+
PatchPoint NoSingletonClass(Array@0x1020)
5340+
v43:BasicObject = CCallVariadic zip@0x1058, v36, v39
5341+
v25:BasicObject = GetLocal l0, EP@3
5342+
v29:BasicObject = GetLocal l0, EP@3
53235343
CheckInterrupts
5324-
Return v13
5344+
Return v29
53255345
");
53265346
}
53275347

0 commit comments

Comments
 (0)