Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

YJIT: Add top ISEQ call counts to --yjit-stats #9906

Merged
merged 2 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions yjit.c
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,30 @@ rb_yjit_dump_iseq_loc(const rb_iseq_t *iseq, uint32_t insn_idx)
fprintf(stderr, "%s %.*s:%u\n", __func__, (int)len, ptr, rb_iseq_line_no(iseq, insn_idx));
}

// Get the number of digits required to print an integer
static int
num_digits(int integer)
{
int num = 1;
while (integer /= 10) {
num++;
}
return num;
}

// Allocate a C string that formats an ISEQ label like iseq_inspect()
char *
rb_yjit_iseq_inspect(const rb_iseq_t *iseq)
{
const char *label = RSTRING_PTR(iseq->body->location.label);
const char *path = RSTRING_PTR(rb_iseq_path(iseq));
int lineno = iseq->body->location.code_location.beg_pos.lineno;

char *buf = ZALLOC_N(char, strlen(label) + strlen(path) + num_digits(lineno) + 3);
sprintf(buf, "%s@%s:%d", label, path, lineno);
return buf;
}

// The FL_TEST() macro
VALUE
rb_FL_TEST(VALUE obj, VALUE flags)
Expand Down
32 changes: 12 additions & 20 deletions yjit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -315,10 +315,11 @@ def _print_stats(out: $stderr) # :nodoc:
out.puts "num_send_polymorphic: " + format_number_pct(13, stats[:num_send_polymorphic], stats[:num_send])
out.puts "num_send_megamorphic: " + format_number_pct(13, stats[:send_megamorphic], stats[:num_send])
out.puts "num_send_dynamic: " + format_number_pct(13, stats[:num_send_dynamic], stats[:num_send])
out.puts "num_send_inline: " + format_number_pct(13, stats[:num_send_inline], stats[:num_send])
out.puts "num_send_leaf_builtin: " + format_number_pct(13, stats[:num_send_leaf_builtin], stats[:num_send])
out.puts "num_send_cfunc: " + format_number_pct(13, stats[:num_send_cfunc], stats[:num_send])
out.puts "num_send_cfunc_inline: " + format_number_pct(13, stats[:num_send_cfunc_inline], stats[:num_send_cfunc])
out.puts "num_send_iseq: " + format_number_pct(13, stats[:num_send_iseq], stats[:num_send])
out.puts "num_send_iseq_leaf: " + format_number_pct(13, stats[:num_send_iseq_leaf], stats[:num_send_iseq])
out.puts "num_send_iseq_inline: " + format_number_pct(13, stats[:num_send_iseq_inline], stats[:num_send_iseq])
if stats[:num_send_x86_rel32] != 0 || stats[:num_send_x86_reg] != 0
out.puts "num_send_x86_rel32: " + format_number(13, stats[:num_send_x86_rel32])
out.puts "num_send_x86_reg: " + format_number(13, stats[:num_send_x86_reg])
Expand Down Expand Up @@ -385,17 +386,12 @@ def _print_stats(out: $stderr) # :nodoc:

print_sorted_exit_counts(stats, out: out, prefix: "exit_")

print_sorted_cfunc_calls(stats, out:out)
print_sorted_method_calls(stats[:cfunc_calls], stats[:num_send_cfunc], out: out, type: 'C')
print_sorted_method_calls(stats[:iseq_calls], stats[:num_send_iseq], out: out, type: 'ISEQ')
end

def print_sorted_cfunc_calls(stats, out:, how_many: 20, left_pad: 4) # :nodoc:
calls = stats[:cfunc_calls]
if calls.empty?
return
end

# Total number of cfunc calls
num_send_cfunc = stats[:num_send_cfunc]
def print_sorted_method_calls(calls, num_calls, out:, type:, how_many: 20, left_pad: 4) # :nodoc:
return if calls.empty?

# Sort calls by decreasing frequency and keep the top N
pairs = calls.map { |k,v| [k, v] }
Expand All @@ -404,16 +400,14 @@ def print_sorted_cfunc_calls(stats, out:, how_many: 20, left_pad: 4) # :nodoc:
pairs = pairs[0...how_many]

top_n_total = pairs.sum { |name, count| count }
top_n_pct = 100.0 * top_n_total / num_send_cfunc
top_n_pct = 100.0 * top_n_total / num_calls
longest_name_len = pairs.max_by { |name, count| name.length }.first.length

out.puts "Top-#{pairs.size} most frequent C calls (#{"%.1f" % top_n_pct}% of C calls):"
out.puts "Top-#{pairs.size} most frequent #{type} calls (#{"%.1f" % top_n_pct}% of #{type} calls):"

pairs.each do |name, count|
padding = longest_name_len + left_pad
padded_name = "%#{padding}s" % name
padded_count = format_number_pct(10, count, num_send_cfunc)
out.puts("#{padded_name}: #{padded_count}")
padded_count = format_number_pct(10, count, num_calls)
out.puts("#{padded_count}: #{name}")
end
end

Expand All @@ -437,10 +431,8 @@ def print_sorted_exit_counts(stats, out:, prefix:, how_many: 20, left_pad: 4) #

longest_insn_name_len = exits.max_by { |name, count| name.length }.first.length
exits.each do |name, count|
padding = longest_insn_name_len + left_pad
padded_name = "%#{padding}s" % name
padded_count = format_number_pct(10, count, total_exits)
out.puts("#{padded_name}: #{padded_count}")
out.puts("#{padded_count}: #{name}")
end
else
out.puts "total_exits: " + format_number(10, total_exits)
Expand Down
2 changes: 2 additions & 0 deletions yjit/bindgen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ fn main() {
.allowlist_function("rb_yjit_mark_unused")
.allowlist_function("rb_yjit_get_page_size")
.allowlist_function("rb_yjit_iseq_builtin_attrs")
.allowlist_function("rb_yjit_iseq_inspect")
.allowlist_function("rb_yjit_builtin_function")
.allowlist_function("rb_set_cfp_(pc|sp)")
.allowlist_function("rb_yjit_multi_ractor_p")
Expand Down Expand Up @@ -377,6 +378,7 @@ fn main() {
// From gc.h and internal/gc.h
.allowlist_function("rb_class_allocate_instance")
.allowlist_function("rb_obj_info")
.allowlist_function("ruby_xfree")

// From include/ruby/debug.h
.allowlist_function("rb_profile_frames")
Expand Down
108 changes: 56 additions & 52 deletions yjit/src/codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use std::cell::Cell;
use std::cmp;
use std::cmp::min;
use std::collections::HashMap;
use std::ffi::c_void;
use std::ffi::CStr;
use std::mem;
use std::os::raw::c_int;
Expand Down Expand Up @@ -5966,32 +5967,6 @@ fn gen_send_cfunc(
}
}

// Log the name of the method we're calling to,
// note that we intentionally don't do this for inlined cfuncs
if get_option!(gen_stats) {
// TODO: extract code to get method name string into its own function

// Assemble the method name string
let mid = unsafe { vm_ci_mid(ci) };
let class_name = if let Some(class) = recv_known_class {
unsafe { cstr_to_rust_string(rb_class2name(class)) }.unwrap()
} else {
"Unknown".to_string()
};
let method_name = if mid != 0 {
unsafe { cstr_to_rust_string(rb_id2name(mid)) }.unwrap()
} else {
"Unknown".to_string()
};
let name_str = format!("{}#{}", class_name, method_name);

// Get an index for this cfunc name
let cfunc_idx = get_cfunc_idx(&name_str);

// Increment the counter for this cfunc
asm.ccall(incr_cfunc_counter as *const u8, vec![cfunc_idx.into()]);
}

// Check for interrupts
gen_check_ints(asm, Counter::guard_send_interrupted);

Expand Down Expand Up @@ -6180,6 +6155,20 @@ fn gen_send_cfunc(
let stack_ret = asm.stack_push(Type::Unknown);
asm.mov(stack_ret, ret);

// Log the name of the method we're calling to. We intentionally don't do this for inlined cfuncs.
// We also do this after the C call to minimize the impact of spill_temps() on asm.ccall().
if get_option!(gen_stats) {
// Assemble the method name string
let mid = unsafe { vm_ci_mid(ci) };
let name_str = get_method_name(recv_known_class, mid);

// Get an index for this cfunc name
let cfunc_idx = get_cfunc_idx(&name_str);

// Increment the counter for this cfunc
asm.ccall(incr_cfunc_counter as *const u8, vec![cfunc_idx.into()]);
}

// Pop the stack frame (ec->cfp++)
// Instead of recalculating, we can reuse the previous CFP, which is stored in a callee-saved
// register
Expand Down Expand Up @@ -6650,6 +6639,9 @@ fn gen_send_iseq(
}
}

// Increment total ISEQ send count
gen_counter_incr(asm, Counter::num_send_iseq);

// Shortcut for special `Primitive.attr! :leaf` builtins
let builtin_attrs = unsafe { rb_yjit_iseq_builtin_attrs(iseq) };
let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) };
Expand All @@ -6671,7 +6663,7 @@ fn gen_send_iseq(
}

asm_comment!(asm, "inlined leaf builtin");
gen_counter_incr(asm, Counter::num_send_leaf_builtin);
gen_counter_incr(asm, Counter::num_send_iseq_leaf);

// The callee may allocate, e.g. Integer#abs on a Bignum.
// Save SP for GC, save PC for allocation tracing, and prepare
Expand Down Expand Up @@ -6705,7 +6697,7 @@ fn gen_send_iseq(
// Inline simple ISEQs whose return value is known at compile time
if let (Some(value), None, false) = (iseq_get_return_value(iseq), block_arg_type, opt_send_call) {
asm_comment!(asm, "inlined simple ISEQ");
gen_counter_incr(asm, Counter::num_send_inline);
gen_counter_incr(asm, Counter::num_send_iseq_inline);

// Pop receiver and arguments
asm.stack_pop(argc as usize + if captured_opnd.is_some() { 0 } else { 1 });
Expand Down Expand Up @@ -7210,6 +7202,19 @@ fn gen_send_iseq(
pc: None, // We are calling into jitted code, which will set the PC as necessary
});

// Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs.
// We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall().
if get_option!(gen_stats) {
// Assemble the ISEQ name string
let name_str = get_iseq_name(iseq);

// Get an index for this ISEQ name
let iseq_idx = get_iseq_idx(&name_str);

// Increment the counter for this cfunc
asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
}

// No need to set cfp->pc since the callee sets it whenever calling into routines
// that could look at it through jit_save_pc().
// mov(cb, REG0, const_ptr_opnd(start_pc));
Expand Down Expand Up @@ -7639,16 +7644,7 @@ fn gen_send_general(

// Log the name of the method we're calling to
#[cfg(feature = "disasm")]
{
let class_name = unsafe { cstr_to_rust_string(rb_class2name(comptime_recv_klass)) };
let method_name = unsafe { cstr_to_rust_string(rb_id2name(mid)) };
match (class_name, method_name) {
(Some(class_name), Some(method_name)) => {
asm_comment!(asm, "call to {}#{}", class_name, method_name);
}
_ => {}
}
}
asm_comment!(asm, "call to {}", get_method_name(Some(comptime_recv_klass), mid));

// Gather some statistics about sends
gen_counter_incr(asm, Counter::num_send);
Expand Down Expand Up @@ -8069,6 +8065,27 @@ fn gen_send_general(
}
}

/// Assemble "{class_name}#{method_name}" from a class pointer and a method ID
fn get_method_name(class: Option<VALUE>, mid: u64) -> String {
let class_name = class.and_then(|class| unsafe {
cstr_to_rust_string(rb_class2name(class))
}).unwrap_or_else(|| "Unknown".to_string());
let method_name = if mid != 0 {
unsafe { cstr_to_rust_string(rb_id2name(mid)) }
} else {
None
}.unwrap_or_else(|| "Unknown".to_string());
format!("{}#{}", class_name, method_name)
}

/// Assemble "{label}@{iseq_path}:{lineno}" (iseq_inspect() format) from an ISEQ
fn get_iseq_name(iseq: IseqPtr) -> String {
let c_string = unsafe { rb_yjit_iseq_inspect(iseq) };
let string = unsafe { CStr::from_ptr(c_string) }.to_str()
.unwrap_or_else(|_| "not UTF-8").to_string();
unsafe { ruby_xfree(c_string as *mut c_void); }
string
}

/// Shifts the stack for send in order to remove the name of the method
/// Comment below borrow from vm_call_opt_send in vm_insnhelper.c
Expand Down Expand Up @@ -8239,20 +8256,7 @@ fn gen_invokeblock_specialized(
Counter::guard_invokeblock_iseq_block_changed,
);

gen_send_iseq(
jit,
asm,
ocb,
comptime_iseq,
ci,
VM_FRAME_MAGIC_BLOCK,
None,
0 as _,
None,
flags,
argc,
Some(captured_opnd),
)
gen_send_iseq(jit, asm, ocb, comptime_iseq, ci, VM_FRAME_MAGIC_BLOCK, None, 0 as _, None, flags, argc, Some(captured_opnd))
} else if comptime_handler.0 & 0x3 == 0x3 { // VM_BH_IFUNC_P
// We aren't handling CALLER_SETUP_ARG and CALLER_REMOVE_EMPTY_KW_SPLAT yet.
if flags & VM_CALL_ARGS_SPLAT != 0 {
Expand Down
2 changes: 2 additions & 0 deletions yjit/src/cruby_bindings.inc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,7 @@ pub const RUBY_OFFSET_RSTRING_LEN: rstring_offsets = 16;
pub type rstring_offsets = u32;
pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
extern "C" {
pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void);
pub fn rb_class_attached_object(klass: VALUE) -> VALUE;
pub fn rb_singleton_class(obj: VALUE) -> VALUE;
pub fn rb_get_alloc_func(klass: VALUE) -> rb_alloc_func_t;
Expand Down Expand Up @@ -1154,6 +1155,7 @@ extern "C" {
pub fn rb_yjit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE;
pub fn rb_yjit_ruby2_keywords_splat_p(obj: VALUE) -> usize;
pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32);
pub fn rb_yjit_iseq_inspect(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_char;
pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE;
pub fn rb_FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE;
pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool;
Expand Down