Skip to content

Commit

Permalink
add --yjit-dump-iseqs param (Shopify#332)
Browse files Browse the repository at this point in the history
  • Loading branch information
noahgibbs authored and k0kubun committed Aug 24, 2022
1 parent 0ad9cc1 commit b4be3c0
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 41 deletions.
12 changes: 12 additions & 0 deletions yjit.c
Expand Up @@ -399,6 +399,18 @@ rb_str_bytesize(VALUE str)
return LONG2NUM(RSTRING_LEN(str));
}

// Exported function wrapper around RSTRING_LEN so the string length is
// reachable through a plain symbol (allowlisted for bindgen as
// rb_RSTRING_LEN). The result is returned as an unsigned long.
unsigned long
rb_RSTRING_LEN(VALUE str)
{
    unsigned long len = RSTRING_LEN(str);
    return len;
}

// Exported function wrapper around RSTRING_PTR so the string's byte
// pointer is reachable through a plain symbol (allowlisted for bindgen
// as rb_RSTRING_PTR).
char *
rb_RSTRING_PTR(VALUE str)
{
    char *ptr = RSTRING_PTR(str);
    return ptr;
}

// This is defined only as a named struct inside rb_iseq_constant_body.
// By giving it a separate typedef, we make it nameable by rust-bindgen.
// Bindgen's temp/anon name isn't guaranteed stable.
Expand Down
7 changes: 7 additions & 0 deletions yjit/bindgen/src/main.rs
Expand Up @@ -70,6 +70,9 @@ fn main() {
.allowlist_function("rb_str_buf_append")
.allowlist_function("rb_str_dup")

// From encindex.h
.allowlist_type("ruby_preserved_encindex")

// This struct is public to Ruby C extensions
// From include/ruby/internal/core/rbasic.h
.allowlist_type("RBasic")
Expand Down Expand Up @@ -240,6 +243,7 @@ fn main() {
.allowlist_var("VM_ENV_DATA_INDEX_SPECVAL")
.allowlist_var("VM_ENV_DATA_INDEX_FLAGS")
.allowlist_var("VM_ENV_DATA_SIZE")
.allowlist_function("rb_iseq_path")

// From yjit.c
.allowlist_function("rb_iseq_(get|set)_yjit_payload")
Expand All @@ -265,6 +269,8 @@ fn main() {
.allowlist_function("rb_yjit_for_each_iseq")
.allowlist_function("rb_yjit_obj_written")
.allowlist_function("rb_yjit_str_simple_append")
.allowlist_function("rb_RSTRING_PTR")
.allowlist_function("rb_RSTRING_LEN")
.allowlist_function("rb_ENCODING_GET")
.allowlist_function("rb_yjit_exit_locations_dict")

Expand All @@ -282,6 +288,7 @@ fn main() {
.allowlist_function("rb_vm_insn_addr2opcode")
.allowlist_function("rb_iseqw_to_iseq")
.allowlist_function("rb_iseq_each")
.allowlist_function("rb_iseq_method_name")

// From builtin.h
.allowlist_type("rb_builtin_function.*")
Expand Down
27 changes: 27 additions & 0 deletions yjit/src/core.rs
Expand Up @@ -6,6 +6,8 @@ use crate::cruby::*;
use crate::options::*;
use crate::stats::*;
use crate::utils::*;
#[cfg(feature="disasm")]
use crate::disasm::*;
use core::ffi::c_void;
use std::cell::*;
use std::hash::{Hash, Hasher};
Expand Down Expand Up @@ -1426,6 +1428,20 @@ fn gen_block_series_body(
last_blockref = new_blockref;
}

#[cfg(feature = "disasm")]
{
// If dump_iseq_disasm is active, see if this iseq's location matches the given substring.
// If so, we print the new blocks to the console.
if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
let iseq_location = iseq_get_location(blockid.iseq);
if iseq_location.contains(substr) {
let last_block = last_blockref.borrow();
println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, blockid.idx, last_block.end_idx);
println!("{}", disasm_iseq_insn_range(blockid.iseq, blockid.idx, last_block.end_idx));
}
}
}

Some(first_block)
}

Expand Down Expand Up @@ -1956,6 +1972,17 @@ pub fn invalidate_block_version(blockref: &BlockRef) {

verify_blockid(block.blockid);

#[cfg(feature = "disasm")]
{
// If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated.
if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
let iseq_location = iseq_get_location(block.blockid.iseq);
if iseq_location.contains(substr) {
println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, block.blockid.idx, block.end_idx);
}
}
}

// Remove this block from the version array
remove_block_version(blockref);

Expand Down
26 changes: 26 additions & 0 deletions yjit/src/cruby_bindings.inc.rs
Expand Up @@ -246,6 +246,20 @@ pub const RUBY_ENCODING_SHIFT: ruby_encoding_consts = 22;
pub const RUBY_ENCODING_MASK: ruby_encoding_consts = 532676608;
pub const RUBY_ENCODING_MAXNAMELEN: ruby_encoding_consts = 42;
pub type ruby_encoding_consts = u32;
pub const RUBY_ENCINDEX_ASCII_8BIT: ruby_preserved_encindex = 0;
pub const RUBY_ENCINDEX_UTF_8: ruby_preserved_encindex = 1;
pub const RUBY_ENCINDEX_US_ASCII: ruby_preserved_encindex = 2;
pub const RUBY_ENCINDEX_UTF_16BE: ruby_preserved_encindex = 3;
pub const RUBY_ENCINDEX_UTF_16LE: ruby_preserved_encindex = 4;
pub const RUBY_ENCINDEX_UTF_32BE: ruby_preserved_encindex = 5;
pub const RUBY_ENCINDEX_UTF_32LE: ruby_preserved_encindex = 6;
pub const RUBY_ENCINDEX_UTF_16: ruby_preserved_encindex = 7;
pub const RUBY_ENCINDEX_UTF_32: ruby_preserved_encindex = 8;
pub const RUBY_ENCINDEX_UTF8_MAC: ruby_preserved_encindex = 9;
pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10;
pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11;
pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12;
pub type ruby_preserved_encindex = u32;
extern "C" {
pub fn rb_obj_info_dump(obj: VALUE);
}
Expand Down Expand Up @@ -649,6 +663,9 @@ pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4;
pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8;
pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16;
pub type vm_frame_env_flags = u32;
extern "C" {
pub fn rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE;
}
extern "C" {
pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE;
}
Expand Down Expand Up @@ -969,6 +986,9 @@ extern "C" {
extern "C" {
pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t;
}
extern "C" {
pub fn rb_iseq_method_name(iseq: *const rb_iseq_t) -> VALUE;
}
extern "C" {
pub fn rb_vm_barrier();
}
Expand Down Expand Up @@ -1020,6 +1040,12 @@ extern "C" {
extern "C" {
pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;
}
extern "C" {
pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong;
}
extern "C" {
pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char;
}
pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
extern "C" {
pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool;
Expand Down
84 changes: 44 additions & 40 deletions yjit/src/disasm.rs
Expand Up @@ -26,15 +26,17 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU
// Get the iseq pointer from the wrapper
let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };

let out_string = disasm_iseq(iseq);
// This will truncate disassembly of methods with 10k+ bytecodes.
// That's a good thing - this prints to console.
let out_string = disasm_iseq_insn_range(iseq, 0, 9999);

return rust_str_to_ruby(&out_string);
}
}

#[cfg(feature = "disasm")]
fn disasm_iseq(iseq: IseqPtr) -> String {
let mut out = String::from("");
pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> String {
let mut out = String::from("");

// Get a list of block versions generated for this iseq
let mut block_list = get_iseq_block_list(iseq);
Expand Down Expand Up @@ -84,47 +86,49 @@ fn disasm_iseq(iseq: IseqPtr) -> String {
for block_idx in 0..block_list.len() {
let block = block_list[block_idx].borrow();
let blockid = block.get_blockid();
let end_idx = block.get_end_idx();
let start_addr = block.get_start_addr().unwrap().raw_ptr();
let end_addr = block.get_end_addr().unwrap().raw_ptr();
let code_size = block.code_size();

// Write some info about the current block
let block_ident = format!(
"BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ",
block_idx + 1,
block_list.len(),
blockid.idx,
end_idx,
code_size
);
out.push_str(&format!("== {:=<60}\n", block_ident));

// Disassemble the instructions
let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) };
let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();

// For each instruction in this block
for insn in insns.as_ref() {
// Comments for this block
if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) {
for comment in comment_list {
out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment));
if blockid.idx >= start_idx && blockid.idx < end_idx {
let end_idx = block.get_end_idx();
let start_addr = block.get_start_addr().unwrap().raw_ptr();
let end_addr = block.get_end_addr().unwrap().raw_ptr();
let code_size = block.code_size();

// Write some info about the current block
let block_ident = format!(
"BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ",
block_idx + 1,
block_list.len(),
blockid.idx,
end_idx,
code_size
);
out.push_str(&format!("== {:=<60}\n", block_ident));

// Disassemble the instructions
let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) };
let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();

// For each instruction in this block
for insn in insns.as_ref() {
// Comments for this block
if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) {
for comment in comment_list {
out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment));
}
}
out.push_str(&format!(" {}\n", insn));
}
out.push_str(&format!(" {}\n", insn));
}

// If this is not the last block
if block_idx < block_list.len() - 1 {
// Compute the size of the gap between this block and the next
let next_block = block_list[block_idx + 1].borrow();
let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr();
let gap_size = (next_start_addr as usize) - (end_addr as usize);
// If this is not the last block
if block_idx < block_list.len() - 1 {
// Compute the size of the gap between this block and the next
let next_block = block_list[block_idx + 1].borrow();
let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr();
let gap_size = (next_start_addr as usize) - (end_addr as usize);

// Log the size of the gap between the blocks if nonzero
if gap_size > 0 {
out.push_str(&format!("... {} byte gap ...\n", gap_size));
// Log the size of the gap between the blocks if nonzero
if gap_size > 0 {
out.push_str(&format!("... {} byte gap ...\n", gap_size));
}
}
}
}
Expand Down
20 changes: 19 additions & 1 deletion yjit/src/options.rs
@@ -1,7 +1,7 @@
use std::ffi::CStr;

// Command-line options
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[derive(Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct Options {
// Size of the executable memory block to allocate in MiB
Expand Down Expand Up @@ -30,6 +30,9 @@ pub struct Options {
/// Dump compiled and executed instructions for debugging
pub dump_insns: bool,

/// Print when specific ISEQ items are compiled or invalidated
pub dump_iseq_disasm: Option<String>,

/// Verify context objects (debug mode only)
pub verify_ctx: bool,

Expand All @@ -52,6 +55,7 @@ pub static mut OPTIONS: Options = Options {
dump_insns: false,
verify_ctx: false,
global_constant_state: false,
dump_iseq_disasm: None,
};

/// Macro to get an option value by name
Expand All @@ -64,6 +68,16 @@ macro_rules! get_option {
}
pub(crate) use get_option;

/// Macro to borrow an option value by name instead of copying it out.
/// Intended for option fields that are not `Copy`, such as a `String`
/// or an `Option<String>`; the expansion evaluates to a shared reference
/// to the named field of `OPTIONS`.
macro_rules! get_option_ref {
    // Reading the mutable static is acceptable here because options
    // are initialized once, before any Ruby code executes.
    ($option_name:ident) => {
        unsafe { &OPTIONS.$option_name }
    };
}
pub(crate) use get_option_ref;

/// Expected to receive what comes after the third dash in "--yjit-*".
/// Empty string means user passed only "--yjit". C code rejects when
/// they pass exact "--yjit-".
Expand Down Expand Up @@ -105,6 +119,10 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
}
},

("dump-iseq-disasm", _) => unsafe {
OPTIONS.dump_iseq_disasm = Some(opt_val.to_string());
},

("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true },
("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true },
("stats", "") => unsafe { OPTIONS.gen_stats = true },
Expand Down
35 changes: 35 additions & 0 deletions yjit/src/utils.rs
Expand Up @@ -71,6 +71,41 @@ macro_rules! offset_of {
#[allow(unused)]
pub(crate) use offset_of;

// Convert a CRuby RSTRING into an owned Rust String.
//
// Only strings tagged ASCII-8BIT, UTF-8 or US-ASCII are accepted; any other
// encoding index trips the assertion below. The length is taken from the
// RSTRING itself rather than from a terminator, so embedded nulls are kept.
//
// ASCII-8BIT strings may carry arbitrary bytes, so the contents are not
// guaranteed to be valid UTF-8. Invalid sequences are replaced with U+FFFD
// instead of panicking, because this is used to build human-readable
// locations for debug output and must not abort the process.
fn ruby_str_to_rust(v: VALUE) -> String {
    // Make sure the CRuby encoding is UTF-8 compatible
    let encoding = unsafe { rb_ENCODING_GET(v) } as u32;
    assert!(
        encoding == RUBY_ENCINDEX_ASCII_8BIT
            || encoding == RUBY_ENCINDEX_UTF_8
            || encoding == RUBY_ENCINDEX_US_ASCII
    );

    let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *const u8;
    let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
    // SAFETY (assumption to confirm): rb_RSTRING_PTR and rb_RSTRING_LEN
    // describe the same buffer of `v`, which stays alive and unmodified
    // for the duration of this borrow.
    let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };

    // Validates UTF-8 and copies once; invalid bytes become U+FFFD.
    String::from_utf8_lossy(str_slice).into_owned()
}

// Location is the file defining the method, colon, method name.
// Filenames are sometimes internal strings supplied to eval,
// so be careful with them.
pub fn iseq_get_location(iseq: IseqPtr) -> String {
let iseq_path = unsafe { rb_iseq_path(iseq) };
let iseq_method = unsafe { rb_iseq_method_name(iseq) };

let mut s = if iseq_path == Qnil {
"None".to_string()
} else {
ruby_str_to_rust(iseq_path)
};
s.push_str(":");
if iseq_method == Qnil {
s.push_str("None");
} else {
s.push_str(& ruby_str_to_rust(iseq_method));
}
s
}

#[cfg(test)]
mod tests {
#[test]
Expand Down

0 comments on commit b4be3c0

Please sign in to comment.