Skip to content

Commit

Permalink
Fix incorrect line numbers in GC hook
Browse files Browse the repository at this point in the history
If the previous instruction is not a leaf instruction, then the PC was
incremented before the instruction was run (meaning the currently
executing instruction is actually the previous instruction), so we
should not increment the PC; otherwise we will calculate the source
line for the next instruction.

This bug can be reproduced in the following script:

```
require "objspace"

ObjectSpace.trace_object_allocations_start
a =

  1.0 / 0.0
p [ObjectSpace.allocation_sourceline(a), ObjectSpace.allocation_sourcefile(a)]
```

Which outputs: [4, "test.rb"]

This is incorrect because the object was allocated on line 10 and not
line 4. The behaviour is correct when we use a leaf instruction (e.g.
if we replace `1.0 / 0.0` with `"hello"`), in which case the output is:
[10, "test.rb"].

[Bug #19456]
  • Loading branch information
peterzhu2118 committed Feb 24, 2023
1 parent d2631c4 commit 3e09822
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 10 deletions.
1 change: 1 addition & 0 deletions common.mk
Expand Up @@ -6791,6 +6791,7 @@ gc.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h
gc.$(OBJEXT): $(top_srcdir)/internal/bignum.h
gc.$(OBJEXT): $(top_srcdir)/internal/bits.h
gc.$(OBJEXT): $(top_srcdir)/internal/class.h
gc.$(OBJEXT): $(top_srcdir)/internal/compile.h
gc.$(OBJEXT): $(top_srcdir)/internal/compilers.h
gc.$(OBJEXT): $(top_srcdir)/internal/complex.h
gc.$(OBJEXT): $(top_srcdir)/internal/cont.h
Expand Down
12 changes: 12 additions & 0 deletions compile.c
Expand Up @@ -10264,6 +10264,18 @@ dump_disasm_list_with_cursor(const LINK_ELEMENT *link, const LINK_ELEMENT *curr,
fflush(stdout);
}

/* Externally visible wrapper around insn_leaf_p(): reports whether the
 * instruction with opcode `i` is marked as a leaf instruction.
 * Declared in internal/compile.h so that other files (e.g. gc.c) can
 * query leafness. `i` is passed straight through without validation. */
bool
rb_insns_leaf_p(int i)
{
return insn_leaf_p(i);
}

/* Externally visible wrapper around insn_len(): returns the length of
 * the instruction whose opcode is `insn`.
 * NOTE(review): gc.c advances a `const VALUE *` by this value, so the
 * length is presumably counted in VALUE-sized slots, not bytes -- confirm
 * against insn_len(). */
int
rb_insn_len(VALUE insn)
{
return insn_len(insn);
}

const char *
rb_insns_name(int i)
{
Expand Down
21 changes: 19 additions & 2 deletions gc.c
Expand Up @@ -100,6 +100,7 @@
#include "id_table.h"
#include "internal.h"
#include "internal/class.h"
#include "internal/compile.h"
#include "internal/complex.h"
#include "internal/cont.h"
#include "internal/error.h"
Expand Down Expand Up @@ -2485,8 +2486,24 @@ gc_event_hook_body(rb_execution_context_t *ec, rb_objspace_t *objspace, const rb
{
const VALUE *pc = ec->cfp->pc;
if (pc && VM_FRAME_RUBYFRAME_P(ec->cfp)) {
/* increment PC because source line is calculated with PC-1 */
ec->cfp->pc++;
int prev_opcode = rb_vm_insn_addr2opcode((void *)*ec->cfp->iseq->body->iseq_encoded);
for (const VALUE *insn = ec->cfp->iseq->body->iseq_encoded; insn < pc; insn += rb_insn_len(prev_opcode)) {
prev_opcode = rb_vm_insn_addr2opcode((void *)*insn);
}

/* If the previous instruction is a leaf instruction, then the PC is
* the currently executing instruction. We should increment the PC
* because the source line is calculated with PC-1 in calc_pos.
*
* If the previous instruction is not a leaf instruction, then the PC
* was incremented before the instruction was ran (meaning the
* currently executing instruction is actually the previous
* instruction), so we should not increment the PC otherwise we will
* calculate the source line for the next instruction.
*/
if (rb_insns_leaf_p(prev_opcode)) {
ec->cfp->pc++;
}
}
EXEC_EVENT_HOOK(ec, event, ec->cfp->self, 0, 0, 0, data);
ec->cfp->pc = pc;
Expand Down
2 changes: 2 additions & 0 deletions internal/compile.h
Expand Up @@ -17,6 +17,8 @@ struct rb_iseq_struct; /* in vm_core.h */
/* compile.c */
int rb_dvar_defined(ID, const struct rb_iseq_struct *);
int rb_local_defined(ID, const struct rb_iseq_struct *);
bool rb_insns_leaf_p(int i);
int rb_insn_len(VALUE insn);
const char *rb_insns_name(int i);
VALUE rb_insns_name_array(void);
int rb_iseq_cdhash_cmp(VALUE val, VALUE lit);
Expand Down
8 changes: 8 additions & 0 deletions test/objspace/test_objspace.rb
Expand Up @@ -216,6 +216,14 @@ def test_trace_object_allocations
assert_equal(c3, ObjectSpace.allocation_generation(o3))
assert_equal(self.class.name, ObjectSpace.allocation_class_path(o3))
assert_equal(__method__, ObjectSpace.allocation_method_id(o3))

# [Bug #19456]
o4 =
# This line intentionally left blank
# This line intentionally left blank
1.0 / 0.0; line4 = __LINE__; c4 = GC.count
assert_equal(__FILE__, ObjectSpace.allocation_sourcefile(o4))
assert_equal(line4, ObjectSpace.allocation_sourceline(o4))
}
end

Expand Down
25 changes: 25 additions & 0 deletions tool/ruby_vm/views/_leaf_helpers.erb
Expand Up @@ -10,6 +10,31 @@

#include "iseq.h"

/* Leafness table with one entry per element of RubyVM::Instructions, in
 * iteration order. An entry is 1 only when the instruction is a
 * RubyVM::BareInstructions whose always_leaf? is true; every other
 * instruction gets 0. */
extern const bool rb_vm_insn_leaf_p[];

/* The table itself is emitted only where RUBY_VM_INSNS_INFO is defined;
 * everywhere else only the extern declaration above is visible. */
#ifdef RUBY_VM_INSNS_INFO
const bool rb_vm_insn_leaf_p[] = {
% RubyVM::Instructions.each_slice(20) do |insns|
<%= insns.map do |insn|
if insn.is_a?(RubyVM::BareInstructions)
insn.always_leaf? ? '1' : '0'
else
'0'
end
end.join(', ')
%>,
% end
};
#endif

CONSTFUNC(MAYBE_UNUSED(static bool insn_leaf_p(VALUE insn)));

/* Looks up `insn` in rb_vm_insn_leaf_p and returns its entry: true when
 * the instruction is recorded as always being a leaf.
 * `insn` is used directly as the array index; no bounds check is done
 * here, so callers must pass a valid opcode. */
bool
insn_leaf_p(VALUE insn)
{
return rb_vm_insn_leaf_p[insn];
}

// This is used to tell MJIT that this insn would be leaf if CHECK_INTS didn't exist.
// It should be used only when RUBY_VM_CHECK_INTS is directly written in insns.def.
static bool leafness_of_check_ints = false;
Expand Down
7 changes: 0 additions & 7 deletions yjit.c
Expand Up @@ -480,13 +480,6 @@ rb_insn_name(VALUE insn)
return insn_name(insn);
}

// Query the instruction length for YARV opcode insn (thin wrapper over insn_len).
// NOTE(review): this comment previously said "in bytes", but gc.c advances a
// `const VALUE *` by the returned value, which suggests the unit is
// VALUE-sized slots -- confirm against insn_len().
int
rb_insn_len(VALUE insn)
{
return insn_len(insn);
}

unsigned int
rb_vm_ci_argc(const struct rb_callinfo *ci)
{
Expand Down
2 changes: 1 addition & 1 deletion yjit/src/cruby_bindings.inc.rs
Expand Up @@ -1181,6 +1181,7 @@ extern "C" {
key: st_data_t,
pval: *mut st_data_t,
) -> ::std::os::raw::c_int;
pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int;
pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int;
pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int;
pub fn rb_iseq_line_no(iseq: *const rb_iseq_t, pos: usize) -> ::std::os::raw::c_uint;
Expand Down Expand Up @@ -1221,7 +1222,6 @@ extern "C" {
pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char;
pub fn rb_yjit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t;
pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char;
pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int;
pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint;
pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID;
pub fn rb_vm_ci_flag(ci: *const rb_callinfo) -> ::std::os::raw::c_uint;
Expand Down

0 comments on commit 3e09822

Please sign in to comment.