diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb
index ee8991833cb908..f30c5ff90008ca 100644
--- a/bootstraptest/test_yjit.rb
+++ b/bootstraptest/test_yjit.rb
@@ -2474,6 +2474,18 @@ def foo
   test
 } if false # disabled for now since OOM crashes in the test harness
 
+assert_equal 'ok', %q{
+  # Try to compile new method while OOM
+  def foo
+    :ok
+  end
+
+  RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT)
+
+  foo
+  foo
+}
+
 # struct aref embedded
 assert_equal '2', %q{
   def foo(s)
diff --git a/yjit.c b/yjit.c
index 56173a13604b83..0bf84f13b703aa 100644
--- a/yjit.c
+++ b/yjit.c
@@ -122,6 +122,7 @@ YJIT_DECLARE_COUNTERS(
     vm_insns_count,
     compiled_iseq_count,
     compiled_block_count,
+    compilation_failure,
 
     exit_from_branch_stub,
 
diff --git a/yjit.rb b/yjit.rb
index c555fd27cc40c9..d06f0961fadd9d 100644
--- a/yjit.rb
+++ b/yjit.rb
@@ -193,8 +193,12 @@ def _print_stats
     total_insns_count = retired_in_yjit + stats[:vm_insns_count]
     yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count
 
+    # Number of failed compiler invocations
+    compilation_failure = stats[:compilation_failure]
+
     $stderr.puts "bindings_allocations: " + ("%10d" % stats[:binding_allocations])
     $stderr.puts "bindings_set: " + ("%10d" % stats[:binding_set])
+    $stderr.puts "compilation_failure: " + ("%10d" % compilation_failure) if compilation_failure != 0
     $stderr.puts "compiled_iseq_count: " + ("%10d" % stats[:compiled_iseq_count])
     $stderr.puts "compiled_block_count: " + ("%10d" % stats[:compiled_block_count])
     $stderr.puts "invalidation_count: " + ("%10d" % stats[:invalidation_count])
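The stats plumbing above has three parts: yjit.c declares the counter, yjit_core.c bumps it once per failed compilation batch (see the gen_block_version() changes further down), and yjit.rb prints it only when it is non-zero. Below is a minimal sketch of that reporting convention; the struct and function are illustrative stand-ins, not the output of YJIT's YJIT_DECLARE_COUNTERS() macro.

#include <inttypes.h>
#include <stdio.h>

// Illustrative stand-in for the counter struct that YJIT_DECLARE_COUNTERS()
// generates; only the fields relevant to this patch are shown.
struct runtime_counters {
    int64_t compiled_iseq_count;
    int64_t compilation_failure;
};

// Mirrors the yjit.rb hunk: print compilation_failure only when non-zero,
// so the stats output is unchanged for runs that never hit OOM.
static void print_stats(const struct runtime_counters *c)
{
    if (c->compilation_failure != 0) {
        fprintf(stderr, "compilation_failure: %10" PRId64 "\n", c->compilation_failure);
    }
    fprintf(stderr, "compiled_iseq_count: %10" PRId64 "\n", c->compiled_iseq_count);
}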
diff --git a/yjit_codegen.c b/yjit_codegen.c
index 2cd4fd2bda008d..7b44874af8613d 100644
--- a/yjit_codegen.c
+++ b/yjit_codegen.c
@@ -545,10 +545,15 @@ yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
 {
     RUBY_ASSERT(cb != NULL);
 
-    if (cb->write_pos + 1024 >= cb->mem_size) {
-        rb_bug("out of executable memory");
+    enum { MAX_PROLOGUE_SIZE = 1024 };
+
+    // Check if we have enough executable memory
+    if (cb->write_pos + MAX_PROLOGUE_SIZE >= cb->mem_size) {
+        return NULL;
     }
 
+    const uint32_t old_write_pos = cb->write_pos;
+
     // Align the current write position to cache line boundaries
     cb_align_pos(cb, 64);
 
@@ -581,6 +586,9 @@ yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
         yjit_pc_guard(cb, iseq);
     }
 
+    // Verify MAX_PROLOGUE_SIZE
+    RUBY_ASSERT_ALWAYS(cb->write_pos - old_write_pos <= MAX_PROLOGUE_SIZE);
+
     return code_ptr;
 }
 
@@ -625,32 +633,46 @@ jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
     );
 }
 
-// Compile a sequence of bytecode instructions for a given basic block version
-static void
-yjit_gen_block(block_t *block, rb_execution_context_t *ec)
+// Compile a sequence of bytecode instructions for a given basic block version.
+// Part of gen_block_version().
+static block_t *
+gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
 {
     RUBY_ASSERT(cb != NULL);
-    RUBY_ASSERT(block != NULL);
-    RUBY_ASSERT(!(block->blockid.idx == 0 && block->ctx.stack_size > 0));
 
-    // Copy the block's context to avoid mutating it
-    ctx_t ctx_copy = block->ctx;
+    // Check if there is enough executable memory.
+    // FIXME: This bound isn't enforced and long blocks can potentially use more.
+    enum { MAX_CODE_PER_BLOCK = 1024 };
+    if (cb->write_pos + MAX_CODE_PER_BLOCK >= cb->mem_size) {
+        return NULL;
+    }
+    if (ocb->write_pos + MAX_CODE_PER_BLOCK >= ocb->mem_size) {
+        return NULL;
+    }
+
+    // Allocate the new block
+    block_t *block = calloc(1, sizeof(block_t));
+    if (!block) {
+        return NULL;
+    }
+
+    // Copy the starting context to avoid mutating it
+    ctx_t ctx_copy = *start_ctx;
     ctx_t *ctx = &ctx_copy;
 
+    // Limit the number of specialized versions for this block
+    *ctx = limit_block_versions(blockid, ctx);
+
+    // Save the starting context on the block.
+    block->blockid = blockid;
+    block->ctx = *ctx;
+
+    RUBY_ASSERT(!(blockid.idx == 0 && start_ctx->stack_size > 0));
+
     const rb_iseq_t *iseq = block->blockid.iseq;
     uint32_t insn_idx = block->blockid.idx;
     const uint32_t starting_insn_idx = insn_idx;
 
-    // NOTE: if we are ever deployed in production, we
-    // should probably just log an error and return NULL here,
-    // so we can fail more gracefully
-    if (cb->write_pos + 1024 >= cb->mem_size) {
-        rb_bug("out of executable memory");
-    }
-    if (ocb->write_pos + 1024 >= ocb->mem_size) {
-        rb_bug("out of executable memory (outlined block)");
-    }
-
     // Initialize a JIT state object
     jitstate_t jit = {
         .cb = cb,
@@ -765,6 +787,8 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
             idx += insn_len(opcode);
         }
     }
+
+    return block;
 }
 
 static codegen_status_t gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
diff --git a/yjit_codegen.h b/yjit_codegen.h
index bbd29e671b8f76..e3b971af3fcfd1 100644
--- a/yjit_codegen.h
+++ b/yjit_codegen.h
@@ -14,7 +14,7 @@ static void jit_ensure_block_entry_exit(jitstate_t *jit);
 
 static uint8_t *yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq);
 
-static void yjit_gen_block(block_t *block, rb_execution_context_t *ec);
+static block_t *gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec);
 
 static void gen_code_for_exit_from_stub(void);
 
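Both codegen paths above replace rb_bug() with a NULL return guarded by a conservative size bound, and the prologue additionally re-checks the bound after emission. The sketch below isolates that pattern; codeblock_t's field names follow the diff, but the type and emit_prologue() are simplified stand-ins rather than the real YJIT definitions, and plain assert() stands in for RUBY_ASSERT_ALWAYS().

#include <assert.h>
#include <stdint.h>

// Simplified stand-in for YJIT's codeblock_t: a bump allocator over a fixed
// region of executable memory.
typedef struct {
    uint8_t *mem_block;
    uint32_t mem_size;
    uint32_t write_pos;
} codeblock_t;

enum { MAX_PROLOGUE_SIZE = 1024 };

// Same shape as yjit_entry_prologue() after this patch: bail out with NULL
// when the worst-case size might not fit, then verify the bound afterwards.
static uint8_t *emit_prologue(codeblock_t *cb)
{
    if (cb->write_pos + MAX_PROLOGUE_SIZE >= cb->mem_size) {
        return NULL; // out of executable memory: let the caller fail gracefully
    }

    const uint32_t old_write_pos = cb->write_pos;
    uint8_t *code_ptr = cb->mem_block + cb->write_pos;

    cb->write_pos += 16; // placeholder for the actual machine-code emission

    assert(cb->write_pos - old_write_pos <= MAX_PROLOGUE_SIZE);
    return code_ptr;
}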
diff --git a/yjit_core.c b/yjit_core.c
index 4460d325fc3588..f19b83c5ff522e 100644
--- a/yjit_core.c
+++ b/yjit_core.c
@@ -542,9 +542,10 @@ static size_t get_num_versions(blockid_t blockid)
 
 // Keep track of a block version. Block should be fully constructed.
 static void
-add_block_version(blockid_t blockid, block_t *block)
+add_block_version(block_t *block)
 {
-    const rb_iseq_t *iseq = block->blockid.iseq;
+    const blockid_t blockid = block->blockid;
+    const rb_iseq_t *iseq = blockid.iseq;
     struct rb_iseq_constant_body *body = iseq->body;
 
     // Function entry blocks must have stack size 0
@@ -704,57 +705,64 @@ find_block_version(blockid_t blockid, const ctx_t *ctx)
 
 // Produce a generic context when the block version limit is hit for a blockid
-// Note that this will mutate the ctx argument
-static void
-limit_block_versions(blockid_t blockid, ctx_t *ctx)
+static ctx_t
+limit_block_versions(blockid_t blockid, const ctx_t *ctx)
 {
     // Guard chains implement limits separately, do nothing
     if (ctx->chain_depth > 0)
-        return;
+        return *ctx;
 
     // If this block version we're about to add will hit the version limit
-    if (get_num_versions(blockid) + 1 >= rb_yjit_opts.max_versions)
-    {
+    if (get_num_versions(blockid) + 1 >= rb_yjit_opts.max_versions) {
         // Produce a generic context that stores no type information,
-        // but still respects the stack_size and sp_offset constraints
+        // but still respects the stack_size and sp_offset constraints.
+        // This new context will then match all future requests.
         ctx_t generic_ctx = DEFAULT_CTX;
         generic_ctx.stack_size = ctx->stack_size;
         generic_ctx.sp_offset = ctx->sp_offset;
 
-        // Mutate the incoming context
-        *ctx = generic_ctx;
+        return generic_ctx;
     }
+
+    return *ctx;
 }
 
-// Compile a new block version immediately
+static void yjit_free_block(block_t *block);
+
+// Immediately compile a series of block versions at a starting point and
+// return the starting block.
 static block_t *
gen_block_version(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
 {
-    // Allocate a new block version object
-    block_t *block = calloc(1, sizeof(block_t));
-    block->blockid = blockid;
-    memcpy(&block->ctx, start_ctx, sizeof(ctx_t));
-
-    // Store a pointer to the first block (returned by this function)
-    block_t *first_block = block;
-
-    // Limit the number of specialized versions for this block
-    limit_block_versions(block->blockid, &block->ctx);
+    // Small array to keep track of all the blocks compiled per invocation. We
+    // tend to have small batches since we often break up compilation with lazy
+    // stubs. Compilation is successful only if the whole batch is successful.
+    enum { MAX_PER_BATCH = 64 };
+    block_t *batch[MAX_PER_BATCH];
+    int compiled_count = 0;
+    bool batch_success = true;
+    block_t *block;
 
     // Generate code for the first block
-    yjit_gen_block(block, ec);
+    block = gen_single_block(blockid, start_ctx, ec);
+    batch_success = block && compiled_count < MAX_PER_BATCH;
+
+    if (batch_success) {
+        // Track the block
+        add_block_version(block);
 
-    // Keep track of the new block version
-    add_block_version(block->blockid, block);
+        batch[compiled_count] = block;
+        compiled_count++;
+    }
 
     // For each successor block to compile
-    for (;;) {
+    while (batch_success) {
         // If the previous block compiled doesn't have outgoing branches, stop
         if (rb_darray_size(block->outgoing) == 0) {
             break;
         }
 
-        // Get the last outgoing branch from the previous block
+        // Get the last outgoing branch from the previous block. Blocks can use
+        // gen_direct_jump() to request a block to be placed immediately after.
         branch_t *last_branch = rb_darray_back(block->outgoing);
 
         // If there is no next block to compile, stop
@@ -766,32 +776,48 @@ gen_block_version(blockid_t blockid, const ctx_t *start_ctx, rb_execution_contex
             rb_bug("invalid target for last branch");
         }
 
-        // Allocate a new block version object
-        // Use the context from the branch
-        block = calloc(1, sizeof(block_t));
-        block->blockid = last_branch->targets[0];
-        block->ctx = last_branch->target_ctxs[0];
-        //memcpy(&block->ctx, ctx, sizeof(ctx_t));
+        // Generate code for the current block using context from the last branch.
+        blockid_t requested_id = last_branch->targets[0];
+        const ctx_t *requested_ctx = &last_branch->target_ctxs[0];
+        block = gen_single_block(requested_id, requested_ctx, ec);
+        batch_success = block && compiled_count < MAX_PER_BATCH;
 
-        // Limit the number of specialized versions for this block
-        limit_block_versions(block->blockid, &block->ctx);
-
-        // Generate code for the current block
-        yjit_gen_block(block, ec);
-
-        // Keep track of the new block version
-        add_block_version(block->blockid, block);
+        // If the batch failed, stop
+        if (!batch_success) {
+            break;
+        }
 
-        // Patch the last branch address
+        // Connect the last branch and the new block
         last_branch->dst_addrs[0] = block->start_addr;
         rb_darray_append(&block->incoming, last_branch);
         last_branch->blocks[0] = block;
 
         // This block should immediately follow the last branch
         RUBY_ASSERT(block->start_addr == last_branch->end_addr);
+
+        // Track the block
+        add_block_version(block);
+
+        batch[compiled_count] = block;
+        compiled_count++;
     }
 
-    return first_block;
+    if (batch_success) {
+        // Success. Return first block in the batch.
+        RUBY_ASSERT(compiled_count > 0);
+        return batch[0];
+    }
+    else {
+        // The batch failed. Free everything in the batch
+        for (int block_idx = 0; block_idx < compiled_count; block_idx++) {
+            yjit_free_block(batch[block_idx]);
+        }
+
+#if YJIT_STATS
+        yjit_runtime_counters.compilation_failure++;
+#endif
+        return NULL;
+    }
 }
 
 // Generate a block version that is an entry point inserted into an iseq
@@ -807,15 +833,14 @@ gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_context_t
     // The entry context makes no assumptions about types
     blockid_t blockid = { iseq, insn_idx };
 
-    // Write the interpreter entry prologue
+    // Write the interpreter entry prologue. Might be NULL when out of memory.
     uint8_t *code_ptr = yjit_entry_prologue(cb, iseq);
 
     // Try to generate code for the entry block
     block_t *block = gen_block_version(blockid, &DEFAULT_CTX, ec);
 
     // If we couldn't generate any code
-    if (block->end_idx == insn_idx)
-    {
+    if (!block || block->end_idx == insn_idx) {
         return NULL;
     }
 
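gen_block_version() now behaves like a small transaction: it compiles up to MAX_PER_BATCH chained blocks and publishes each one via add_block_version(), or it frees every block compiled so far and returns NULL. The following self-contained sketch shows that all-or-nothing shape; block_t, compile_one(), and the has_successor flag are hypothetical stand-ins for YJIT's block_t, gen_single_block(), and the gen_direct_jump() fallthrough chaining.

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

// Hypothetical block: compile_one() returns NULL once the (fake) code region
// is exhausted, modeling the out-of-executable-memory case.
typedef struct {
    int id;
    bool has_successor;
} block_t;

static uint32_t write_pos = 0;
static const uint32_t mem_size = 4096;

static block_t *compile_one(int id)
{
    enum { MAX_CODE_PER_BLOCK = 1024 };
    if (write_pos + MAX_CODE_PER_BLOCK >= mem_size) {
        return NULL; // out of executable memory
    }
    write_pos += 128; // pretend this block emitted 128 bytes of code
    block_t *block = calloc(1, sizeof(block_t));
    if (block) {
        block->id = id;
        block->has_successor = (id % 4 != 3); // chains of up to four blocks
    }
    return block;
}

enum { MAX_PER_BATCH = 64 };

// All-or-nothing batch, as in gen_block_version(): either every block in the
// chain is compiled and the first block is returned, or everything compiled
// so far is rolled back and the caller sees NULL.
static block_t *compile_batch(int first_id)
{
    block_t *batch[MAX_PER_BATCH];
    int compiled_count = 0;

    for (int id = first_id; ; id++) {
        block_t *block = compiled_count < MAX_PER_BATCH ? compile_one(id) : NULL;
        if (!block) {
            // Roll back so a half-compiled batch is never published
            for (int i = 0; i < compiled_count; i++) {
                free(batch[i]);
            }
            return NULL;
        }
        batch[compiled_count++] = block;
        if (!block->has_successor) {
            break; // no fallthrough successor requested; the batch is complete
        }
    }
    return batch[0];
}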
diff --git a/yjit_iface.c b/yjit_iface.c
index 917a32cbfa0830..8605a4162ebe3e 100644
--- a/yjit_iface.c
+++ b/yjit_iface.c
@@ -483,8 +483,7 @@ rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec)
     // Compile a block version starting at the first instruction
     uint8_t *code_ptr = gen_entry_point(iseq, 0, ec);
 
-    if (code_ptr)
-    {
+    if (code_ptr) {
         iseq->body->jit_func = (yjit_func_t)code_ptr;
     }
     else {
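End to end, an OOM condition now surfaces as a chain of NULL returns, gen_single_block() through gen_block_version() and gen_entry_point() up to rb_yjit_compile_iseq(), which leaves the iseq running in the interpreter. A miniature version of that caller-side contract, where iseq_t, jit_func_t, and try_entry_point() are illustrative stand-ins for rb_iseq_t, yjit_func_t, and gen_entry_point():

#include <stdint.h>

// Miniature stand-ins for rb_iseq_t and the jit_func field it carries.
typedef uint8_t *(*jit_func_t)(void);
typedef struct { jit_func_t jit_func; } iseq_t;

// Stand-in for gen_entry_point(): returns NULL here to model the OOM path
// that RubyVM::YJIT.simulate_oom! exercises in the new bootstrap test.
static uint8_t *try_entry_point(iseq_t *iseq)
{
    (void)iseq;
    return NULL;
}

// Same control flow as rb_yjit_compile_iseq() after this patch: install the
// entry point only on success; on failure the method keeps running in the
// interpreter instead of the process dying in rb_bug().
static void compile_iseq(iseq_t *iseq)
{
    uint8_t *code_ptr = try_entry_point(iseq);
    if (code_ptr) {
        iseq->jit_func = code_ptr ? (jit_func_t)code_ptr : NULL;
    }
    else {
        iseq->jit_func = NULL;
    }
}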