[Compressed Instructions] Support compressed instructions for RVC
zhengxiaolinX committed Jul 2, 2021
1 parent 0099657 commit 154b36e
Showing 33 changed files with 1,832 additions and 474 deletions.
503 changes: 460 additions & 43 deletions src/hotspot/cpu/riscv64/assembler_riscv64.cpp

Large diffs are not rendered by default.

692 changes: 637 additions & 55 deletions src/hotspot/cpu/riscv64/assembler_riscv64.hpp

Large diffs are not rendered by default.
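
The two assembler files above carry the bulk of the RVC work, but their diffs are not rendered here. As a rough orientation only, the following is a minimal, self-contained sketch of the kind of 16-bit encoding such an assembler has to produce (CI-format c.addi / c.nop). It is an illustration written for this summary, not code from the patch:

#include <cassert>
#include <cstdint>

// Encode c.addi rd, imm6 in the RVC CI format (quadrant C1).
static uint16_t c_addi(uint32_t rd, int32_t imm6) {
  assert(imm6 >= -32 && imm6 < 32);
  uint32_t imm = static_cast<uint32_t>(imm6) & 0x3f;
  return static_cast<uint16_t>((0b000u << 13)       |   // funct3 = 000 (c.addi)
                               ((imm >> 5) << 12)   |   // imm[5]
                               ((rd & 0x1f) << 7)   |   // rd/rs1
                               ((imm & 0x1f) << 2)  |   // imm[4:0]
                               0b01);                    // op = quadrant C1
}

int main() {
  assert(c_addi(0, 0)  == 0x0001);   // c.addi x0, 0 is the canonical c.nop
  assert(c_addi(2, 16) == 0x0141);   // c.addi sp, 16
  return 0;
}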

6 changes: 3 additions & 3 deletions src/hotspot/cpu/riscv64/c1_CodeStubs_riscv64.cpp
@@ -44,7 +44,7 @@ void C1SafepointPollStub::emit_code(LIR_Assembler* ce)
__ bind(_entry);
InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset());
__ code_section()->relocate(__ pc(), safepoint_pc.rspec());
__ la(t0, safepoint_pc.target());
__ la(t0, safepoint_pc.target(), false);
__ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset()));

assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
@@ -108,7 +108,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce)
}
int32_t off = 0;
__ la_patchable(lr, RuntimeAddress(Runtime1::entry_for(stub_id)), off);
__ jalr(lr, lr, off);
__ jalr_nc(lr, lr, off);
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ should_not_reach_here());
@@ -257,7 +257,7 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce)
__ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));
}

int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size;
int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size();

void PatchingStub::align_patch_site(MacroAssembler* masm) {}

4 changes: 2 additions & 2 deletions src/hotspot/cpu/riscv64/c1_LIRAssembler_riscv64.cpp
@@ -1393,7 +1393,7 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit
InternalAddress pc_for_athrow(__ pc());
int32_t off = 0;
__ la_patchable(exceptionPC->as_register(), pc_for_athrow, off);
__ addi(exceptionPC->as_register(), exceptionPC->as_register(), off);
__ addi_nc(exceptionPC->as_register(), exceptionPC->as_register(), off);
add_call_info(pc_for_athrow_offset, info); // for exception handler

__ verify_not_null_oop(x10);
@@ -1806,7 +1806,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg
} else {
int32_t offset = 0;
__ la_patchable(t0, RuntimeAddress(dest), offset);
__ jalr(x1, t0, offset);
__ jalr_nc(x1, t0, offset);
}

if (info != NULL) {
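
Many hunks in this commit, like the two above, replace jalr/addi/ld after la_patchable with their _nc ("not compressed") variants. The presumed reason - an assumption on my part, not stated in the diff - is that a patchable auipc + lo12 pair must keep a fixed 4-byte + 4-byte layout so it can be found and rewritten in place later, which means the second instruction must never shrink to a 2-byte RVC form. A trivial sketch of that invariant:

#include <cassert>

// Size of an auipc + lo12 pair if the second instruction were allowed to compress.
static int patchable_pair_size(bool second_may_compress) {
  const int auipc  = 4;                             // auipc has no RVC form
  const int second = second_may_compress ? 2 : 4;   // addi/ld/jalr could otherwise compress
  return auipc + second;
}

int main() {
  // Patching code that rewrites the pair in place expects the fixed 8-byte layout.
  assert(patchable_pair_size(false) == 8);
  return 0;
}
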
8 changes: 4 additions & 4 deletions src/hotspot/cpu/riscv64/c1_LIRAssembler_riscv64.hpp
@@ -72,16 +72,16 @@ friend class ArrayCopyStub;
{
// see emit_static_call_stub for detail:
// CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address)
_call_stub_size = 14 * NativeInstruction::instruction_size +
(NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size),
_call_stub_size = 14 * NativeInstruction::normal_instruction_size +
(NativeInstruction::normal_instruction_size + NativeCallTrampolineStub::instruction_size),
_call_aot_stub_size = 0,
// see emit_exception_handler for detail:
// verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY)
_exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller
// see emit_deopt_handler for detail
// auipc (1) + far_jump (6 or 2)
_deopt_handler_size = 1 * NativeInstruction::instruction_size +
6 * NativeInstruction::instruction_size // or smaller
_deopt_handler_size = 1 * NativeInstruction::normal_instruction_size +
6 * NativeInstruction::normal_instruction_size // or smaller
};

void check_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp,
10 changes: 5 additions & 5 deletions src/hotspot/cpu/riscv64/c1_MacroAssembler_riscv64.cpp
@@ -348,7 +348,7 @@ void C1_MacroAssembler::verified_entry() {
// must ensure that this first instruction is a J, JAL or NOP.
// Make it a NOP.

nop();
nop_nc();
}

void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) {
@@ -391,9 +391,9 @@ void C1_MacroAssembler::invalidate_registers(bool inv_x10, bool inv_x9, bool inv
}
#endif // ifndef PRODUCT

typedef void (C1_MacroAssembler::*c1_cond_branch_insn)(Register op1, Register op2, Label& label, bool is_far);
typedef void (C1_MacroAssembler::*c1_cond_branch_insn)(Register op1, Register op2, Label& label, bool is_far, bool compressed);
typedef void (C1_MacroAssembler::*c1_float_cond_branch_insn)(FloatRegister op1, FloatRegister op2,
Label& label, bool is_far, bool is_unordered);
Label& label, bool is_far, bool is_unordered, bool compressed);

static c1_cond_branch_insn c1_cond_branch[] =
{
@@ -443,7 +443,7 @@ void C1_MacroAssembler::c1_cmp_branch(int cmpFlag, Register op1, Register op2, L
} else {
assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])),
"invalid c1 conditional branch index");
(this->*c1_cond_branch[cmpFlag])(op1, op2, label, is_far);
(this->*c1_cond_branch[cmpFlag])(op1, op2, label, is_far, true);
}
}

@@ -452,5 +452,5 @@ void C1_MacroAssembler::c1_float_cmp_branch(int cmpFlag, FloatRegister op1, Floa
assert(cmpFlag >= 0 &&
cmpFlag < (int)(sizeof(c1_float_cond_branch) / sizeof(c1_float_cond_branch[0])),
"invalid c1 float conditional branch index");
(this->*c1_float_cond_branch[cmpFlag])(op1, op2, label, is_far, is_unordered);
(this->*c1_float_cond_branch[cmpFlag])(op1, op2, label, is_far, is_unordered, true);
}
4 changes: 2 additions & 2 deletions src/hotspot/cpu/riscv64/c1_Runtime1_riscv64.cpp
@@ -69,7 +69,7 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres
// do the call
int32_t off = 0;
la_patchable(t0, RuntimeAddress(entry), off);
jalr(x1, t0, off);
jalr_nc(x1, t0, off);
bind(retaddr);
int call_offset = offset();
// verify callee-saved register
@@ -565,7 +565,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
// do the call
int32_t off = 0;
__ la_patchable(t0, RuntimeAddress(target), off);
__ jalr(x1, t0, off);
__ jalr_nc(x1, t0, off);
__ bind(retaddr);
OopMapSet* oop_maps = new OopMapSet();
assert_cond(oop_maps != NULL);
24 changes: 12 additions & 12 deletions src/hotspot/cpu/riscv64/c2_MacroAssembler_riscv64.cpp
@@ -995,9 +995,9 @@ void C2_MacroAssembler::string_equals(Register a1, Register a2,
BLOCK_COMMENT("} string_equals");
}

typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far);
typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far, bool compressed);
typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label,
bool is_far, bool is_unordered);
bool is_far, bool is_unordered, bool compressed);

static conditional_branch_insn conditional_branches[] =
{
@@ -1045,44 +1045,44 @@ static float_conditional_branch_insn float_conditional_branches[] =
(float_conditional_branch_insn)&MacroAssembler::double_bge
};

void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) {
void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far, bool compressed) {
assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])),
"invalid conditional branch index");
(this->*conditional_branches[cmpFlag])(op1, op2, label, is_far);
(this->*conditional_branches[cmpFlag])(op1, op2, label, is_far, compressed);
}

// This function should only be used by C2. When the condition is unordered-greater, flip it: C2 then uses
// unordered-lesser instead of unordered-greater, and finally commutes the result bits in do_one_bytecode().
void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) {
void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far, bool compressed) {
assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])),
"invalid float conditional branch index");
int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask);
(this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far,
(booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true);
(booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true, compressed);
}

void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) {
void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far, bool compressed) {
switch (cmpFlag) {
case BoolTest::eq:
case BoolTest::le:
beqz(op1, L, is_far);
beqz(op1, L, is_far, compressed);
break;
case BoolTest::ne:
case BoolTest::gt:
bnez(op1, L, is_far);
bnez(op1, L, is_far, compressed);
break;
default:
ShouldNotReachHere();
}
}

void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) {
void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far, bool compressed) {
switch (cmpFlag) {
case BoolTest::eq:
beqz(op1, L, is_far);
beqz(op1, L, is_far, compressed);
break;
case BoolTest::ne:
bnez(op1, L, is_far);
bnez(op1, L, is_far, compressed);
break;
default:
ShouldNotReachHere();
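
The tables above dispatch C2 condition codes to MacroAssembler branch emitters through pointers to member functions, and this commit threads an extra compressed flag through them. A stand-alone sketch of that dispatch pattern (the Asm type, its printf bodies, and the two-entry table are illustrative, not HotSpot code):

#include <cstdio>

struct Asm {
  void beq(int rs1, int rs2, bool compressed) { std::printf("beq  x%d, x%d%s\n", rs1, rs2, compressed ? "  (RVC allowed)" : ""); }
  void bne(int rs1, int rs2, bool compressed) { std::printf("bne  x%d, x%d%s\n", rs1, rs2, compressed ? "  (RVC allowed)" : ""); }
};

// Pointer-to-member type matching the emitters, including the new 'compressed' flag.
typedef void (Asm::*branch_insn)(int, int, bool);

static branch_insn branches[] = { &Asm::beq, &Asm::bne };

void emit_branch(Asm& a, int cmpFlag, int op1, int op2, bool compressed) {
  (a.*branches[cmpFlag])(op1, op2, compressed);   // indirect call through the table
}

int main() {
  Asm a;
  emit_branch(a, 0, 5, 6, true);    // beq, compression allowed
  emit_branch(a, 1, 5, 6, false);   // bne, forced 4-byte form
  return 0;
}
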
18 changes: 14 additions & 4 deletions src/hotspot/cpu/riscv64/c2_MacroAssembler_riscv64.hpp
@@ -72,19 +72,29 @@
static const int double_branch_mask = 1 << bool_test_bits;

// cmp
// C-Ext: these cmp functions stay uncompressed when emitted from C2 MachNodes -
// for the reason described at MachEpilogNode::emit() in PhaseOutput::scratch_emit_size():
// scratch emission simulates a node's size, but for MachBranchNodes it binds a fake Label
// right next to the node itself - the offset is so small that the branch always gets
// compressed by our implicit compression phase, whereas in the real emission the Label
// may be anywhere and the branch may stay uncompressed. That mismatch breaks
// shorten_branches(); with C-Ext we would need a further pass, say
// shorten_compressed_branches(), to fix it up.
// Our experiments show little performance gain from compressing these branches, while
// supporting compression of MachBranchNodes would be fairly costly.
// So, as a solution, we simply disable compression of MachBranchNodes.
void cmp_branch(int cmpFlag,
Register op1, Register op2,
Label& label, bool is_far = false);
Label& label, bool is_far = false, bool compressed = false);

void float_cmp_branch(int cmpFlag,
FloatRegister op1, FloatRegister op2,
Label& label, bool is_far = false);
Label& label, bool is_far = false, bool compressed = false);

void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op,
Label& L, bool is_far = false);
Label& L, bool is_far = false, bool compressed = false);

void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op,
Label& L, bool is_far = false);
Label& L, bool is_far = false, bool compressed = false);

void enc_cmove(int cmpFlag,
Register op1, Register op2,
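
The comment block above argues that scratch emission binds a fake Label right next to a MachBranchNode, so the branch always compresses during size estimation even though the real branch may not. A tiny model of that mismatch (assuming an RVC conditional branch reaches roughly +-256 bytes and the full form is 4 bytes; not HotSpot code):

#include <cassert>

static int branch_size(long offset, bool allow_rvc) {
  // RVC branches take a signed 9-bit, 2-byte-aligned offset; otherwise use the 4-byte form.
  const bool fits_rvc = allow_rvc && offset >= -256 && offset <= 254 && (offset % 2 == 0);
  return fits_rvc ? 2 : 4;
}

int main() {
  // Scratch emission: fake Label bound right after the branch -> tiny offset -> 2 bytes.
  assert(branch_size(2, /*allow_rvc=*/true) == 2);
  // Real emission: the target may be far away -> 4 bytes; the earlier estimate was too small.
  assert(branch_size(4096, /*allow_rvc=*/true) == 4);
  return 0;
}
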
2 changes: 1 addition & 1 deletion src/hotspot/cpu/riscv64/c2_globals_riscv64.hpp
@@ -49,7 +49,7 @@ define_pd_global(intx, FLOATPRESSURE, 32);
define_pd_global(intx, FreqInlineSize, 325);
define_pd_global(intx, MinJumpTableSize, 10);
define_pd_global(intx, INTPRESSURE, 24);
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(intx, InteriorEntryAlignment, 4);
define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
@@ -41,7 +41,7 @@ void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointP
__ bind(entry->_stub_label);
InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset);
masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec());
__ la(t0, safepoint_pc.target());
__ la(t0, safepoint_pc.target(), false);
__ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset()));
__ far_jump(callback_addr);
}
10 changes: 7 additions & 3 deletions src/hotspot/cpu/riscv64/compiledIC_riscv64.cpp
@@ -69,15 +69,19 @@ address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark)
#undef __

int CompiledStaticCall::to_interp_stub_size() {
// fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr
return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size;
// fence_i + fence* + (lui, addi, slli(C), addi, slli(C), addi) + (lui, addi, slli(C), addi, slli(C)) + jalr
return NativeFenceI::instruction_size() +
(!UseCExt ?
12 * NativeInstruction::normal_instruction_size :
8 * NativeInstruction::normal_instruction_size + 4 * NativeInstruction::compressed_instruction_size
);
}

int CompiledStaticCall::to_trampoline_stub_size() {
// Somewhat pessimistically, we count 4 instructions here (although
// there are only 3) because we sometimes emit an alignment nop.
// Trampoline stubs are always word aligned.
return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size;
return NativeInstruction::normal_instruction_size + NativeCallTrampolineStub::instruction_size;
}

// Relocation entries for call stub, compiled java to interpreter.
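
A back-of-the-envelope check of to_interp_stub_size() above, assuming normal_instruction_size == 4 and compressed_instruction_size == 2, and leaving out NativeFenceI::instruction_size(), which is added in both branches:

#include <cassert>

int main() {
  const int normal = 4, compressed = 2;
  const int without_cext = 12 * normal;                  // 48 bytes
  const int with_cext    = 8 * normal + 4 * compressed;  // 40 bytes - the four slli(C) ops compress
  assert(without_cext == 48 && with_cext == 40);
  return 0;
}
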
15 changes: 11 additions & 4 deletions src/hotspot/cpu/riscv64/gc/shared/barrierSetAssembler_riscv64.cpp
@@ -182,7 +182,7 @@ void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj,
// Get the current top of the heap
ExternalAddress address_top((address) Universe::heap()->top_addr());
__ la_patchable(t2, address_top, offset);
__ addi(t2, t2, offset);
__ addi_nc(t2, t2, offset);
__ lr_d(obj, t2, Assembler::aqrl);

// Adjust it by the size of our new object
@@ -200,7 +200,7 @@
ExternalAddress address_end((address) Universe::heap()->end_addr());
offset = 0;
__ la_patchable(heap_end, address_end, offset);
__ ld(heap_end, Address(heap_end, offset));
__ ld_nc(heap_end, Address(heap_end, offset));

__ bgtu(end, heap_end, slow_case, is_far);

@@ -227,13 +227,18 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm,
__ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset())));
}

extern int nmethod_barrier_guard_offset();

void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();

if (bs_nm == NULL) {
return;
}

// RISCV's amoswap instructions require the memory address they swap to be aligned
while ((__ offset() + nmethod_barrier_guard_offset()) % 4 != 0) { __ nop(); }

Label skip, guard;
Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset()));

@@ -246,12 +251,14 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
__ beq(t0, t1, skip);

int32_t offset = 0;
__ movptr_with_offset(t0, StubRoutines::riscv64::method_entry_barrier(), offset);
__ jalr(lr, t0, offset);
__ movptr_with_offset(t0, StubRoutines::riscv64::method_entry_barrier(), offset, false);
__ jalr_nc(lr, t0, offset);
__ j(skip);

__ bind(guard);

assert(__ offset() % 4 == 0, "RISCV CAS needs an alignment for memory");

__ emit_int32(0); // nmethod guard value. Skipped over in common case.

__ bind(skip);
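
The padding loop in nmethod_entry_barrier() above keeps emitting nops until the 4-byte guard word lands on a 4-byte boundary; with RVC enabled a nop may be only 2 bytes, which is why it pads one instruction at a time rather than emitting a single fixed-size filler. A small model of that loop (offsets are assumed even, since instruction boundaries are 2-byte aligned):

#include <cassert>

// How many bytes of nops the loop would emit before the guard word is 4-byte aligned.
static int padding_bytes(int code_offset, int guard_offset, int nop_size) {
  int pad = 0;
  while ((code_offset + pad + guard_offset) % 4 != 0) {
    pad += nop_size;                       // one more nop, then re-check alignment
  }
  return pad;
}

int main() {
  assert(padding_bytes(/*code_offset=*/6, /*guard_offset=*/20, /*nop_size=*/2) == 2);  // misaligned by 2
  assert(padding_bytes(/*code_offset=*/8, /*guard_offset=*/20, /*nop_size=*/2) == 0);  // already aligned
  return 0;
}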
