Skip to content

8332265: RISC-V: Materialize pointers faster by using a temp register #19246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ static jlong as_long(LIR_Opr data) {
Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) {
if (addr->base()->is_illegal()) {
assert(addr->index()->is_illegal(), "must be illegal too");
__ movptr(tmp, addr->disp());
__ movptr(tmp, (address)addr->disp());
return Address(tmp, 0);
}

Expand Down
6 changes: 4 additions & 2 deletions src/hotspot/cpu/riscv/jvmciCodeInstaller_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, JVMC
return pc_offset + NativeCall::instruction_size;
} else if (inst->is_jump()) {
return pc_offset + NativeJump::instruction_size;
} else if (inst->is_movptr()) {
return pc_offset + NativeMovConstReg::movptr_instruction_size;
} else if (inst->is_movptr1()) {
return pc_offset + NativeMovConstReg::movptr1_instruction_size;
} else if (inst->is_movptr2()) {
return pc_offset + NativeMovConstReg::movptr2_instruction_size;
} else {
JVMCI_ERROR_0("unsupported type of instruction for call site");
}
Expand Down
140 changes: 105 additions & 35 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ void MacroAssembler::emit_static_call_stub() {

// Jump to the entry point of the c2i stub.
int32_t offset = 0;
movptr(t0, 0, offset);
movptr(t0, 0, offset, t1); // lui + lui + slli + add
jr(t0, offset);
}

Expand Down Expand Up @@ -1425,15 +1425,34 @@ static int patch_offset_in_pc_relative(address branch, int64_t offset) {
return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
}

static int patch_addr_in_movptr(address branch, address target) {
const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load
static int patch_addr_in_movptr1(address branch, address target) {
int32_t lower = ((intptr_t)target << 35) >> 35;
int64_t upper = ((intptr_t)target - lower) >> 29;
Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12]
Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20]
Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20]
Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20]
return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
return NativeMovConstReg::movptr1_instruction_size;
}

// Patch the 48-bit target address into a movptr2 sequence
// (lui, lui, slli, add, addi/jalr/load) at instruction_address.
// Only the two lui immediates and the final 12-bit immediate carry
// address bits; the intervening slli/add encode no part of the
// address and need no patching.
// Returns the length in bytes of the patched sequence.
static int patch_addr_in_movptr2(address instruction_address, address target) {
uintptr_t addr = (uintptr_t)target;

assert(addr < (1ull << 48), "48-bit overflow in address constant");
// Decompose addr as (upper18 << 30) + (mid18 << 12) + low12, where
// low12 is sign-extended and mid18 is adjusted to compensate for
// that sign extension.
unsigned int upper18 = (addr >> 30ull);
int lower30 = (addr & 0x3fffffffu);
int low12 = (lower30 << 20) >> 20;
int mid18 = ((lower30 - low12) >> 12);

Assembler::patch(instruction_address + (NativeInstruction::instruction_size * 0), 31, 12, (upper18 & 0xfffff)); // Lui
Assembler::patch(instruction_address + (NativeInstruction::instruction_size * 1), 31, 12, (mid18 & 0xfffff)); // Lui
// Slli
// Add
Assembler::patch(instruction_address + (NativeInstruction::instruction_size * 4), 31, 20, low12 & 0xfff); // Addi/Jalr/Load

// Round-trip check: decoding the patched sequence must yield target.
assert(MacroAssembler::target_addr_for_insn(instruction_address) == target, "Must be");

return NativeMovConstReg::movptr2_instruction_size;
}

static int patch_imm_in_li64(address branch, address target) {
Expand Down Expand Up @@ -1507,7 +1526,7 @@ static long get_offset_of_pc_relative(address insn_addr) {
return offset;
}

static address get_target_of_movptr(address insn_addr) {
static address get_target_of_movptr1(address insn_addr) {
assert_cond(insn_addr != nullptr);
intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29; // Lui.
target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17; // Addi.
Expand All @@ -1516,6 +1535,17 @@ static address get_target_of_movptr(address insn_addr) {
return (address) target_address;
}

// Decode the 48-bit target address materialized by a movptr2 sequence
// (lui, lui, slli, add, addi/jalr/load) starting at insn_addr.
// Inverse of patch_addr_in_movptr2.
static address get_target_of_movptr2(address insn_addr) {
assert_cond(insn_addr != nullptr);
int32_t upper18 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + NativeInstruction::instruction_size * 0), 31, 12)) & 0xfffff); // Lui
int32_t mid18 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + NativeInstruction::instruction_size * 1), 31, 12)) & 0xfffff); // Lui
// 2 // Slli
// 3 // Add
int32_t low12 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + NativeInstruction::instruction_size * 4), 31, 20))); // Addi/Jalr/Load.
// Reassemble: (upper18 << 30) + (mid18 << 12) + sign-extended low12.
address ret = (address)(((intptr_t)upper18<<30ll) + ((intptr_t)mid18<<12ll) + low12);
return ret;
}

static address get_target_of_li64(address insn_addr) {
assert_cond(insn_addr != nullptr);
intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 44; // Lui.
Expand All @@ -1535,30 +1565,32 @@ address MacroAssembler::get_target_of_li32(address insn_addr) {

// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
assert_cond(branch != nullptr);
int64_t offset = target - branch;
if (NativeInstruction::is_jal_at(branch)) { // jal
return patch_offset_in_jal(branch, offset);
} else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne
return patch_offset_in_conditional_branch(branch, offset);
} else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load
return patch_offset_in_pc_relative(branch, offset);
} else if (NativeInstruction::is_movptr_at(branch)) { // movptr
return patch_addr_in_movptr(branch, target);
} else if (NativeInstruction::is_li64_at(branch)) { // li64
return patch_imm_in_li64(branch, target);
} else if (NativeInstruction::is_li32_at(branch)) { // li32
int MacroAssembler::pd_patch_instruction_size(address instruction_address, address target) {
assert_cond(instruction_address != nullptr);
int64_t offset = target - instruction_address;
if (NativeInstruction::is_jal_at(instruction_address)) { // jal
return patch_offset_in_jal(instruction_address, offset);
} else if (NativeInstruction::is_branch_at(instruction_address)) { // beq/bge/bgeu/blt/bltu/bne
return patch_offset_in_conditional_branch(instruction_address, offset);
} else if (NativeInstruction::is_pc_relative_at(instruction_address)) { // auipc, addi/jalr/load
return patch_offset_in_pc_relative(instruction_address, offset);
} else if (NativeInstruction::is_movptr1_at(instruction_address)) { // movptr1
return patch_addr_in_movptr1(instruction_address, target);
} else if (NativeInstruction::is_movptr2_at(instruction_address)) { // movptr2
return patch_addr_in_movptr2(instruction_address, target);
} else if (NativeInstruction::is_li64_at(instruction_address)) { // li64
return patch_imm_in_li64(instruction_address, target);
} else if (NativeInstruction::is_li32_at(instruction_address)) { // li32
int64_t imm = (intptr_t)target;
return patch_imm_in_li32(branch, (int32_t)imm);
} else if (NativeInstruction::is_li16u_at(branch)) {
return patch_imm_in_li32(instruction_address, (int32_t)imm);
} else if (NativeInstruction::is_li16u_at(instruction_address)) {
int64_t imm = (intptr_t)target;
return patch_imm_in_li16u(branch, (uint16_t)imm);
return patch_imm_in_li16u(instruction_address, (uint16_t)imm);
} else {
#ifdef ASSERT
tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n",
Assembler::ld_instr(branch), p2i(branch));
Disassembler::decode(branch - 16, branch + 16);
Assembler::ld_instr(instruction_address), p2i(instruction_address));
Disassembler::decode(instruction_address - 16, instruction_address + 16);
#endif
ShouldNotReachHere();
return -1;
Expand All @@ -1574,8 +1606,10 @@ address MacroAssembler::target_addr_for_insn(address insn_addr) {
offset = get_offset_of_conditional_branch(insn_addr);
} else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load
offset = get_offset_of_pc_relative(insn_addr);
} else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr
return get_target_of_movptr(insn_addr);
} else if (NativeInstruction::is_movptr1_at(insn_addr)) { // movptr1
return get_target_of_movptr1(insn_addr);
} else if (NativeInstruction::is_movptr2_at(insn_addr)) { // movptr2
return get_target_of_movptr2(insn_addr);
} else if (NativeInstruction::is_li64_at(insn_addr)) { // li64
return get_target_of_li64(insn_addr);
} else if (NativeInstruction::is_li32_at(insn_addr)) { // li32
Expand All @@ -1594,9 +1628,12 @@ int MacroAssembler::patch_oop(address insn_addr, address o) {
// Move narrow OOP
uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
return patch_imm_in_li32(insn_addr, (int32_t)n);
} else if (NativeInstruction::is_movptr_at(insn_addr)) {
} else if (NativeInstruction::is_movptr1_at(insn_addr)) {
// Move wide OOP
return patch_addr_in_movptr1(insn_addr, o);
} else if (NativeInstruction::is_movptr2_at(insn_addr)) {
// Move wide OOP
return patch_addr_in_movptr(insn_addr, o);
return patch_addr_in_movptr2(insn_addr, o);
}
ShouldNotReachHere();
return -1;
Expand All @@ -1617,16 +1654,31 @@ void MacroAssembler::reinit_heapbase() {
}
}

void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset) {
int64_t imm64 = (int64_t)addr;
// Load the 48-bit constant 'addr' fully into Rd: emit the patchable
// upper-bits sequence, then fold the returned low-order offset into
// Rd with a trailing addi. Supplying a scratch register in 'temp'
// selects the shorter movptr2 encoding.
void MacroAssembler::movptr(Register Rd, address addr, Register temp) {
  int32_t lo = 0;
  movptr(Rd, addr, lo, temp);
  addi(Rd, Rd, lo);
}

// Emit a patchable load of the 48-bit constant 'addr' into Rd.
// The low-order bits still to be added are returned in 'offset' so
// the caller can consume them in a dependent addi/jalr/load.
// With temp == noreg the movptr1 sequence is emitted; otherwise the
// shorter movptr2 sequence that uses 'temp' as scratch.
void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset, Register temp) {
uint64_t uimm64 = (uint64_t)addr;
#ifndef PRODUCT
{
// Show the constant being materialized as an assembly block comment.
char buffer[64];
snprintf(buffer, sizeof(buffer), "0x%" PRIx64, uimm64);
block_comment(buffer);
}
#endif
// Both encodings can only materialize 48-bit values.
assert(uimm64 < (1ull << 48), "48-bit overflow in address constant");

if (temp == noreg) {
movptr1(Rd, uimm64, offset);
} else {
movptr2(Rd, uimm64, offset, temp);
}
}

void MacroAssembler::movptr1(Register Rd, uint64_t imm64, int32_t &offset) {
// Load upper 31 bits
int64_t imm = imm64 >> 17;
int64_t upper = imm, lower = imm;
Expand All @@ -1645,6 +1697,23 @@ void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset) {
offset = imm64 & 0x3f;
}

// Materialize the upper bits of the 48-bit constant 'addr' into Rd
// using a scratch register: lui + lui + slli + add. The remaining
// sign-extended low 12 bits are returned in 'offset' for the caller
// to fold in with a trailing addi/jalr/load.
void MacroAssembler::movptr2(Register Rd, uint64_t addr, int32_t &offset, Register tmp) {
assert_different_registers(Rd, tmp, noreg);

// Decompose addr as (upper18 << 30) + (mid18 << 12) + low12, where
// low12 is sign-extended and mid18 compensates for that extension.
uint32_t upper18 = (addr >> 30ull);
int32_t lower30 = (addr & 0x3fffffffu);
int32_t low12 = (lower30 << 20) >> 20;
int32_t mid18 = ((lower30 - low12) >> 12);

lui(tmp, upper18 << 12); // tmp = upper18 << 12
lui(Rd, mid18 << 12); // Rd = mid18 << 12

slli(tmp, tmp, 18); // tmp = upper18 << 30
add(Rd, Rd, tmp); // Rd = (upper18 << 30) + (mid18 << 12)

offset = low12;
}

void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) {
if (is_simm12(increment)) {
addi(Rd, Rn, increment);
Expand Down Expand Up @@ -2120,6 +2189,7 @@ void MacroAssembler::movoop(Register dst, jobject obj) {

// Move a metadata address into a register.
void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
assert((uintptr_t)obj < (1ull << 48), "48-bit overflow in metadata");
int oop_index;
if (obj == nullptr) {
oop_index = oop_recorder()->allocate_metadata_index(obj);
Expand Down Expand Up @@ -3554,7 +3624,7 @@ address MacroAssembler::trampoline_call(Address entry) {
// Emit an inline-cache call to 'entry'. Loads Universe::non_oop_word()
// into t1 as the initial inline-cache value (presumably replaced when
// the call site is resolved -- the movptr sequence is patchable) and
// then performs a trampoline call carrying the virtual-call relocation.
address MacroAssembler::ic_call(address entry, jint method_index) {
RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
IncompressibleRegion ir(this); // relocations
// t0 serves as scratch so the shorter movptr2 sequence is emitted.
movptr(t1, (address)Universe::non_oop_word(), t0);
assert_cond(entry != nullptr);
return trampoline_call(Address(entry, rh));
}
Expand Down Expand Up @@ -3661,8 +3731,8 @@ int MacroAssembler::max_trampoline_stub_size() {
}

// Size in bytes of the static call stub:
// (lui, addi, slli, addi, slli, addi) + (lui, lui, slli, add) + jalr
int MacroAssembler::static_call_stub_size() {
  const int stub_insns = 6 + 4 + 1; // see instruction breakdown above
  return stub_insns * NativeInstruction::instruction_size;
}

Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) {
Expand Down
21 changes: 10 additions & 11 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -804,17 +804,16 @@ class MacroAssembler: public Assembler {
}
}

void movptr(Register Rd, address addr, int32_t &offset);

void movptr(Register Rd, address addr) {
int offset = 0;
movptr(Rd, addr, offset);
addi(Rd, Rd, offset);
}

inline void movptr(Register Rd, uintptr_t imm64) {
movptr(Rd, (address)imm64);
}
// Generates a load of a 48-bit constant which can be
// patched to any 48-bit constant, i.e. address.
// If common case supply additional temp register
// to shorten the instruction sequence.
void movptr(Register Rd, address addr, Register tmp = noreg);
void movptr(Register Rd, address addr, int32_t &offset, Register tmp = noreg);
private:
void movptr1(Register Rd, uintptr_t addr, int32_t &offset);
void movptr2(Register Rd, uintptr_t addr, int32_t &offset, Register tmp);
public:

// arith
void add (Register Rd, Register Rn, int64_t increment, Register temp = t0);
Expand Down
30 changes: 23 additions & 7 deletions src/hotspot/cpu/riscv/nativeInst_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ bool NativeInstruction::is_load_pc_relative_at(address instr) {
check_load_pc_relative_data_dependency(instr);
}

bool NativeInstruction::is_movptr_at(address instr) {
bool NativeInstruction::is_movptr1_at(address instr) {
return is_lui_at(instr) && // Lui
is_addi_at(instr + instruction_size) && // Addi
is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11
Expand All @@ -93,7 +93,18 @@ bool NativeInstruction::is_movptr_at(address instr) {
(is_addi_at(instr + instruction_size * 5) ||
is_jalr_at(instr + instruction_size * 5) ||
is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load
check_movptr_data_dependency(instr);
check_movptr1_data_dependency(instr);
}

// Recognize a movptr2 sequence at 'instr': lui, lui, slli-by-18, add,
// followed by an addi/jalr/load carrying the low 12 bits, with the
// expected register data dependencies between the instructions.
bool NativeInstruction::is_movptr2_at(address instr) {
  if (!is_lui_at(instr))                                       return false; // first lui
  if (!is_lui_at(instr + instruction_size))                    return false; // second lui
  if (!is_slli_shift_at(instr + instruction_size * 2, 18))     return false; // slli Rd, Rs, 18
  if (!is_add_at(instr + instruction_size * 3))                return false; // add
  address tail = instr + instruction_size * 4;
  bool tail_ok = is_addi_at(tail) || is_jalr_at(tail) || is_load_at(tail);   // Addi/Jalr/Load
  return tail_ok && check_movptr2_data_dependency(instr);
}

bool NativeInstruction::is_li16u_at(address instr) {
Expand Down Expand Up @@ -201,10 +212,11 @@ void NativeCall::insert(address code_pos, address entry) { Unimplemented(); }
//-------------------------------------------------------------------

void NativeMovConstReg::verify() {
if (!(nativeInstruction_at(instruction_address())->is_movptr() ||
is_auipc_at(instruction_address()))) {
fatal("should be MOVPTR or AUIPC");
NativeInstruction* ni = nativeInstruction_at(instruction_address());
if (ni->is_movptr() || ni->is_auipc()) {
return;
}
fatal("should be MOVPTR or AUIPC");
}

intptr_t NativeMovConstReg::data() const {
Expand All @@ -223,7 +235,7 @@ void NativeMovConstReg::set_data(intptr_t x) {
} else {
// Store x into the instruction stream.
MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x);
ICache::invalidate_range(instruction_address(), movptr_instruction_size);
ICache::invalidate_range(instruction_address(), movptr1_instruction_size /* > movptr2_instruction_size */ );
}

// Find and replace the oop/metadata corresponding to this
Expand Down Expand Up @@ -393,13 +405,15 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add
ICache::invalidate_range(verified_entry, instruction_size);
}

//-------------------------------------------------------------------

void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
CodeBuffer cb(code_pos, instruction_size);
MacroAssembler a(&cb);
Assembler::IncompressibleRegion ir(&a); // Fixed length: see NativeGeneralJump::get_instruction_size()

int32_t offset = 0;
a.movptr(t0, entry, offset); // lui, addi, slli, addi, slli
a.movptr(t0, entry, offset, t1); // lui, lui, slli, add
a.jr(t0, offset); // jalr

ICache::invalidate_range(code_pos, instruction_size);
Expand All @@ -410,6 +424,8 @@ void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer)
ShouldNotCallThis();
}

//-------------------------------------------------------------------

address NativeCallTrampolineStub::destination(nmethod *nm) const {
return ptr_at(data_offset);
}
Expand Down
Loading