Skip to content
Permalink
Browse files
8275647: Enable multi-register return values for optimized upcall stubs
Reviewed-by: mcimadamore, ngasson
  • Loading branch information
JornVernee committed Nov 18, 2021
1 parent 4029dd5 commit fb07746ab9eb121e9b5e9e0d07cf9ad6f5eabc9a
Show file tree
Hide file tree
Showing 32 changed files with 359 additions and 899 deletions.
@@ -72,50 +72,6 @@ const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) cons
return abi;
}

// Builds a native BufferLayout from the Java-side layout object referenced by
// 'jlayout'. Scalar fields are read directly from the object; the per-type
// argument/return offsets come from two long[] fields indexed by
// INTEGER_TYPE / VECTOR_TYPE.
const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const {
  oop java_layout = JNIHandles::resolve_non_null(jlayout);

  // Per-register-class offset tables for incoming arguments and outgoing returns.
  typeArrayOop arg_offsets = oop_cast<typeArrayOop>(java_layout->obj_field(BL.input_type_offsets_offset));
  typeArrayOop ret_offsets = oop_cast<typeArrayOop>(java_layout->obj_field(BL.output_type_offsets_offset));

  BufferLayout parsed;

  // Plain long fields of the layout object.
  parsed.stack_args_bytes  = static_cast<size_t>(java_layout->long_field(BL.stack_args_bytes_offset));
  parsed.stack_args        = static_cast<size_t>(java_layout->long_field(BL.stack_args_offset));
  parsed.arguments_next_pc = static_cast<size_t>(java_layout->long_field(BL.arguments_next_pc_offset));
  parsed.buffer_size       = static_cast<size_t>(java_layout->long_field(BL.size_offset));

  // Offsets looked up by register class.
  parsed.arguments_integer = static_cast<size_t>(arg_offsets->long_at(INTEGER_TYPE));
  parsed.arguments_vector  = static_cast<size_t>(arg_offsets->long_at(VECTOR_TYPE));
  parsed.returns_integer   = static_cast<size_t>(ret_offsets->long_at(INTEGER_TYPE));
  parsed.returns_vector    = static_cast<size_t>(ret_offsets->long_at(VECTOR_TYPE));

  return parsed;
}

// Converts the Java-side calling-convention object referenced by 'jconv' into
// a CallRegs: two resource-allocated VMReg arrays (argument registers and
// return registers) together with their lengths. Each array element is
// produced by parse_vmstorage() from the corresponding Java array entry.
const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const {
  oop java_conv = JNIHandles::resolve_non_null(jconv);
  objArrayOop java_args = oop_cast<objArrayOop>(java_conv->obj_field(CallConvOffsets.arg_regs_offset));
  objArrayOop java_rets = oop_cast<objArrayOop>(java_conv->obj_field(CallConvOffsets.ret_regs_offset));

  CallRegs regs;

  // Size and allocate both arrays up front (arguments first, then returns).
  regs._args_length = java_args->length();
  regs._arg_regs = NEW_RESOURCE_ARRAY(VMReg, regs._args_length);

  regs._rets_length = java_rets->length();
  regs._ret_regs = NEW_RESOURCE_ARRAY(VMReg, regs._rets_length);

  // Translate every Java storage object into its VMReg.
  VMReg* arg_slot = regs._arg_regs;
  for (int idx = 0; idx < regs._args_length; ++idx, ++arg_slot) {
    *arg_slot = parse_vmstorage(java_args->obj_at(idx));
  }

  VMReg* ret_slot = regs._ret_regs;
  for (int idx = 0; idx < regs._rets_length; ++idx, ++ret_slot) {
    *ret_slot = parse_vmstorage(java_rets->obj_at(idx));
  }

  return regs;
}

enum class RegType {
INTEGER = 0,
VECTOR = 1,
@@ -49,15 +49,4 @@ struct ABIDescriptor {
bool is_volatile_reg(FloatRegister reg) const;
};

// Native mirror of the Java-side buffer layout object. All members are
// populated from that object by ForeignGlobals::parse_buffer_layout_impl.
// The offset members are used by the upcall stub generator as byte offsets
// from the start of the on-stack buffer.
struct BufferLayout {
// NOTE(review): presumably the total size in bytes of the stack arguments — not used in the visible stub code; confirm.
size_t stack_args_bytes;
// Offset of the slot that receives the pointer to the caller's stack arguments.
size_t stack_args;
// Offset of the first saved vector argument register.
size_t arguments_vector;
// Offset of the first saved integer argument register.
size_t arguments_integer;
// NOTE(review): read from the Java layout but not used in the visible stub code; meaning to be confirmed.
size_t arguments_next_pc;
// Offset of the first vector return value slot.
size_t returns_vector;
// Offset of the first integer return value slot.
size_t returns_integer;
// Size of the whole buffer; the stub aligns this up before reserving stack space.
size_t buffer_size;
};

#endif // CPU_AARCH64_VM_FOREIGN_GLOBALS_AARCH64_HPP
@@ -37,77 +37,6 @@

#define __ _masm->

// Generates the buffer-based upcall stub and returns its entry address.
//
// The emitted stub performs these steps:
// 1. Create buffer according to layout
// 2. Load registers & stack args into buffer
// 3. Call upcall helper with upcall handler instance & buffer pointer (C++ ABI)
// 4. Load return value from buffer into foreign ABI registers
// 5. Return
//
// rec     - JNI handle stored in the stub's constant section and passed as the
//           first argument to attach_thread_and_do_upcall
// jabi    - Java ABI descriptor, parsed with ForeignGlobals::parse_abi_descriptor
// jlayout - Java buffer layout, parsed with ForeignGlobals::parse_buffer_layout
//
// Returns code_begin() of the BufferBlob created from the assembled code.
address ProgrammableUpcallHandler::generate_upcall_stub(jobject rec, jobject jabi, jobject jlayout) {
ResourceMark rm;
const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
const BufferLayout layout = ForeignGlobals::parse_buffer_layout(jlayout);

CodeBuffer buffer("upcall_stub", 1024, upcall_stub_size);

MacroAssembler* _masm = new MacroAssembler(&buffer);

// stub code
__ enter();

// save pointer to JNI receiver handle into constant segment
Address rec_adr = InternalAddress(__ address_constant((address)rec));

assert(abi._stack_alignment_bytes % 16 == 0, "stack must be 16 byte aligned");

// Reserve the buffer on the stack, rounded up to the ABI's stack alignment.
__ sub(sp, sp, (int) align_up(layout.buffer_size, abi._stack_alignment_bytes));

// TODO: This stub only uses registers which are caller-save in the
// standard C ABI. If this is called from a different ABI then
// we need to save registers here according to abi.is_volatile_reg.

// Spill every integer argument register into consecutive pointer-sized slots
// starting at layout.arguments_integer.
for (int i = 0; i < abi._integer_argument_registers.length(); i++) {
Register reg = abi._integer_argument_registers.at(i);
ssize_t offset = layout.arguments_integer + i * sizeof(uintptr_t);
__ str(reg, Address(sp, offset));
}

// Spill every vector argument register into consecutive float_reg_size slots
// starting at layout.arguments_vector.
for (int i = 0; i < abi._vector_argument_registers.length(); i++) {
FloatRegister reg = abi._vector_argument_registers.at(i);
ssize_t offset = layout.arguments_vector + i * float_reg_size;
__ strq(reg, Address(sp, offset));
}

// Capture prev stack pointer (stack arguments base)
__ add(rscratch1, rfp, 16); // Skip saved FP and LR
__ str(rscratch1, Address(sp, layout.stack_args));

// Call upcall helper
// c_rarg0 = receiver handle (loaded from the constant), c_rarg1 = buffer address (current sp).
__ ldr(c_rarg0, rec_adr);
__ mov(c_rarg1, sp);
__ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, ProgrammableUpcallHandler::attach_thread_and_do_upcall));
__ blr(rscratch1);

// Reload integer return registers from the buffer, starting at layout.returns_integer.
for (int i = 0; i < abi._integer_return_registers.length(); i++) {
ssize_t offs = layout.returns_integer + i * sizeof(uintptr_t);
__ ldr(abi._integer_return_registers.at(i), Address(sp, offs));
}

// Reload vector return registers from the buffer, starting at layout.returns_vector.
for (int i = 0; i < abi._vector_return_registers.length(); i++) {
FloatRegister reg = abi._vector_return_registers.at(i);
ssize_t offs = layout.returns_vector + i * float_reg_size;
__ ldrq(reg, Address(sp, offs));
}

// Tear down the frame set up by enter() and return to the native caller.
__ leave();
__ ret(lr);

__ flush();

// Copy the assembled code into a BufferBlob; its code start is the stub entry point.
BufferBlob* blob = BufferBlob::create("upcall_stub", &buffer);

return blob->code_begin();
}

// for callee saved regs, according to the caller's ABI
static int compute_reg_save_area_size(const ABIDescriptor& abi) {
int size = 0;
@@ -186,35 +115,17 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
__ block_comment("} restore_callee_saved_regs ");
}

address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry, jobject jabi, jobject jconv) {
address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry,
BasicType* in_sig_bt, int total_in_args,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
bool needs_return_buffer, int ret_buf_size) {
ResourceMark rm;
const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
const CallRegs call_regs = ForeignGlobals::parse_call_regs(jconv);
assert(call_regs._rets_length <= 1, "no multi reg returns");
CodeBuffer buffer("upcall_stub_linkToNative", /* code_size = */ 2048, /* locs_size = */ 1024);

assert(entry->is_static(), "static only");
// Fill in the signature array, for the calling-convention call.
const int total_out_args = entry->size_of_parameters();
assert(total_out_args > 0, "receiver arg");

BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_out_args);
BasicType ret_type;
{
int i = 0;
SignatureStream ss(entry->signature());
for (; !ss.at_return_type(); ss.next()) {
out_sig_bt[i++] = ss.type(); // Collect remaining bits of signature
if (ss.type() == T_LONG || ss.type() == T_DOUBLE)
out_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
}
assert(i == total_out_args, "");
ret_type = ss.type();
}
// skip receiver
BasicType* in_sig_bt = out_sig_bt + 1;
int total_in_args = total_out_args - 1;

Register shuffle_reg = r19;
JavaCallConv out_conv;
NativeCallConv in_conv(call_regs._arg_regs, call_regs._args_length);
@@ -251,6 +162,12 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv
int frame_data_offset = reg_save_area_offset + reg_save_area_size;
int frame_bottom_offset = frame_data_offset + sizeof(OptimizedEntryBlob::FrameData);

int ret_buf_offset = -1;
if (needs_return_buffer) {
ret_buf_offset = frame_bottom_offset;
frame_bottom_offset += ret_buf_size;
}

int frame_size = frame_bottom_offset;
frame_size = align_up(frame_size, StackAlignmentInBytes);

@@ -259,6 +176,9 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv
//
// FP-> | |
// |---------------------| = frame_bottom_offset = frame_size
// | (optional) |
// | ret_buf |
// |---------------------| = ret_buf_offset
// | |
// | FrameData |
// |---------------------| = frame_data_offset
@@ -303,6 +223,10 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv

__ block_comment("{ argument shuffle");
arg_spilller.generate_fill(_masm, arg_save_area_offset);
if (needs_return_buffer) {
assert(ret_buf_offset != -1, "no return buffer allocated");
__ lea(abi._ret_buf_addr_reg, Address(sp, ret_buf_offset));
}
arg_shuffle.generate(_masm, shuffle_reg->as_VMReg(), abi._shadow_space_bytes, 0);
__ block_comment("} argument shuffle");

@@ -318,6 +242,51 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv
__ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
__ blr(rscratch1);

// return value shuffle
if (!needs_return_buffer) {
#ifdef ASSERT
if (call_regs._rets_length == 1) { // 0 or 1
VMReg j_expected_result_reg;
switch (ret_type) {
case T_BOOLEAN:
case T_BYTE:
case T_SHORT:
case T_CHAR:
case T_INT:
case T_LONG:
j_expected_result_reg = r0->as_VMReg();
break;
case T_FLOAT:
case T_DOUBLE:
j_expected_result_reg = v0->as_VMReg();
break;
default:
fatal("unexpected return type: %s", type2name(ret_type));
}
// No need to move for now, since CallArranger can pick a return type
// that goes in the same reg for both CCs. But, at least assert they are the same
assert(call_regs._ret_regs[0] == j_expected_result_reg,
"unexpected result register: %s != %s", call_regs._ret_regs[0]->name(), j_expected_result_reg->name());
}
#endif
} else {
assert(ret_buf_offset != -1, "no return buffer allocated");
__ lea(rscratch1, Address(sp, ret_buf_offset));
int offset = 0;
for (int i = 0; i < call_regs._rets_length; i++) {
VMReg reg = call_regs._ret_regs[i];
if (reg->is_Register()) {
__ ldr(reg->as_Register(), Address(rscratch1, offset));
offset += 8;
} else if (reg->is_FloatRegister()) {
__ ldrd(reg->as_FloatRegister(), Address(rscratch1, offset));
offset += 16; // needs to match VECTOR_REG_SIZE in AArch64Architecture (Java)
} else {
ShouldNotReachHere();
}
}
}

result_spiller.generate_spill(_masm, res_save_area_offset);

__ block_comment("{ on_exit");
@@ -331,33 +300,6 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv

result_spiller.generate_fill(_masm, res_save_area_offset);

// return value shuffle
#ifdef ASSERT
if (call_regs._rets_length == 1) { // 0 or 1
VMReg j_expected_result_reg;
switch (ret_type) {
case T_BOOLEAN:
case T_BYTE:
case T_SHORT:
case T_CHAR:
case T_INT:
case T_LONG:
j_expected_result_reg = r0->as_VMReg();
break;
case T_FLOAT:
case T_DOUBLE:
j_expected_result_reg = v0->as_VMReg();
break;
default:
fatal("unexpected return type: %s", type2name(ret_type));
}
// No need to move for now, since CallArranger can pick a return type
// that goes in the same reg for both CCs. But, at least assert they are the same
assert(call_regs._ret_regs[0] == j_expected_result_reg,
"unexpected result register: %s != %s", call_regs._ret_regs[0]->name(), j_expected_result_reg->name());
}
#endif

__ leave();
__ ret(lr);

@@ -386,15 +328,16 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv
const char* name = "optimized_upcall_stub";
#endif // PRODUCT

OptimizedEntryBlob* blob = OptimizedEntryBlob::create(name, &buffer, exception_handler_offset, receiver, in_ByteSize(frame_data_offset));
OptimizedEntryBlob* blob
= OptimizedEntryBlob::create(name,
&buffer,
exception_handler_offset,
receiver,
in_ByteSize(frame_data_offset));

if (TraceOptimizedUpcallStubs) {
blob->print_on(tty);
}

return blob->code_begin();
}

// Reports that optimized upcall stubs are available on this platform.
bool ProgrammableUpcallHandler::supports_optimized_upcalls() {
return true;
}
@@ -30,11 +30,6 @@ const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) cons
return {};
}

// Buffer layout parsing is not implemented on this port: the function calls
// Unimplemented(). The value-initialized return only satisfies the signature.
const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const {
Unimplemented();
return {};
}

const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const {
Unimplemented();
return {};
@@ -24,7 +24,6 @@
#ifndef CPU_ARM_VM_FOREIGN_GLOBALS_ARM_HPP
#define CPU_ARM_VM_FOREIGN_GLOBALS_ARM_HPP

// Empty placeholder type; this port declares BufferLayout with no members.
class BufferLayout {};
// Empty placeholder type; this port declares ABIDescriptor with no members.
class ABIDescriptor {};

#endif // CPU_ARM_VM_FOREIGN_GLOBALS_ARM_HPP
@@ -25,16 +25,12 @@
#include "prims/universalUpcallHandler.hpp"
#include "utilities/debug.hpp"

// Upcall stub generation is not implemented on this port: the function calls
// Unimplemented(). The nullptr return only satisfies the signature.
address ProgrammableUpcallHandler::generate_upcall_stub(jobject rec, jobject jabi, jobject jlayout) {
Unimplemented();
return nullptr;
}

address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) {
// Optimized upcall stubs are not available on this port: the function calls
// ShouldNotCallThis(), so it is not expected to be reached. The nullptr
// return only satisfies the signature; all parameters are unused here.
address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry,
BasicType* in_sig_bt, int total_in_args,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
bool needs_return_buffer, int ret_buf_size) {
ShouldNotCallThis();
return nullptr;
}

// Reports that optimized upcall stubs are not available on this platform.
bool ProgrammableUpcallHandler::supports_optimized_upcalls() {
return false;
}
@@ -32,11 +32,6 @@ const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) cons
return {};
}

// Buffer layout parsing is not implemented on this port: the function calls
// Unimplemented(). The value-initialized return only satisfies the signature.
const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const {
Unimplemented();
return {};
}

const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const {
Unimplemented();
return {};
@@ -25,7 +25,6 @@
#ifndef CPU_PPC_VM_FOREIGN_GLOBALS_PPC_HPP
#define CPU_PPC_VM_FOREIGN_GLOBALS_PPC_HPP

// Empty placeholder type; this port declares BufferLayout with no members.
class BufferLayout {};
// Empty placeholder type; this port declares ABIDescriptor with no members.
class ABIDescriptor {};

#endif // CPU_PPC_VM_FOREIGN_GLOBALS_PPC_HPP

0 comments on commit fb07746

Please sign in to comment.