8277440: riscv: Move UseVExt from product to experimental
Reviewed-by: yadongwang, fyang
Yanhong Zhu authored and Fei Yang committed Nov 19, 2021
1 parent 398cec3 · commit dfde4e4ebfb1c15f1fb225f7400aced51cc469dd
@@ -134,10 +134,9 @@ void AbstractInterpreter::layout_activation(Method* method,
#endif

interpreter_frame->interpreter_frame_set_method(method);
-// NOTE the difference in using sender_sp and
-// interpreter_frame_sender_sp interpreter_frame_sender_sp is
-// the original sp of the caller (the unextended_sp) and
-// sender_sp is fp+8/16 (32bit/64bit)
+// NOTE the difference in using sender_sp and interpreter_frame_sender_sp
+// interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp)
+// and sender_sp is fp
intptr_t* locals = NULL;
if (caller->is_interpreted_frame()) {
locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1;

Large diffs are not rendered by default.

@@ -42,7 +42,7 @@ using MacroAssembler::null_check;
void try_allocate(
Register obj, // result: pointer to object after successful allocation
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
-int con_size_in_bytes, // object size in bytes if known at compile time
+int con_size_in_bytes, // object size in bytes if known at compile time
Register tmp1, // temp register
Register tmp2, // temp register
Label& slow_case // continuation point if fast allocation fails
@@ -241,7 +241,7 @@ enum reg_save_layout {
};

// Save off registers which might be killed by calls into the runtime.
-// Tries to smart of about FP registers. In particular we separate
+// Tries to smart of about FPU registers. In particular we separate
// saving and describing the FPU registers for deoptimization since we
// have to save the FPU registers twice if we describe them. The
// deopt blob is the only thing which needs to describe FPU registers.
@@ -259,7 +259,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
assert_cond(oop_map != NULL);

// cpu_regs, caller save registers only, see FrameMap::initialize
-// in c1_FrameMap_riscv64.cpp for detail.
+// in c1_FrameMap_riscv.cpp for detail.
const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {x7, x10, x11, x12,
x13, x14, x15, x16, x17,
x28, x29, x30, x31};
@@ -72,23 +72,24 @@ define_pd_global(intx, InitArrayShortSize, BytesPerLong);

define_pd_global(intx, InlineSmallCode, 1000);

-#define ARCH_FLAGS(develop, \
-product, \
-notproduct, \
-range, \
-constraint) \
-\
-product(bool, NearCpool, true, \
-"constant pool is close to instructions") \
-product(intx, BlockZeroingLowLimit, 256, \
-"Minimum size in bytes when block zeroing will be used") \
-range(1, max_jint) \
-product(bool, TraceTraps, false, "Trace all traps the signal handler")\
-product(bool, UseConservativeFence, true, \
-"Extend i for r and o for w in the pred/succ flags of fence;" \
-"Extend fence.i to fence.i + fence.") \
-product(bool, UseVExt, false, "Use RVV instructions") \
-product(bool, AvoidUnalignedAccesses, true, \
-"Avoid generating unaligned memory accesses") \
+#define ARCH_FLAGS(develop, \
+product, \
+notproduct, \
+range, \
+constraint) \
+\
+product(bool, NearCpool, true, \
+"constant pool is close to instructions") \
+product(intx, BlockZeroingLowLimit, 256, \
+"Minimum size in bytes when block zeroing will be used") \
+range(1, max_jint) \
+product(bool, TraceTraps, false, "Trace all traps the signal handler") \
+/* For now we're going to be safe and add the I/O bits to userspace fences. */ \
+product(bool, UseConservativeFence, true, \
+"Extend i for r and o for w in the pred/succ flags of fence;" \
+"Extend fence.i to fence.i + fence.") \
+product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \
+product(bool, AvoidUnalignedAccesses, true, \
+"Avoid generating unaligned memory accesses") \

#endif // CPU_RISCV_GLOBALS_RISCV_HPP
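
The flag is renamed from UseVExt to UseRVV, matching the usual name of the RISC-V Vector extension (RVV), and gains the EXPERIMENTAL attribute. As with any HotSpot experimental flag, it now has to be unlocked before it can be enabled, for example:

    java -XX:+UnlockExperimentalVMOptions -XX:+UseRVV ...
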
@@ -47,7 +47,7 @@ void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached
ResourceMark rm;
CodeBuffer code(code_begin, ic_stub_code_size());
MacroAssembler* masm = new MacroAssembler(&code);
-// note: even though the code contains an embedded value, we do not need reloc info
+// Note: even though the code contains an embedded value, we do not need reloc info
// because
// (1) the value is old (i.e., doesn't matter for scavenges)
// (2) these ICStubs are removed *before* a GC happens, so the roots disappear
@@ -26,14 +26,14 @@
#ifndef CPU_RISCV_ICACHE_RISCV_HPP
#define CPU_RISCV_ICACHE_RISCV_HPP

-// Interface for updating the instruction cache. Whenever the VM
+// Interface for updating the instruction cache. Whenever the VM
// modifies code, part of the processor instruction cache potentially
// has to be flushed.

class ICache : public AbstractICache {
public:
enum {
-stub_size = 16, // Size of the icache flush stub in bytes
+stub_size = 16, // Size of the icache flush stub in bytes
line_size = BytesPerWord, // conservative
log2_line_size = LogBytesPerWord // log2(line_size)
};
@@ -223,9 +223,9 @@ void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
assert_different_registers(cache, xcpool);
get_cache_index_at_bcp(index, bcp_offset, index_size);
assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
-// convert from field index to ConstantPoolCacheEntry
+// Convert from field index to ConstantPoolCacheEntry
// riscv64 already has the cache in xcpool so there is no need to
-// install it in cache. instead we pre-add the indexed offset to
+// install it in cache. Instead we pre-add the indexed offset to
// xcpool and return it in cache. All clients of this method need to
// be modified accordingly.
slli(cache, index, 5);
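
For context: the shift amount follows from the assert above. sizeof(ConstantPoolCacheEntry) == 4 * wordSize is 32 bytes on riscv64, and log2(32) == 5, so slli(cache, index, 5) turns the entry index into a byte offset, which is then pre-added to xcpool and returned in cache.
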
@@ -261,7 +261,7 @@ void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
assert(cache != tmp, "must use different register");
get_cache_index_at_bcp(tmp, bcp_offset, index_size);
assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
-// convert from field index to ConstantPoolCacheEntry index
+// Convert from field index to ConstantPoolCacheEntry index
// and from word offset to byte offset
assert(log2i_exact(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line");
ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize));
@@ -277,7 +277,7 @@ void InterpreterMacroAssembler::load_resolved_reference_at_index(
assert_different_registers(result, index);

get_constant_pool(result);
-// load pointer for resolved_references[] objArray
+// Load pointer for resolved_references[] objArray
ld(result, Address(result, ConstantPool::cache_offset_in_bytes()));
ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes()));
resolve_oop_handle(result, tmp);
@@ -593,7 +593,7 @@ void InterpreterMacroAssembler::remove_activation(
bool throw_monitor_exception,
bool install_monitor_exception,
bool notify_jvmdi) {
-// Note: Registers x13 xmm0 may be in use for the
+// Note: Registers x13 may be in use for the
// result check if synchronized method
Label unlocked, unlock, no_unlock;

@@ -802,7 +802,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg)

Label slow_case;

-// Load object pointer into obj_reg %c_rarg3
+// Load object pointer into obj_reg c_rarg3
ld(obj_reg, Address(lock_reg, obj_offset));

if (DiagnoseSyncOnValueBasedClasses != 0) {
@@ -826,13 +826,13 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg)

// Test if the oopMark is an obvious stack pointer, i.e.,
// 1) (mark & 7) == 0, and
-// 2) rsp <= mark < mark + os::pagesize()
+// 2) sp <= mark < mark + os::pagesize()
//
// These 3 tests can be done by evaluating the following
-// expression: ((mark - rsp) & (7 - os::vm_page_size())),
+// expression: ((mark - sp) & (7 - os::vm_page_size())),
// assuming both stack pointer and pagesize have their
// least significant 3 bits clear.
-// NOTE: the oopMark is in swap_reg %x10 as the result of cmpxchg
+// NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg
sub(swap_reg, swap_reg, sp);
li(t0, (int64_t)(7 - os::vm_page_size()));
andr(swap_reg, swap_reg, t0);
@@ -880,10 +880,10 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg)
save_bcp(); // Save in case of exception

// Convert from BasicObjectLock structure to object and BasicLock
-// structure Store the BasicLock address into %x10
+// structure Store the BasicLock address into x10
la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));

-// Load oop into obj_reg(%c_rarg3)
+// Load oop into obj_reg(c_rarg3)
ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));

// Free entry
@@ -1485,7 +1485,7 @@ void InterpreterMacroAssembler::profile_switch_case(Register index,
if (ProfileInterpreter) {
Label profile_continue;

-// if no method data exists, go to profile_continue.
+// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);

// Build the base (index * per_case_size_in_bytes()) +
@@ -1663,8 +1663,8 @@ void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& md
xorr(obj, obj, t0);
andi(t0, obj, TypeEntries::type_klass_mask);
beqz(t0, next); // klass seen before, nothing to
-// do. The unknown bit may have been
-// set already but no need to check.
+// do. The unknown bit may have been
+// set already but no need to check.

andi(t0, obj, TypeEntries::type_unknown);
bnez(t0, next);
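
For context: the xorr/andi pair computes (obj_klass ^ profile_entry) & TypeEntries::type_klass_mask. A zero result means the masked klass bits already match the profiled entry (klass seen before, nothing to do); otherwise the following type_unknown test checks whether profiling has already given up on this slot.
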
@@ -1896,7 +1896,6 @@ void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register t
neg(tmp2, tmp2);

// read the parameter from the local area
-
slli(tmp2, tmp2, Interpreter::logStackElementSize);
add(tmp2, tmp2, xlocals);
ld(tmp2, Address(tmp2, 0));
@@ -1191,7 +1191,7 @@ static int patch_offset_in_jal(address branch, int64_t offset) {
Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21]
Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20]
Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12]
-return NativeInstruction::instruction_size; // only one instruction
+return NativeInstruction::instruction_size; // only one instruction
}

static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
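
For reference, a standalone sketch (illustrative only; HotSpot does this through Assembler::patch, and the helper name here is made up) of the J-type immediate layout that patch_offset_in_jal writes. Per the RISC-V spec, imm[20|10:1|11|19:12] occupies instruction bits [31|30:21|20|19:12]:

    #include <cstdint>

    // Scatter a 2-byte-aligned, +/-1 MiB jal offset into the immediate
    // fields of an existing jal instruction word.
    static uint32_t encode_jal_offset(uint32_t insn, int64_t offset) {
      insn &= 0x00000fff;                                 // keep opcode and rd
      insn |= (uint32_t)((offset >> 20) & 0x1)   << 31;   // offset[20]    -> insn[31]
      insn |= (uint32_t)((offset >> 1)  & 0x3ff) << 21;   // offset[10:1]  -> insn[30:21]
      insn |= (uint32_t)((offset >> 11) & 0x1)   << 20;   // offset[11]    -> insn[20]
      insn |= (uint32_t)((offset >> 12) & 0xff)  << 12;   // offset[19:12] -> insn[19:12]
      return insn;
    }
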
@@ -2584,7 +2584,7 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
#undef final_jmp
}

-// scans count pointer sized words at [addr] for occurence of value,
+// Scans count pointer sized words at [addr] for occurence of value,
// generic
void MacroAssembler::repne_scan(Register addr, Register value, Register count,
Register temp) {
@@ -2618,7 +2618,7 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,

assert(label_nulls <= 1, "at most one NULL in the batch");

-// a couple of usefule fields in sub_klass:
+// A couple of usefule fields in sub_klass:
int ss_offset = in_bytes(Klass::secondary_supers_offset());
int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
Address secondary_supers_addr(sub_klass, ss_offset);
@@ -3028,7 +3028,7 @@ void MacroAssembler::compute_index(Register haystack, Register trailing_zeros,
}

// string indexof
-// find pattern element in src, compute match mask,
+// Find pattern element in src, compute match mask,
// only the first occurrence of 0x80/0x8000 at low bits is the valid match index
// match mask patterns and corresponding indices would be like:
// - 0x8080808080808080 (Latin1)
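
For context, a standalone sketch (illustrative only, not HotSpot code; the helper name is made up) of the SWAR trick compute_match_mask performs for the Latin1 case: XOR with the splatted pattern byte zeroes the matching bytes, then the classic zero-byte test marks them with 0x80. Borrow propagation in the subtraction can taint bytes above the first match, which is why only the first occurrence at low bits is a valid match index, as the comment above notes:

    #include <cstdint>

    // Each result byte holds 0x80 where src contains c; only the
    // lowest such 0x80 is guaranteed to be a real match.
    static uint64_t match_mask_latin1(uint64_t src, uint8_t c) {
      uint64_t x = src ^ (0x0101010101010101ULL * c);  // matching byte -> 0x00
      return (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL;
    }
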
@@ -3045,7 +3045,7 @@ void MacroAssembler::compute_match_mask(Register src, Register pattern, Register
andr(match_mask, match_mask, src);
}

-// count bits of trailing zero chars from lsb to msb until first non-zero element.
+// Count bits of trailing zero chars from lsb to msb until first non-zero element.
// For LL case, one byte for one element, so shift 8 bits once, and for other case,
// shift 16 bits once.
void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register Rtmp1, Register Rtmp2)
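
For context: ctzc_bit counts trailing zeros in element-sized steps rather than bit by bit, advancing 8 bits at a time for Latin1 (one byte per element) and 16 bits at a time for UTF-16, so the count it returns is always a multiple of the element width and points at the first non-zero character.
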
@@ -35,7 +35,7 @@
static const bool implements_scalable_vector = true;

static const bool supports_scalable_vector() {
-return UseVExt;
+return UseRVV;
}

// riscv64 supports misaligned vectors store/load.
@@ -190,7 +190,7 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler*

// x30: sender SP (must preserve; see prepare_to_jump_from_interpreted)
// xmethod: Method*
-// x13: argument locator (parameter slot count, added to rsp)
+// x13: argument locator (parameter slot count, added to sp)
// x11: used as temp to hold mh or receiver
// x10, x29: garbage temps, blown away
Register argp = x13; // argument list ptr, live on error paths
