Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8279827: riscv: RVB: Add shift and add instructions #43

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions src/hotspot/cpu/riscv/assembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1899,11 +1899,17 @@ enum Nf {
emit(insn); \
}

// Bit-manipulation register-register instructions: add.uw, the rotates, and
// the shift-and-add family (shNadd / shNadd.uw for N = 1, 2, 3).
// NOTE(review): the INSN arguments look like (name, opcode, funct3, funct7);
// confirm against the macro definition, which is not visible in this hunk.
// Each mnemonic is listed exactly once. The five entries from add_uw to rorw
// previously appeared twice, and a repeated INSN for the same name would
// presumably define the same emitter a second time.
INSN(add_uw,    0b0111011, 0b000, 0b0000100);
INSN(rol,       0b0110011, 0b001, 0b0110000);
INSN(rolw,      0b0111011, 0b001, 0b0110000);
INSN(ror,       0b0110011, 0b101, 0b0110000);
INSN(rorw,      0b0111011, 0b101, 0b0110000);
INSN(sh1add,    0b0110011, 0b010, 0b0010000);
INSN(sh2add,    0b0110011, 0b100, 0b0010000);
INSN(sh3add,    0b0110011, 0b110, 0b0010000);
INSN(sh1add_uw, 0b0111011, 0b010, 0b0010000);
INSN(sh2add_uw, 0b0111011, 0b100, 0b0010000);
INSN(sh3add_uw, 0b0111011, 0b110, 0b0010000);

#undef INSN

Expand Down
8 changes: 3 additions & 5 deletions src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -349,12 +349,10 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
void LIR_Assembler::arraycopy_prepare_params(Register src, Register src_pos, Register length,
Register dst, Register dst_pos, BasicType basic_type) {
int scale = array_element_size(basic_type);
__ slli(t0, src_pos, scale);
__ add(c_rarg0, src, t0);
__ shadd(c_rarg0, src_pos, src, t0, scale);
__ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type));
assert_different_registers(c_rarg0, dst, dst_pos, length);
__ slli(t0, dst_pos, scale);
__ add(c_rarg1, dst, t0);
__ shadd(c_rarg1, dst_pos, dst, t0, scale);
__ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type));
assert_different_registers(c_rarg1, dst, length);
__ mv(c_rarg2, length);
Expand Down
3 changes: 1 addition & 2 deletions src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1965,8 +1965,7 @@ Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) {
index = index_op->as_register_lo();
}
if (scale != 0) {
__ slli(tmp, index, scale);
__ add(tmp, base, tmp);
__ shadd(tmp, index, base, tmp, scale);
} else {
__ add(tmp, base, index);
}
Expand Down
5 changes: 2 additions & 3 deletions src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -272,8 +272,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1
const Register arr_size = tmp2; // okay to be the same
// align object end
mv(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask);
slli(t0, len, f);
add(arr_size, arr_size, t0);
shadd(arr_size, len, arr_size, t0, f);
andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask);

try_allocate(obj, arr_size, 0, tmp1, tmp2, slow_case);
Expand Down
33 changes: 13 additions & 20 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,8 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern
Register orig_haystack = tmp5;
mv(orig_haystack, haystack);
slli(haystack_end, result_tmp, haystack_chr_shift); // result_tmp = tmp4
add(haystack_end, haystack, haystack_end);
// result_tmp = tmp4
shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift);
sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1
mv(tmp3, needle);

Expand Down Expand Up @@ -439,8 +439,8 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
sub(ch2, ch2, 1); // for next pattern element, skip distance -1
bgtz(ch2, BCLOOP);

slli(tmp6, needle_len, needle_chr_shift);
add(tmp6, tmp6, needle); // tmp6: pattern end, address after needle
// tmp6: pattern end, address after needle
shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift);
if (needle_isL == haystack_isL) {
// load last 8 bytes (8LL/4UU symbols)
ld(tmp6, Address(tmp6, -wordSize));
Expand Down Expand Up @@ -471,8 +471,7 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
// move j with bad char offset table
bind(BMLOOPSTR2);
// compare pattern to source string backward
slli(result, nlen_tmp, haystack_chr_shift);
add(result, haystack, result);
shadd(result, nlen_tmp, haystack, result, haystack_chr_shift);
(this->*haystack_load_1chr)(skipch, Address(result), noreg);
sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8
if (needle_isL == haystack_isL) {
Expand All @@ -496,11 +495,9 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
}

bind(BMLOOPSTR1);
slli(ch1, nlen_tmp, needle_chr_shift);
add(ch1, ch1, needle);
shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift);
(this->*needle_load_1chr)(ch1, Address(ch1), noreg);
slli(ch2, nlen_tmp, haystack_chr_shift);
add(ch2, haystack, ch2);
shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift);
(this->*haystack_load_1chr)(ch2, Address(ch2), noreg);

bind(BMLOOPSTR1_AFTER_LOAD);
Expand All @@ -527,8 +524,8 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,

bind(BMADV);
sub(nlen_tmp, needle_len, 1);
slli(result, result_tmp, haystack_chr_shift);
add(haystack, haystack, result); // move haystack after bad char skip offset
// move haystack after bad char skip offset
shadd(haystack, result_tmp, haystack, result, haystack_chr_shift);
ble(haystack, haystack_end, BMLOOPSTR2);
add(sp, sp, ASIZE);
j(NOMATCH);
Expand Down Expand Up @@ -1377,8 +1374,7 @@ void C2_MacroAssembler::clear_array_v(Register base, Register cnt) {
vsetvli(t0, cnt, Assembler::e64, Assembler::m4);
vse64_v(v0, base);
sub(cnt, cnt, t0);
slli(t0, t0, 3);
add(base, base, t0);
shadd(base, t0, base, t0, 3);
bnez(cnt, loop);
}

Expand Down Expand Up @@ -1464,8 +1460,7 @@ void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register
bgez(tmp2, DIFFERENCE);
sub(cnt2, cnt2, tmp1);
add(strL, strL, tmp1);
slli(tmp1, tmp1, 1);
add(strU, strU, tmp1);
shadd(strU, tmp1, strU, tmp1, 1);
bnez(cnt2, loop);
j(DONE);
}
Expand Down Expand Up @@ -1493,8 +1488,7 @@ void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Registe
vse16_v(v0, dst);
sub(len, len, tmp);
add(src, src, tmp);
slli(tmp, tmp, 1);
add(dst, dst, tmp);
shadd(dst, tmp, dst, tmp, 1);
bnez(len, loop);
BLOCK_COMMENT("} byte_array_inflate_v");
}
Expand Down Expand Up @@ -1533,8 +1527,7 @@ void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register
add(result, result, t0);
add(dst, dst, t0);
sub(len, len, t0);
slli(t0, t0, 1);
add(src, src, t0);
shadd(src, t0, src, t0, 1);
bnez(len, loop);
j(DONE);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -70,8 +70,8 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl
const Register end = count;

__ beqz(count, L_done); // zero count - nothing to do
__ slli(count, count, LogBytesPerHeapOop);
__ add(end, start, count); // end = start + count << LogBytesPerHeapOop
// end = start + (count << LogBytesPerHeapOop)
__ shadd(end, count, start, count, LogBytesPerHeapOop);
__ sub(end, end, BytesPerHeapOop); // last element address to make inclusive

__ srli(start, start, CardTable::card_shift());
Expand Down
38 changes: 13 additions & 25 deletions src/hotspot/cpu/riscv/interp_masm_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,7 @@ void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
// install it in cache. Instead we pre-add the indexed offset to
// xcpool and return it in cache. All clients of this method need to
// be modified accordingly.
slli(cache, index, 5);
add(cache, xcpool, cache);
shadd(cache, index, xcpool, cache, 5);
}


Expand Down Expand Up @@ -267,8 +266,8 @@ void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize));
// skip past the header
add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
slli(tmp, tmp, 2 + LogBytesPerWord);
add(cache, cache, tmp); // construct pointer to cache entry
// construct pointer to cache entry
shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord);
}

// Load object from cpool->resolved_references(index)
Expand All @@ -283,19 +282,16 @@ void InterpreterMacroAssembler::load_resolved_reference_at_index(
resolve_oop_handle(result, tmp);
// Add in the index
addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
slli(index, index, LogBytesPerHeapOop);
add(result, result, index);
shadd(result, index, result, index, LogBytesPerHeapOop);
load_heap_oop(result, Address(result, 0));
}

// Emits code that loads the resolved klass for constant-pool slot `index`
// into `klass`.
//
//   cpool - base register of the constant pool; only read
//   index - constant-pool index of the class entry; only read
//   klass - destination; first receives cpool->_resolved_klasses, then the
//           element selected by the resolved_klass_index
//   temp  - scratch; overwritten with the entry address and then with the
//           resolved_klass_index
//
// Each address is formed by a single shadd(Rd, Rs1, Rs2, tmp, shamt), i.e.
// Rd = Rs2 + (Rs1 << shamt). An equivalent slli/add pair must not be emitted
// next to it: doing so would scale `temp` twice and add the scaled index into
// `klass` twice, producing a wrong element address.
void InterpreterMacroAssembler::load_resolved_klass_at_offset(
    Register cpool, Register index, Register klass, Register temp) {
  // temp = cpool + (index << LogBytesPerWord)
  shadd(temp, index, cpool, temp, LogBytesPerWord);
  lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index
  ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses
  // klass = klass + (resolved_klass_index << LogBytesPerWord)
  shadd(klass, temp, klass, temp, LogBytesPerWord);
  // Load the array element, skipping the Array<Klass*> header.
  ld(klass, Address(klass, Array<Klass*>::base_offset_in_bytes()));
}

Expand Down Expand Up @@ -529,21 +525,18 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
if (table == Interpreter::dispatch_table(state)) {
li(t1, Interpreter::distance_from_dispatch_table(state));
add(t1, Rs, t1);
slli(t1, t1, 3);
add(t1, xdispatch, t1);
shadd(t1, t1, xdispatch, t1, 3);
} else {
mv(t1, (address)table);
slli(Rs, Rs, 3);
add(t1, t1, Rs);
shadd(t1, Rs, t1, Rs, 3);
}
ld(t1, Address(t1));
jr(t1);

if (needs_thread_local_poll) {
bind(safepoint);
la(t1, ExternalAddress((address)safepoint_table));
slli(Rs, Rs, 3);
add(t1, t1, Rs);
shadd(t1, Rs, t1, Rs, 3);
ld(t1, Address(t1));
jr(t1);
}
Expand Down Expand Up @@ -1805,8 +1798,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca
// CallTypeData/VirtualCallTypeData to reach its end. Non null
// if there's a return to profile.
assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
slli(tmp, tmp, exact_log2(DataLayout::cell_size));
add(mdp, mdp, tmp);
shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size));
}
sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
} else {
Expand Down Expand Up @@ -1888,21 +1880,17 @@ void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register t
add(t0, mdp, off_base);
add(t1, mdp, type_base);


slli(tmp2, tmp1, per_arg_scale);
add(tmp2, tmp2, t0);
shadd(tmp2, tmp1, t0, tmp2, per_arg_scale);
// load offset on the stack from the slot for this parameter
ld(tmp2, Address(tmp2, 0));
neg(tmp2, tmp2);

// read the parameter from the local area
slli(tmp2, tmp2, Interpreter::logStackElementSize);
add(tmp2, tmp2, xlocals);
shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize);
ld(tmp2, Address(tmp2, 0));

// profile the parameter
slli(t0, tmp1, per_arg_scale);
add(t1, t0, t1);
shadd(t1, tmp1, t1, t0, per_arg_scale);
Address arg_type(t1, 0);
profile_obj_type(tmp2, arg_type, tmp3);

Expand Down
Loading