From 22d2397f72f1e501385b5f16fd115f7e742866a8 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Wed, 18 Dec 2024 11:58:14 +0800 Subject: [PATCH 1/5] 8346478: RISC-V: Refactor add/sub assembler routines --- .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 6 +- .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 10 +- src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 8 +- .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 98 +++++---- .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 6 +- .../cardTableBarrierSetAssembler_riscv.cpp | 4 +- .../shenandoahBarrierSetAssembler_riscv.cpp | 8 +- .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 8 +- src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 40 ++-- src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 6 +- .../cpu/riscv/macroAssembler_riscv.cpp | 150 +++++++------- .../cpu/riscv/macroAssembler_riscv.hpp | 22 +- src/hotspot/cpu/riscv/riscv.ad | 24 +-- src/hotspot/cpu/riscv/runtime_riscv.cpp | 16 +- src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 14 +- src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 192 +++++++++--------- .../templateInterpreterGenerator_riscv.cpp | 38 ++-- src/hotspot/cpu/riscv/templateTable_riscv.cpp | 108 +++++----- 19 files changed, 386 insertions(+), 378 deletions(-) diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp index 7d673383cad26..029c4069c0700 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp @@ -211,7 +211,7 @@ void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Registe Label cont, slow; #define PUSH(r1, r2) \ - __ addi(sp, sp, -2 * wordSize); \ + __ subi(sp, sp, 2 * wordSize); \ __ sd(r1, Address(sp, 1 * wordSize)); \ __ sd(r2, Address(sp, 0)); @@ -337,10 +337,10 @@ void LIR_Assembler::arraycopy_prepare_params(Register src, Register src_pos, Reg Register dst, Register dst_pos, BasicType basic_type) { int scale = array_element_size(basic_type); __ shadd(c_rarg0, src_pos, src, t0, scale); - __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); + __ addi(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); assert_different_registers(c_rarg0, dst, dst_pos, length); __ shadd(c_rarg1, dst_pos, dst, t0, scale); - __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); + __ addi(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); assert_different_registers(c_rarg1, dst, length); __ mv(c_rarg2, length); assert_different_registers(c_rarg2, dst); diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp index d587a557a7312..6ac3ebe518497 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -1084,7 +1084,7 @@ void LIR_Assembler::typecheck_helper_slowcheck(ciKlass *k, Register obj, Registe // check for self __ beq(klass_RInfo, k_RInfo, *success_target); - __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo + __ subi(sp, sp, 2 * wordSize); // 2: store k_RInfo and klass_RInfo __ sd(k_RInfo, Address(sp, 0)); // sub klass __ sd(klass_RInfo, Address(sp, wordSize)); // super klass __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id))); @@ -1099,7 +1099,7 @@ void LIR_Assembler::typecheck_helper_slowcheck(ciKlass *k, Register obj, Registe // perform the fast part of the checking logic __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, nullptr); // call out-of-line instance of __ check_klass_subtytpe_slow_path(...) - __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo + __ subi(sp, sp, 2 * wordSize); // 2: store k_RInfo and klass_RInfo __ sd(klass_RInfo, Address(sp, wordSize)); // sub klass __ sd(k_RInfo, Address(sp, 0)); // super klass __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id))); @@ -2139,7 +2139,7 @@ void LIR_Assembler::lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, // perform the fast part of the checking logic __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, nullptr); // call out-of-line instance of __ check_klass_subtype_slow_path(...) - __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo + __ subi(sp, sp, 2 * wordSize); // 2: store k_RInfo and klass_RInfo __ sd(klass_RInfo, Address(sp, wordSize)); // sub klass __ sd(k_RInfo, Address(sp, 0)); // super klass __ far_call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id))); diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp index ed932dddcd8d8..10aa5514fdf3e 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -199,16 +199,16 @@ void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int Label done; // len_in_bytes is positive and ptr sized - sub(len_in_bytes, len_in_bytes, hdr_size_in_bytes); + subi(len_in_bytes, len_in_bytes, hdr_size_in_bytes); beqz(len_in_bytes, done); // Preserve obj if (hdr_size_in_bytes) { - add(obj, obj, hdr_size_in_bytes); + addi(obj, obj, hdr_size_in_bytes); } zero_memory(obj, len_in_bytes, tmp); if (hdr_size_in_bytes) { - sub(obj, obj, hdr_size_in_bytes); + subi(obj, obj, hdr_size_in_bytes); } bind(done); @@ -262,7 +262,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register j(entry_point); bind(loop); - sub(index, index, 1); + subi(index, index, 1); for (int i = -unroll; i < 0; i++) { if (-i == remainder) { bind(entry_point); @@ -272,7 +272,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register if (remainder == 0) { bind(entry_point); } - add(t0, t0, unroll * wordSize); + addi(t0, t0, unroll * wordSize); bnez(index, loop); } } diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp index 6f59f5c2b9559..717f10ca30d57 100644 --- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp @@ -147,7 +147,7 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres const int arg1_sp_offset = 0; const int arg2_sp_offset = 1; const int arg3_sp_offset = 2; - addi(sp, sp, -(arg_num + 1) * wordSize); + subi(sp, sp, (arg_num + 1) * wordSize); sd(arg1, Address(sp, arg1_sp_offset * wordSize)); sd(arg2, Address(sp, arg2_sp_offset * wordSize)); sd(arg3, Address(sp, arg3_sp_offset * wordSize)); @@ -301,14 +301,14 @@ static OopMap* save_live_registers(StubAssembler* sasm, if (save_fpu_registers) { // float registers - __ addi(sp, sp, -(FrameMap::nof_fpu_regs * wordSize)); + __ subi(sp, sp, FrameMap::nof_fpu_regs * wordSize); for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { __ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); } } else { // we define reg_save_layout = 62 as the fixed frame size, // we should also sub 32 * wordSize to sp when save_fpu_registers == false - __ addi(sp, sp, -32 * wordSize); + __ subi(sp, sp, 32 * wordSize); } return generate_oop_map(sasm, save_fpu_registers); @@ -543,7 +543,7 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { // Save our return address because // exception_handler_for_return_address will destroy it. We also // save exception_oop - __ addi(sp, sp, -2 * wordSize); + __ subi(sp, sp, 2 * wordSize); __ sd(exception_oop, Address(sp, wordSize)); __ sd(ra, Address(sp)); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index a075895cd6c95..0455e7b4922fd 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -211,16 +211,14 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, // Handle existing monitor. bind(object_has_monitor); - STATIC_ASSERT(markWord::monitor_value <= INT_MAX); - add(tmp, tmp, -(int)markWord::monitor_value); // monitor - + subi(tmp, tmp, (int)markWord::monitor_value); // monitor ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset())); Label notRecursive; beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. // Recursive lock - addi(disp_hdr, disp_hdr, -1); + subi(disp_hdr, disp_hdr, 1); sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset())); j(unlocked); @@ -537,7 +535,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, if (!UseObjectMonitorTable) { assert(tmp1_monitor == tmp1_mark, "should be the same here"); // Untag the monitor. - add(tmp1_monitor, tmp1_mark, -(int)markWord::monitor_value); + subi(tmp1_monitor, tmp1_mark, (int)markWord::monitor_value); } else { ld(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes())); // No valid pointer below alignof(ObjectMonitor*). Take the slow path. @@ -553,7 +551,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, beqz(tmp2_recursions, not_recursive); // Recursive unlock. - addi(tmp2_recursions, tmp2_recursions, -1); + subi(tmp2_recursions, tmp2_recursions, 1); sd(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset())); j(unlocked); @@ -732,7 +730,7 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, BLOCK_COMMENT("string_indexof_char {"); beqz(cnt1, NOMATCH); - addi(t0, cnt1, isL ? -32 : -16); + subi(t0, cnt1, isL ? 32 : 16); bgtz(t0, DO_LONG); string_indexof_char_short(str1, cnt1, ch, result, isL); j(DONE); @@ -780,7 +778,7 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, bind(CH1_LOOP); ld(ch1, Address(str1)); addi(str1, str1, 8); - addi(cnt1, cnt1, -8); + subi(cnt1, cnt1, 8); compute_match_mask(ch1, ch, match_mask, mask1, mask2); bnez(match_mask, HIT); bgtz(cnt1, CH1_LOOP); @@ -956,7 +954,7 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle, const int ASIZE = 256; const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) - sub(sp, sp, ASIZE); + subi(sp, sp, ASIZE); // init BC offset table with default value: needle_len slli(t0, needle_len, 8); @@ -975,16 +973,16 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle, for (int i = 0; i < 4; i++) { sd(tmp5, Address(ch1, i * wordSize)); } - add(ch1, ch1, 32); - sub(tmp6, tmp6, 4); + addi(ch1, ch1, 32); + subi(tmp6, tmp6, 4); bgtz(tmp6, BM_INIT_LOOP); - sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern + subi(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern Register orig_haystack = tmp5; mv(orig_haystack, haystack); // result_tmp = tmp4 shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); - sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 + subi(ch2, needle_len, 1); // bc offset init value, ch2 is t1 mv(tmp3, needle); // for (i = 0; i < m - 1; ) { @@ -999,7 +997,7 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle, // } bind(BCLOOP); (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); - add(tmp3, tmp3, needle_chr_size); + addi(tmp3, tmp3, needle_chr_size); if (!needle_isL) { // ae == StrIntrinsicNode::UU mv(tmp6, ASIZE); @@ -1009,7 +1007,7 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle, sb(ch2, Address(tmp4)); // store skip offset to BC offset table bind(BCSKIP); - sub(ch2, ch2, 1); // for next pattern element, skip distance -1 + subi(ch2, ch2, 1); // for next pattern element, skip distance -1 bgtz(ch2, BCLOOP); // tmp6: pattern end, address after needle @@ -1046,7 +1044,7 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle, // compare pattern to source string backward shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); (this->*haystack_load_1chr)(skipch, Address(result), noreg); - sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 + subi(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 if (needle_isL == haystack_isL) { // re-init tmp3. It's for free because it's executed in parallel with // load above. Alternative is to initialize it before loop, but it'll @@ -1065,7 +1063,7 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle, if (isLL) { j(BMLOOPSTR1_AFTER_LOAD); } else { - sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 + subi(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 j(BMLOOPSTR1_CMP); } @@ -1076,7 +1074,7 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle, (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); bind(BMLOOPSTR1_AFTER_LOAD); - sub(nlen_tmp, nlen_tmp, 1); + subi(nlen_tmp, nlen_tmp, 1); bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); bind(BMLOOPSTR1_CMP); @@ -1098,11 +1096,11 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle, lbu(result_tmp, Address(result_tmp)); // load skip offset bind(BMADV); - sub(nlen_tmp, needle_len, 1); + subi(nlen_tmp, needle_len, 1); // move haystack after bad char skip offset shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ble(haystack, haystack_end, BMLOOPSTR2); - add(sp, sp, ASIZE); + addi(sp, sp, ASIZE); j(NOMATCH); bind(BMLOOPSTR1_LASTCMP); @@ -1113,11 +1111,11 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle, if (!haystack_isL) { srli(result, result, 1); } - add(sp, sp, ASIZE); + addi(sp, sp, ASIZE); j(DONE); bind(LINEARSTUB); - sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm + subi(t0, needle_len, 16); // small patterns still should be handled by simple algorithm bltz(t0, LINEARSEARCH); mv(result, zr); RuntimeAddress stub = nullptr; @@ -1196,7 +1194,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne if (needle_con_cnt == -1) { Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; - sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); + subi(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); bltz(t0, DOSHORT); (this->*needle_load_1chr)(first, Address(needle), noreg); @@ -1213,13 +1211,13 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne beq(first, ch2, STR1_LOOP); bind(STR2_NEXT); - add(hlen_neg, hlen_neg, haystack_chr_size); + addi(hlen_neg, hlen_neg, haystack_chr_size); blez(hlen_neg, FIRST_LOOP); j(NOMATCH); bind(STR1_LOOP); - add(nlen_tmp, nlen_neg, needle_chr_size); - add(hlen_tmp, hlen_neg, haystack_chr_size); + addi(nlen_tmp, nlen_neg, needle_chr_size); + addi(hlen_tmp, hlen_neg, haystack_chr_size); bgez(nlen_tmp, MATCH); bind(STR1_NEXT); @@ -1228,14 +1226,14 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne add(ch2, haystack, hlen_tmp); (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); bne(ch1, ch2, STR2_NEXT); - add(nlen_tmp, nlen_tmp, needle_chr_size); - add(hlen_tmp, hlen_tmp, haystack_chr_size); + addi(nlen_tmp, nlen_tmp, needle_chr_size); + addi(hlen_tmp, hlen_tmp, haystack_chr_size); bltz(nlen_tmp, STR1_NEXT); j(MATCH); bind(DOSHORT); if (needle_isL == haystack_isL) { - sub(t0, needle_len, 2); + subi(t0, needle_len, 2); bltz(t0, DO1); bgtz(t0, DO3); } @@ -1244,7 +1242,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne if (needle_con_cnt == 4) { Label CH1_LOOP; (this->*load_4chr)(ch1, Address(needle), noreg); - sub(result_tmp, haystack_len, 4); + subi(result_tmp, haystack_len, 4); slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp add(haystack, haystack, tmp3); neg(hlen_neg, tmp3); @@ -1273,7 +1271,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne (this->*load_4chr)(ch2, Address(tmp3), noreg); } beq(ch1, ch2, MATCH); - add(hlen_neg, hlen_neg, haystack_chr_size); + addi(hlen_neg, hlen_neg, haystack_chr_size); blez(hlen_neg, CH1_LOOP); j(NOMATCH); } @@ -1284,7 +1282,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne bind(DO2); (this->*load_2chr)(ch1, Address(needle), noreg); if (needle_con_cnt == 2) { - sub(result_tmp, haystack_len, 2); + subi(result_tmp, haystack_len, 2); } slli(tmp3, result_tmp, haystack_chr_shift); add(haystack, haystack, tmp3); @@ -1307,7 +1305,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne (this->*load_2chr)(ch2, Address(tmp3), noreg); } beq(ch1, ch2, MATCH); - add(hlen_neg, hlen_neg, haystack_chr_size); + addi(hlen_neg, hlen_neg, haystack_chr_size); blez(hlen_neg, CH1_LOOP); j(NOMATCH); BLOCK_COMMENT("} string_indexof DO2"); @@ -1321,7 +1319,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne (this->*load_2chr)(first, Address(needle), noreg); (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); if (needle_con_cnt == 3) { - sub(result_tmp, haystack_len, 3); + subi(result_tmp, haystack_len, 3); } slli(hlen_tmp, result_tmp, haystack_chr_shift); add(haystack, haystack, hlen_tmp); @@ -1340,12 +1338,12 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne beq(first, ch2, STR1_LOOP); bind(STR2_NEXT); - add(hlen_neg, hlen_neg, haystack_chr_size); + addi(hlen_neg, hlen_neg, haystack_chr_size); blez(hlen_neg, FIRST_LOOP); j(NOMATCH); bind(STR1_LOOP); - add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); + addi(hlen_tmp, hlen_neg, 2 * haystack_chr_size); add(ch2, haystack, hlen_tmp); (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); bne(ch1, ch2, STR2_NEXT); @@ -1359,7 +1357,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne BLOCK_COMMENT("string_indexof DO1 {"); bind(DO1); (this->*needle_load_1chr)(ch1, Address(needle), noreg); - sub(result_tmp, haystack_len, 1); + subi(result_tmp, haystack_len, 1); slli(tmp3, result_tmp, haystack_chr_shift); add(haystack, haystack, tmp3); neg(hlen_neg, tmp3); @@ -1368,7 +1366,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne add(tmp3, haystack, hlen_neg); (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); beq(ch1, ch2, MATCH); - add(hlen_neg, hlen_neg, haystack_chr_size); + addi(hlen_neg, hlen_neg, haystack_chr_size); blez(hlen_neg, DO1_LOOP); BLOCK_COMMENT("} string_indexof DO1"); } @@ -1443,7 +1441,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, ld(tmp2, Address(str2)); mv(t0, STUB_THRESHOLD); bge(cnt2, t0, STUB); - sub(cnt2, cnt2, minCharsInWord); + subi(cnt2, cnt2, minCharsInWord); beqz(cnt2, TAIL_CHECK); // convert cnt2 from characters to bytes if (!str1_isL) { @@ -1457,7 +1455,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, ld(tmp2, Address(str2)); mv(t0, STUB_THRESHOLD); bge(cnt2, t0, STUB); - addi(cnt2, cnt2, -4); + subi(cnt2, cnt2, 4); add(str1, str1, cnt2); sub(cnt1, zr, cnt2); slli(cnt2, cnt2, 1); @@ -1584,13 +1582,13 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, // while comparing previous (this->*str1_load_chr)(tmp1, Address(str1), t0); addi(str1, str1, str1_chr_size); - addi(cnt2, cnt2, -1); + subi(cnt2, cnt2, 1); beqz(cnt2, SHORT_LAST_INIT); (this->*str2_load_chr)(cnt1, Address(str2), t0); addi(str2, str2, str2_chr_size); j(SHORT_LOOP_START); bind(SHORT_LOOP); - addi(cnt2, cnt2, -1); + subi(cnt2, cnt2, 1); beqz(cnt2, SHORT_LAST); bind(SHORT_LOOP_START); (this->*str1_load_chr)(tmp2, Address(str1), t0); @@ -1598,7 +1596,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, (this->*str2_load_chr)(t0, Address(str2), t0); addi(str2, str2, str2_chr_size); bne(tmp1, cnt1, SHORT_LOOP_TAIL); - addi(cnt2, cnt2, -1); + subi(cnt2, cnt2, 1); beqz(cnt2, SHORT_LAST2); (this->*str1_load_chr)(tmp1, Address(str1), t0); addi(str1, str1, str1_chr_size); @@ -1633,7 +1631,7 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2, assert(elem_size == 1 || elem_size == 2, "must be char or byte"); assert_different_registers(a1, a2, result, tmp1, tmp2, tmp3, t0); - int elem_per_word = wordSize/elem_size; + int elem_per_word = wordSize / elem_size; int log_elem_size = exact_log2(elem_size); int length_offset = arrayOopDesc::length_offset_in_bytes(); int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); @@ -1662,14 +1660,14 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2, la(a1, Address(a1, base_offset)); la(a2, Address(a2, base_offset)); // Check for short strings, i.e. smaller than wordSize. - addi(cnt1, cnt1, -elem_per_word); + subi(cnt1, cnt1, elem_per_word); bltz(cnt1, SHORT); // Main 8 byte comparison loop. bind(NEXT_WORD); { ld(tmp1, Address(a1)); ld(tmp2, Address(a2)); - addi(cnt1, cnt1, -elem_per_word); + subi(cnt1, cnt1, elem_per_word); addi(a1, a1, wordSize); addi(a2, a2, wordSize); bne(tmp1, tmp2, DONE); @@ -1741,14 +1739,14 @@ void C2_MacroAssembler::string_equals(Register a1, Register a2, mv(result, false); // Check for short strings, i.e. smaller than wordSize. - addi(cnt1, cnt1, -wordSize); + subi(cnt1, cnt1, wordSize); bltz(cnt1, SHORT); // Main 8 byte comparison loop. bind(NEXT_WORD); { ld(tmp1, Address(a1)); ld(tmp2, Address(a2)); - addi(cnt1, cnt1, -wordSize); + subi(cnt1, cnt1, wordSize); addi(a1, a1, wordSize); addi(a2, a2, wordSize); bne(tmp1, tmp2, DONE); @@ -1836,7 +1834,7 @@ void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register res beqz(cnt, DONE); - andi(chunks, cnt, ~(stride-1)); + andi(chunks, cnt, ~(stride - 1)); beqz(chunks, TAIL); mv(pow31_4, 923521); // [31^^4] @@ -1845,7 +1843,7 @@ void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register res slli(chunks_end, chunks, chunks_end_shift); add(chunks_end, ary, chunks_end); - andi(cnt, cnt, stride-1); // don't forget about tail! + andi(cnt, cnt, stride - 1); // don't forget about tail! bind(WIDE_LOOP); mulw(result, result, pow31_4); // 31^^4 * h diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp index 6a3e4f95b9848..e5d4952e1728c 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp @@ -106,7 +106,7 @@ static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize ind __ ld(tmp1, Address(thread, in_bytes(index_offset))); // tmp1 := *(index address) __ beqz(tmp1, runtime); // jump to runtime if index == 0 (full buffer) // The buffer is not full, store value into it. - __ sub(tmp1, tmp1, wordSize); // tmp1 := next index + __ subi(tmp1, tmp1, wordSize); // tmp1 := next index __ sd(tmp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index __ ld(tmp2, Address(thread, in_bytes(buffer_offset))); // tmp2 := buffer address __ add(tmp2, tmp2, tmp1); @@ -490,7 +490,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* __ ld(tmp, queue_index); __ beqz(tmp, runtime); - __ sub(tmp, tmp, wordSize); + __ subi(tmp, tmp, wordSize); __ sd(tmp, queue_index); __ ld(t1, buffer); __ add(tmp, tmp, t1); @@ -557,7 +557,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* __ ld(t0, queue_index); __ beqz(t0, runtime); - __ sub(t0, t0, wordSize); + __ subi(t0, t0, wordSize); __ sd(t0, queue_index); // Reuse RA to hold buffer_addr diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp index 2ad44400687f2..316dae5ead1bf 100644 --- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp @@ -69,7 +69,7 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl __ beqz(count, L_done); // zero count - nothing to do // end = start + count << LogBytesPerHeapOop __ shadd(end, count, start, count, LogBytesPerHeapOop); - __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive + __ subi(end, end, BytesPerHeapOop); // last element address to make inclusive __ srli(start, start, CardTable::card_shift()); __ srli(end, end, CardTable::card_shift()); @@ -81,7 +81,7 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl __ bind(L_loop); __ add(tmp, start, count); __ sb(zr, Address(tmp)); - __ sub(count, count, 1); + __ subi(count, count, 1); __ bgez(count, L_loop); __ bind(L_done); } diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp index 257d445f01187..8ae97492affda 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp @@ -143,7 +143,7 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, __ ld(tmp1, index); // tmp := *index_adr __ beqz(tmp1, runtime); // tmp == 0? If yes, goto runtime - __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize + __ subi(tmp1, tmp1, wordSize); // tmp := tmp - wordSize __ sd(tmp1, index); // *index_adr := tmp __ ld(tmp2, buffer); __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr @@ -562,7 +562,7 @@ void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssemb // end = start + count << LogBytesPerHeapOop // last element address to make inclusive __ shadd(end, count, start, tmp, LogBytesPerHeapOop); - __ sub(end, end, BytesPerHeapOop); + __ subi(end, end, BytesPerHeapOop); __ srli(start, start, CardTable::card_shift()); __ srli(end, end, CardTable::card_shift()); @@ -575,7 +575,7 @@ void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssemb __ bind(L_loop); __ add(tmp, start, count); __ sb(zr, Address(tmp)); - __ sub(count, count, 1); + __ subi(count, count, 1); __ bgez(count, L_loop); __ bind(L_done); } @@ -690,7 +690,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss __ ld(tmp, queue_index); __ beqz(tmp, runtime); - __ sub(tmp, tmp, wordSize); + __ subi(tmp, tmp, wordSize); __ sd(tmp, queue_index); __ ld(t1, buffer); __ add(tmp, tmp, t1); diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp index 0b093838b8b68..ad4f243e5e1e3 100644 --- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp @@ -241,7 +241,7 @@ static void store_barrier_buffer_add(MacroAssembler* masm, __ beqz(tmp2, slow_path); // Bump the pointer - __ sub(tmp2, tmp2, sizeof(ZStoreBarrierEntry)); + __ subi(tmp2, tmp2, sizeof(ZStoreBarrierEntry)); __ sd(tmp2, Address(tmp1, ZStoreBarrierBuffer::current_offset())); // Compute the buffer entry address @@ -848,10 +848,10 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, // Save x10 unless it is the result or tmp register // Set up SP to accommdate parameters and maybe x10. if (ref != x10 && tmp != x10) { - __ sub(sp, sp, 32); + __ subi(sp, sp, 32); __ sd(x10, Address(sp, 16)); } else { - __ sub(sp, sp, 16); + __ subi(sp, sp, 16); } // Setup arguments and call runtime stub @@ -963,7 +963,7 @@ void ZBarrierSetAssembler::generate_c1_store_barrier_stub(LIR_Assembler* ce, __ la(stub->new_zpointer()->as_register(), ce->as_Address(stub->ref_addr()->as_address_ptr())); - __ sub(sp, sp, 16); + __ subi(sp, sp, 16); //Setup arguments and call runtime stub assert(stub->new_zpointer()->is_valid(), "invariant"); ce->store_parameter(stub->new_zpointer()->as_register(), 0); diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index e17a3765b50ec..4299d040b8330 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -266,22 +266,22 @@ void InterpreterMacroAssembler::pop_i(Register r) { void InterpreterMacroAssembler::pop_l(Register r) { ld(r, Address(esp, 0)); - addi(esp, esp, 2 * Interpreter::stackElementSize); + add(esp, esp, 2 * Interpreter::stackElementSize); } void InterpreterMacroAssembler::push_ptr(Register r) { - addi(esp, esp, -wordSize); + subi(esp, esp, wordSize); sd(r, Address(esp, 0)); } void InterpreterMacroAssembler::push_i(Register r) { - addi(esp, esp, -wordSize); + subi(esp, esp, wordSize); sext(r, r, 32); sd(r, Address(esp, 0)); } void InterpreterMacroAssembler::push_l(Register r) { - addi(esp, esp, -2 * wordSize); + subi(esp, esp, 2 * wordSize); sd(zr, Address(esp, wordSize)); sd(r, Address(esp)); } @@ -293,16 +293,16 @@ void InterpreterMacroAssembler::pop_f(FloatRegister r) { void InterpreterMacroAssembler::pop_d(FloatRegister r) { fld(r, Address(esp, 0)); - addi(esp, esp, 2 * Interpreter::stackElementSize); + add(esp, esp, 2 * Interpreter::stackElementSize); } void InterpreterMacroAssembler::push_f(FloatRegister r) { - addi(esp, esp, -wordSize); + subi(esp, esp, wordSize); fsw(r, Address(esp, 0)); } void InterpreterMacroAssembler::push_d(FloatRegister r) { - addi(esp, esp, -2 * wordSize); + subi(esp, esp, 2 * wordSize); fsd(r, Address(esp, 0)); } @@ -895,7 +895,7 @@ void InterpreterMacroAssembler::verify_method_data_pointer() { assert(ProfileInterpreter, "must be profiling interpreter"); #ifdef ASSERT Label verify_continue; - add(sp, sp, -4 * wordSize); + subi(sp, sp, 4 * wordSize); sd(x10, Address(sp, 0)); sd(x11, Address(sp, wordSize)); sd(x12, Address(sp, 2 * wordSize)); @@ -920,7 +920,7 @@ void InterpreterMacroAssembler::verify_method_data_pointer() { ld(x11, Address(sp, wordSize)); ld(x12, Address(sp, 2 * wordSize)); ld(x13, Address(sp, 3 * wordSize)); - add(sp, sp, 4 * wordSize); + addi(sp, sp, 4 * wordSize); #endif // ASSERT } @@ -961,7 +961,7 @@ void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, if (decrement) { ld(t0, addr); - addi(t0, t0, -DataLayout::counter_increment); + subi(t0, t0, DataLayout::counter_increment); Label L; bltz(t0, L); // skip store if counter underflow sd(t0, addr); @@ -1028,7 +1028,7 @@ void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int constant) { assert(ProfileInterpreter, "must be profiling interpreter"); - addi(mdp_in, mdp_in, (unsigned)constant); + add(mdp_in, mdp_in, (unsigned)constant); sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); } @@ -1037,7 +1037,7 @@ void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { assert(ProfileInterpreter, "must be profiling interpreter"); // save/restore across call_VM - addi(sp, sp, -2 * wordSize); + subi(sp, sp, 2 * wordSize); sd(zr, Address(sp, 0)); sd(return_bci, Address(sp, wordSize)); call_VM(noreg, @@ -1739,7 +1739,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca add(t0, mdp, t0); ld(t0, Address(t0)); sub(tmp, tmp, t0); - addi(tmp, tmp, -1); + subi(tmp, tmp, 1); Address arg_addr = argument_address(tmp); ld(tmp, arg_addr); @@ -1752,7 +1752,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca profile_obj_type(tmp, mdo_arg_addr, t1); int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); - addi(off_to_args, off_to_args, to_add); + add(off_to_args, off_to_args, to_add); // increment index by 1 addi(index, index, 1); @@ -1762,7 +1762,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca if (MethodData::profile_return()) { ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); - addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); + sub(tmp, tmp, TypeProfileArgsLimit * TypeStackSlotEntries::per_arg_count()); } add(t0, mdp, off_to_args); @@ -1849,7 +1849,7 @@ void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register t // mdo start + parameters offset + array length - 1 add(mdp, mdp, tmp1); ld(tmp1, Address(mdp, ArrayData::array_len_offset())); - add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + sub(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); Label loop; bind(loop); @@ -1875,7 +1875,7 @@ void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register t profile_obj_type(tmp2, arg_type, tmp3); // go to next parameter - add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + sub(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); bgez(tmp1, loop); bind(profile_continue); @@ -1890,7 +1890,7 @@ void InterpreterMacroAssembler::load_resolved_indy_entry(Register cache, Registe ld(cache, Address(xcpool, in_bytes(ConstantPoolCache::invokedynamic_entries_offset()))); // Scale the index to be the entry index * sizeof(ResolvedIndyEntry) slli(index, index, log2i_exact(sizeof(ResolvedIndyEntry))); - add(cache, cache, Array::base_offset_in_bytes()); + addi(cache, cache, Array::base_offset_in_bytes()); add(cache, cache, index); } @@ -1906,7 +1906,7 @@ void InterpreterMacroAssembler::load_field_entry(Register cache, Register index, } // Get address of field entries array ld(cache, Address(xcpool, ConstantPoolCache::field_entries_offset())); - add(cache, cache, Array::base_offset_in_bytes()); + addi(cache, cache, Array::base_offset_in_bytes()); add(cache, cache, index); // Prevents stale data from being read after the bytecode is patched to the fast bytecode membar(MacroAssembler::LoadLoad); @@ -1932,7 +1932,7 @@ void InterpreterMacroAssembler::load_method_entry(Register cache, Register index // Get address of field entries array ld(cache, Address(xcpool, ConstantPoolCache::method_entries_offset())); - add(cache, cache, Array::base_offset_in_bytes()); + addi(cache, cache, Array::base_offset_in_bytes()); add(cache, cache, index); } diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp index de34d84fecb00..602aacb99485c 100644 --- a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp @@ -138,10 +138,10 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { Register reg = next_gpr(); if (reg == c_rarg1) { assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); - __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); + __ add(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); } else if (reg != noreg) { // c_rarg2-c_rarg7 - __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); + __ add(x10, from(), Interpreter::local_offset_in_bytes(offset())); __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... __ ld(temp(), x10); Label L; @@ -150,7 +150,7 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { __ bind(L); } else { //to stack - __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); + __ add(x10, from(), Interpreter::local_offset_in_bytes(offset())); __ ld(temp(), x10); Label L; __ bnez(temp(), L); diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index 27452e7a6842c..3794e0ea9205b 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -245,7 +245,7 @@ void MacroAssembler::inc_held_monitor_count(Register tmp) { void MacroAssembler::dec_held_monitor_count(Register tmp) { Address dst(xthread, JavaThread::held_monitor_count_offset()); ld(tmp, dst); - addi(tmp, tmp, -1); + subi(tmp, tmp, 1); sd(tmp, dst); #ifdef ASSERT Label ok; @@ -1430,7 +1430,7 @@ void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp) { void MacroAssembler::push_reg(Register Rs) { - addi(esp, esp, 0 - wordSize); + subi(esp, esp, wordSize); sd(Rs, Address(esp, 0)); } @@ -1462,7 +1462,7 @@ int MacroAssembler::push_reg(unsigned int bitset, Register stack) { int offset = is_even(count) ? 0 : wordSize; if (count) { - addi(stack, stack, -count * wordSize - offset); + sub(stack, stack, count * wordSize + offset); } for (int i = count - 1; i >= 0; i--) { sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); @@ -1487,7 +1487,7 @@ int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { } if (count) { - addi(stack, stack, count * wordSize + offset); + add(stack, stack, count * wordSize + offset); } assert(words_popped == count, "oops, popped != count"); @@ -1503,7 +1503,7 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { int push_slots = count + (count & 1); if (count) { - addi(stack, stack, -push_slots * wordSize); + subi(stack, stack, push_slots * wordSize); } for (int i = count - 1; i >= 0; i--) { @@ -1632,7 +1632,7 @@ void MacroAssembler::vector_update_crc32(Register crc, Register buf, Register le Label VectorLoop; Label LastBlock; - add(tableN16, table3, 1*single_table_size*sizeof(juint), tmp1); + add(tableN16, table3, 1 * single_table_size * sizeof(juint), tmp1); mv(tmp5, 0xff); if (MaxVectorSize == 16) { @@ -1651,7 +1651,7 @@ void MacroAssembler::vector_update_crc32(Register crc, Register buf, Register le srli(blks, len, 6); slli(t1, blks, 6); sub(len, len, t1); - sub(blks, blks, 1); + subi(blks, blks, 1); blez(blks, LastBlock); bind(VectorLoop); @@ -1683,7 +1683,7 @@ void MacroAssembler::vector_update_crc32(Register crc, Register buf, Register le addi(tmp1, tmp1, 1); } - sub(blks, blks, 1); + subi(blks, blks, 1); bgtz(blks, VectorLoop); } @@ -2046,7 +2046,7 @@ void MacroAssembler::kernel_crc32_vclmul_fold(Register crc, Register buf, Regist Register vclmul_table = tmp3; la(vclmul_table, table_addr); - add(vclmul_table, vclmul_table, table_num*single_table_size*sizeof(juint), tmp1); + add(vclmul_table, vclmul_table, table_num * single_table_size * sizeof(juint), tmp1); la(table0, table_addr); if (MaxVectorSize == 16) { @@ -2092,25 +2092,25 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, const ExternalAddress table_addr = StubRoutines::crc_table_addr(); la(table0, table_addr); - add(table1, table0, 1*single_table_size*sizeof(juint), tmp1); - add(table2, table0, 2*single_table_size*sizeof(juint), tmp1); - add(table3, table2, 1*single_table_size*sizeof(juint), tmp1); + add(table1, table0, 1 * single_table_size * sizeof(juint), tmp1); + add(table2, table0, 2 * single_table_size * sizeof(juint), tmp1); + add(table3, table2, 1 * single_table_size * sizeof(juint), tmp1); // Ensure basic 4-byte alignment of input byte buffer mv(tmp1, 4); blt(len, tmp1, L_by1_loop); test_bit(tmp1, buf, 0); beqz(tmp1, L_skip1); - subw(len, len, 1); + subiw(len, len, 1); lbu(tmp1, Address(buf)); - add(buf, buf, 1); + addi(buf, buf, 1); update_byte_crc32(crc, tmp1, table0); bind(L_skip1); test_bit(tmp1, buf, 1); beqz(tmp1, L_skip2); - subw(len, len, 2); + subiw(len, len, 2); lhu(tmp1, Address(buf)); - add(buf, buf, 2); + addi(buf, buf, 2); zext(tmp2, tmp1, 8); update_byte_crc32(crc, tmp2, table0); srli(tmp2, tmp1, 8); @@ -2134,8 +2134,8 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, align(CodeEntryAlignment); // Entry for L_unroll_loop - add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below - andi(len, len, unroll_words-1); // len = (len % unroll_words) + add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below + andi(len, len, unroll_words - 1); // len = (len % unroll_words) sub(loop_buf_end, loop_buf_end, len); bind(L_unroll_loop); for (int i = 0; i < unroll; i++) { @@ -2162,17 +2162,17 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, bind(L_by1_loop); beqz(len, L_exit); - subw(len, len, 1); + subiw(len, len, 1); lbu(tmp1, Address(buf)); update_byte_crc32(crc, tmp1, table0); beqz(len, L_exit); - subw(len, len, 1); + subiw(len, len, 1); lbu(tmp1, Address(buf, 1)); update_byte_crc32(crc, tmp1, table0); beqz(len, L_exit); - subw(len, len, 1); + subiw(len, len, 1); lbu(tmp1, Address(buf, 2)); update_byte_crc32(crc, tmp1, table0); @@ -2238,7 +2238,7 @@ void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); // Push float registers f0-f7, f10-f17, f28-f31. - addi(sp, sp, - wordSize * 20); + subi(sp, sp, wordSize * 20); int offset = 0; for (int i = 0; i < 32; i++) { if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { @@ -2264,7 +2264,7 @@ void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) push_reg(RegSet::range(x5, x31), sp); // float registers - addi(sp, sp, - 32 * wordSize); + subi(sp, sp, 32 * wordSize); for (int i = 0; i < 32; i++) { fsd(as_FloatRegister(i), Address(sp, i * wordSize)); } @@ -2605,32 +2605,32 @@ void MacroAssembler::movptr2(Register Rd, uint64_t addr, int32_t &offset, Regist offset = lower12; } -void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { +void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register tmp) { if (is_simm12(increment)) { addi(Rd, Rn, increment); } else { - assert_different_registers(Rn, temp); - li(temp, increment); - add(Rd, Rn, temp); + assert_different_registers(Rn, tmp); + mv(tmp, increment); + add(Rd, Rn, tmp); } } -void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { +void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register tmp) { + add(Rd, Rn, -decrement, tmp); +} + +void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register tmp) { if (is_simm12(increment)) { addiw(Rd, Rn, increment); } else { - assert_different_registers(Rn, temp); - li(temp, increment); - addw(Rd, Rn, temp); + assert_different_registers(Rn, tmp); + mv(tmp, increment); + addw(Rd, Rn, tmp); } } -void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { - add(Rd, Rn, -decrement, temp); -} - -void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { - addw(Rd, Rn, -decrement, temp); +void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register tmp) { + addw(Rd, Rn, -decrement, tmp); } void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { @@ -4153,8 +4153,8 @@ void MacroAssembler::repne_scan(Register addr, Register value, Register count, bind(Lloop); ld(tmp, addr); beq(value, tmp, Lexit); - add(addr, addr, wordSize); - sub(count, count, 1); + addi(addr, addr, wordSize); + subi(count, count, 1); bnez(count, Lloop); bind(Lexit); } @@ -4222,7 +4222,7 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, // Load the array length. lwu(x12, Address(x15, Array::length_offset_in_bytes())); // Skip to start of data. - add(x15, x15, Array::base_offset_in_bytes()); + addi(x15, x15, Array::base_offset_in_bytes()); // Set t0 to an obvious invalid value, falling through by default mv(t0, -1); @@ -4914,11 +4914,11 @@ void MacroAssembler::mul_add(Register out, Register in, Register offset, blt(len, tmp, L_tail_loop); bind(L_unroll); for (int i = 0; i < unroll; i++) { - sub(in, in, BytesPerInt); + subi(in, in, BytesPerInt); lwu(t0, Address(in, 0)); mul(t1, t0, k); add(t0, t1, out); - sub(offset, offset, BytesPerInt); + subi(offset, offset, BytesPerInt); lwu(t1, Address(offset, 0)); add(t0, t0, t1); sw(t0, Address(offset, 0)); @@ -4929,16 +4929,16 @@ void MacroAssembler::mul_add(Register out, Register in, Register offset, bind(L_tail_loop); blez(len, L_end); - sub(in, in, BytesPerInt); + subi(in, in, BytesPerInt); lwu(t0, Address(in, 0)); mul(t1, t0, k); add(t0, t1, out); - sub(offset, offset, BytesPerInt); + subi(offset, offset, BytesPerInt); lwu(t1, Address(offset, 0)); add(t0, t0, t1); sw(t0, Address(offset, 0)); srli(out, t0, 32); - subw(len, len, 1); + subiw(len, len, 1); j(L_tail_loop); bind(L_end); @@ -5015,13 +5015,13 @@ void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register lwu(x_xstart, Address(t0, 0)); bind(L_first_loop); - subw(idx, idx, 1); + subiw(idx, idx, 1); shadd(t0, idx, y, t0, LogBytesPerInt); lwu(y_idx, Address(t0, 0)); mul(product, x_xstart, y_idx); add(product, product, carry); srli(carry, product, 32); - subw(kdx, kdx, 1); + subiw(kdx, kdx, 1); shadd(t0, kdx, z, t0, LogBytesPerInt); sw(product, Address(t0, 0)); bgtz(idx, L_first_loop); @@ -5049,7 +5049,7 @@ void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register Label L_first_loop, L_first_loop_exit; Label L_one_x, L_one_y, L_multiply; - subw(xstart, xstart, 1); + subiw(xstart, xstart, 1); bltz(xstart, L_one_x); shadd(t0, xstart, x, t0, LogBytesPerInt); @@ -5057,9 +5057,9 @@ void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian bind(L_first_loop); - subw(idx, idx, 1); + subiw(idx, idx, 1); bltz(idx, L_first_loop_exit); - subw(idx, idx, 1); + subiw(idx, idx, 1); bltz(idx, L_one_y); shadd(t0, idx, y, t0, LogBytesPerInt); @@ -5072,7 +5072,7 @@ void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register cad(product, product, carry, t1); adc(carry, t0, zr, t1); - subw(kdx, kdx, 2); + subiw(kdx, kdx, 2); ror_imm(product, product, 32); // back to big-endian shadd(t0, kdx, z, t0, LogBytesPerInt); sd(product, Address(t0, 0)); @@ -5170,7 +5170,7 @@ void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, beqz(idx, L_post_third_loop_done); Label L_check_1; - subw(idx, idx, 2); + subiw(idx, idx, 2); bltz(idx, L_check_1); shadd(t0, idx, y, t0, LogBytesPerInt); @@ -5192,7 +5192,7 @@ void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, bind(L_check_1); andi(idx, idx, 0x1); - subw(idx, idx, 1); + subiw(idx, idx, 1); bltz(idx, L_post_third_loop_done); shadd(t0, idx, y, t0, LogBytesPerInt); lwu(tmp4, Address(t0, 0)); @@ -5252,7 +5252,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi Label L_multiply_64_x_64_loop, L_done; - subw(xstart, xlen, 1); + subiw(xstart, xlen, 1); bltz(xstart, L_done); const Register jdx = tmp1; @@ -5271,9 +5271,9 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi bind(L_second_loop_unaligned); mv(carry, zr); mv(jdx, ylen); - subw(xstart, xstart, 1); + subiw(xstart, xstart, 1); bltz(xstart, L_done); - sub(sp, sp, 2 * wordSize); + subi(sp, sp, 2 * wordSize); sd(z, Address(sp, 0)); sd(zr, Address(sp, wordSize)); shadd(t0, xstart, z, t0, LogBytesPerInt); @@ -5285,7 +5285,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi blez(jdx, L_third_loop_exit); bind(L_third_loop); - subw(jdx, jdx, 1); + subiw(jdx, jdx, 1); shadd(t0, jdx, y, t0, LogBytesPerInt); lwu(t0, Address(t0, 0)); mul(t1, t0, product); @@ -5313,13 +5313,13 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi beqz(kdx, L_second_loop_aligned); Label L_carry; - subw(kdx, kdx, 1); + subiw(kdx, kdx, 1); beqz(kdx, L_carry); shadd(t0, kdx, z, t0, LogBytesPerInt); sw(carry, Address(t0, 0)); srli(carry, carry, 32); - subw(kdx, kdx, 1); + subiw(kdx, kdx, 1); bind(L_carry); shadd(t0, kdx, z, t0, LogBytesPerInt); @@ -5344,16 +5344,16 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi mv(carry, zr); // carry = 0; mv(jdx, ylen); // j = ystart+1 - subw(xstart, xstart, 1); // i = xstart-1; + subiw(xstart, xstart, 1); // i = xstart-1; bltz(xstart, L_done); - sub(sp, sp, 4 * wordSize); + subi(sp, sp, 4 * wordSize); sd(z, Address(sp, 0)); Label L_last_x; shadd(t0, xstart, z, t0, LogBytesPerInt); addi(z, t0, 4); - subw(xstart, xstart, 1); // i = xstart-1; + subiw(xstart, xstart, 1); // i = xstart-1; bltz(xstart, L_last_x); shadd(t0, xstart, x, t0, LogBytesPerInt); @@ -5378,7 +5378,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi shadd(t0, tmp3, z, t0, LogBytesPerInt); sw(carry, Address(t0, 0)); - subw(tmp3, tmp3, 1); + subiw(tmp3, tmp3, 1); bltz(tmp3, L_done); srli(carry, carry, 32); @@ -5538,13 +5538,13 @@ void MacroAssembler::zero_words(Register base, uint64_t cnt) { Register loop_base = t1; cnt = cnt - remainder; mv(cnt_reg, cnt); - add(loop_base, base, remainder * wordSize); + addi(loop_base, base, remainder * wordSize); bind(loop); sub(cnt_reg, cnt_reg, unroll); for (int i = 0; i < unroll; i++) { sd(zr, Address(loop_base, i * wordSize)); } - add(loop_base, loop_base, unroll * wordSize); + addi(loop_base, loop_base, unroll * wordSize); bnez(cnt_reg, loop); } @@ -5595,12 +5595,12 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value) { jr(t1); bind(loop); - add(base, base, unroll * 8); + addi(base, base, unroll * wordSize); for (int i = -unroll; i < 0; i++) { sd(value, Address(base, i * 8)); } bind(entry); - sub(cnt, cnt, unroll); + subi(cnt, cnt, unroll); bgez(cnt, loop); bind(fini); @@ -5639,7 +5639,7 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm bind(loop); cbo_zero(base); sub(cnt, cnt, tmp1); - add(base, base, CacheLineSize); + addi(base, base, CacheLineSize); bge(cnt, tmp1, loop); } @@ -5725,7 +5725,7 @@ void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, /* Rs1 > Rs2, install 1 */ \ bgtz(result, Ldone); \ feq_##FLOATSIG(result, Rs1, Rs2); \ - addi(result, result, -1); \ + subi(result, result, 1); \ /* Rs1 = Rs2, install 0 */ \ /* NaN or Rs1 < Rs2, install -1 */ \ bind(Ldone); \ @@ -5736,7 +5736,7 @@ void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, /* Rs1 < Rs2, install -1 */ \ bgtz(result, Ldone); \ feq_##FLOATSIG(result, Rs1, Rs2); \ - addi(result, result, -1); \ + subi(result, result, 1); \ /* Rs1 = Rs2, install 0 */ \ /* NaN or Rs1 > Rs2, install 1 */ \ bind(Ldone); \ @@ -6144,10 +6144,10 @@ void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) { } int64_t imm = (int64_t)(1UL << bit_pos); if (is_simm12(imm)) { - and_imm12(Rd, Rs, imm); + andi(Rd, Rs, imm); } else { srli(Rd, Rs, bit_pos); - and_imm12(Rd, Rd, 1); + andi(Rd, Rd, 1); } } @@ -6200,7 +6200,7 @@ void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Registe // After successful lock, push object on lock-stack. add(t, xthread, top); sd(obj, Address(t)); - addw(top, top, oopSize); + addiw(top, top, oopSize); sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); } @@ -6232,7 +6232,7 @@ void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tm // Check if obj is top of lock-stack. lwu(top, Address(xthread, JavaThread::lock_stack_top_offset())); - subw(top, top, oopSize); + subiw(top, top, oopSize); add(t, xthread, top); ld(t, Address(t)); bne(obj, t, slow, /* is_far */ true); @@ -6272,7 +6272,7 @@ void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tm // Restore lock-stack and handle the unlock in runtime. DEBUG_ONLY(add(t, xthread, top);) DEBUG_ONLY(sd(obj, Address(t));) - addw(top, top, oopSize); + addiw(top, top, oopSize); sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); j(slow); diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 1f5a47bf65acb..6d0bdba05b541 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -60,14 +60,14 @@ class MacroAssembler: public Assembler { // Note that SP must be updated to the right place before saving/restoring RA and FP // because signal based thread suspend/resume could happen asynchronously. void enter() { - addi(sp, sp, - 2 * wordSize); + subi(sp, sp, 2 * wordSize); sd(ra, Address(sp, wordSize)); sd(fp, Address(sp)); addi(fp, sp, 2 * wordSize); } void leave() { - addi(sp, fp, - 2 * wordSize); + subi(sp, fp, 2 * wordSize); ld(fp, Address(sp)); ld(ra, Address(sp, wordSize)); addi(sp, sp, 2 * wordSize); @@ -886,10 +886,20 @@ class MacroAssembler: public Assembler { public: // arith - void add (Register Rd, Register Rn, int64_t increment, Register temp = t0); - void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0); - void sub (Register Rd, Register Rn, int64_t decrement, Register temp = t0); - void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0); + void add(Register Rd, Register Rn, int64_t increment, Register tmp = t0); + void sub(Register Rd, Register Rn, int64_t decrement, Register tmp = t0); + void addw(Register Rd, Register Rn, int32_t increment, Register tmp = t0); + void subw(Register Rd, Register Rn, int32_t decrement, Register tmp = t0); + + void subi(Register Rd, Register Rn, int32_t decrement) { + assert(is_simm12(-decrement), "Must be"); + addi(Rd, Rn, -decrement); + } + + void subiw(Register Rd, Register Rn, int32_t decrement) { + assert(is_simm12(-decrement), "Must be"); + addiw(Rd, Rn, -decrement); + } #define INSN(NAME) \ inline void NAME(Register Rd, Register Rs1, Register Rs2) { \ diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 7cb42a6b30c3c..a9b48fd18c406 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -6482,9 +6482,9 @@ instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ ins_encode %{ // src2 is imm, so actually call the addi - __ add(as_Register($dst$$reg), - as_Register($src1$$reg), - $src2$$constant); + __ addi(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant); %} ins_pipe(ialu_reg_imm); @@ -6513,9 +6513,9 @@ instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ins_encode %{ // src2 is imm, so actually call the addi - __ add(as_Register($dst$$reg), - as_Register($src1$$reg), - $src2$$constant); + __ addi(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant); %} ins_pipe(ialu_reg_imm); @@ -6546,9 +6546,9 @@ instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ ins_encode %{ // src2 is imm, so actually call the addiw - __ subw(as_Register($dst$$reg), - as_Register($src1$$reg), - $src2$$constant); + __ subiw(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant); %} ins_pipe(ialu_reg_imm); @@ -6577,9 +6577,9 @@ instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ ins_encode %{ // src2 is imm, so actually call the addi - __ sub(as_Register($dst$$reg), - as_Register($src1$$reg), - $src2$$constant); + __ subi(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant); %} ins_pipe(ialu_reg_imm); diff --git a/src/hotspot/cpu/riscv/runtime_riscv.cpp b/src/hotspot/cpu/riscv/runtime_riscv.cpp index 441bd1f241f28..33c1fb6e06bfd 100644 --- a/src/hotspot/cpu/riscv/runtime_riscv.cpp +++ b/src/hotspot/cpu/riscv/runtime_riscv.cpp @@ -73,7 +73,7 @@ void OptoRuntime::generate_uncommon_trap_blob() { // Push self-frame. We get here with a return address in RA // and sp should be 16 byte aligned // push fp and retaddr by hand - __ addi(sp, sp, -2 * wordSize); + __ subi(sp, sp, 2 * wordSize); __ sd(ra, Address(sp, wordSize)); __ sd(fp, Address(sp, 0)); // we don't expect an arg reg save area @@ -140,7 +140,7 @@ void OptoRuntime::generate_uncommon_trap_blob() { __ lwu(x12, Address(x14, Deoptimization::UnrollBlock:: size_of_deoptimized_frame_offset())); - __ sub(x12, x12, 2 * wordSize); + __ subi(x12, x12, 2 * wordSize); __ add(sp, sp, x12); __ ld(fp, Address(sp, 0)); __ ld(ra, Address(sp, wordSize)); @@ -188,7 +188,7 @@ void OptoRuntime::generate_uncommon_trap_blob() { Label loop; __ bind(loop); __ ld(x11, Address(x15, 0)); // Load frame size - __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand + __ subi(x11, x11, 2 * wordSize); // We'll push pc and fp by hand __ ld(ra, Address(x12, 0)); // Save return address __ enter(); // and old fp & set new fp __ sub(sp, sp, x11); // Prolog @@ -196,9 +196,9 @@ void OptoRuntime::generate_uncommon_trap_blob() { // This value is corrected by layout_activation_impl __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); __ mv(sender_sp, sp); // Pass sender_sp to next frame - __ add(x15, x15, wordSize); // Bump array pointer (sizes) - __ add(x12, x12, wordSize); // Bump array pointer (pcs) - __ subw(x13, x13, 1); // Decrement counter + __ addi(x15, x15, wordSize); // Bump array pointer (sizes) + __ addi(x12, x12, wordSize); // Bump array pointer (pcs) + __ subiw(x13, x13, 1); // Decrement counter __ bgtz(x13, loop); __ ld(ra, Address(x12, 0)); // save final return address // Re-push self-frame @@ -292,7 +292,7 @@ void OptoRuntime::generate_exception_blob() { // push fp and retaddr by hand // Exception pc is 'return address' for stack walker - __ addi(sp, sp, -2 * wordSize); + __ subi(sp, sp, 2 * wordSize); __ sd(ra, Address(sp, wordSize)); __ sd(fp, Address(sp)); // there are no callee save registers and we don't expect an @@ -346,7 +346,7 @@ void OptoRuntime::generate_exception_blob() { // and we dont' expect an arg reg save area __ ld(fp, Address(sp)); __ ld(x13, Address(sp, wordSize)); - __ addi(sp, sp , 2 * wordSize); + __ addi(sp, sp, 2 * wordSize); // x10: exception handler diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index 9af1b6a9bb128..e5b31cdccdf0e 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -802,7 +802,7 @@ static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegP if (args[i].first()->is_Register()) { x = x + args[i].first()->as_Register(); } else if (args[i].first()->is_FloatRegister()) { - __ addi(sp, sp, -2 * wordSize); + __ subi(sp, sp, 2 * wordSize); __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); } } @@ -824,7 +824,7 @@ static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMR ; } else if (args[i].first()->is_FloatRegister()) { __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); - __ add(sp, sp, 2 * wordSize); + __ addi(sp, sp, 2 * wordSize); } } } @@ -2336,7 +2336,7 @@ void SharedRuntime::generate_deopt_blob() { // Pop deoptimized frame __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset())); - __ sub(x12, x12, 2 * wordSize); + __ subi(x12, x12, 2 * wordSize); __ add(sp, sp, x12); __ ld(fp, Address(sp, 0)); __ ld(ra, Address(sp, wordSize)); @@ -2379,7 +2379,7 @@ void SharedRuntime::generate_deopt_blob() { __ bind(loop); __ ld(x9, Address(x14, 0)); // Load frame size __ addi(x14, x14, wordSize); - __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand + __ subi(x9, x9, 2 * wordSize); // We'll push pc and fp by hand __ ld(ra, Address(x12, 0)); // Load pc __ addi(x12, x12, wordSize); __ enter(); // Save old & set new fp @@ -2388,7 +2388,7 @@ void SharedRuntime::generate_deopt_blob() { __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable __ mv(sender_sp, sp); // Pass sender_sp to next frame - __ addi(x13, x13, -1); // Decrement counter + __ subi(x13, x13, 1); // Decrement counter __ bnez(x13, loop); // Re-push self-frame @@ -2566,7 +2566,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address cal #endif // Adjust return pc forward to step over the safepoint poll instruction - __ add(x18, x18, NativeInstruction::instruction_size); + __ addi(x18, x18, NativeInstruction::instruction_size); __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); } @@ -2736,7 +2736,7 @@ RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address ru assert(is_even(framesize / 2), "sp not 16-byte aligned"); // ra and fp are already in place - __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog + __ subi(sp, fp, (unsigned)framesize << LogBytesPerInt); // prolog int frame_complete = __ pc() - start; diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp index 1afa3545afcf9..84d120ab7c91f 100644 --- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -341,7 +341,7 @@ class StubGenerator: public StubCodeGenerator { address loop = __ pc(); __ ld(t0, Address(c_rarg5, 0)); __ addi(c_rarg5, c_rarg5, wordSize); - __ addi(c_rarg6, c_rarg6, -1); + __ subi(c_rarg6, c_rarg6, 1); __ push_reg(t0); __ bgtz(c_rarg6, loop); @@ -623,7 +623,7 @@ class StubGenerator: public StubCodeGenerator { __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); __ ld(c_rarg3, Address(c_rarg2)); - __ add(c_rarg3, c_rarg3, 1); + __ addi(c_rarg3, c_rarg3, 1); __ sd(c_rarg3, Address(c_rarg2)); // object is in x10 @@ -698,8 +698,8 @@ class StubGenerator: public StubCodeGenerator { for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { __ sd(zr, Address(base, i * wordSize)); } - __ add(base, base, MacroAssembler::zero_words_block_size * wordSize); - __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); + __ addi(base, base, MacroAssembler::zero_words_block_size * wordSize); + __ subi(cnt, cnt, MacroAssembler::zero_words_block_size); __ bge(cnt, tmp1, loop); __ bind(done); } @@ -779,7 +779,7 @@ class StubGenerator: public StubCodeGenerator { __ ld(tmp_reg7, Address(s, 8 * unit)); __ addi(s, s, 8 * unit); - __ sub(count, count, 16); + __ subi(count, count, 16); __ bltz(count, drain); __ bind(again); @@ -805,7 +805,7 @@ class StubGenerator: public StubCodeGenerator { __ addi(s, s, 8 * unit); __ addi(d, d, 8 * unit); - __ sub(count, count, 8); + __ subi(count, count, 8); __ bgez(count, again); // Drain @@ -959,9 +959,9 @@ class StubGenerator: public StubCodeGenerator { } if (is_aligned) { - __ addi(t0, cnt, -32); + __ subi(t0, cnt, 32); __ bgez(t0, copy32_loop); - __ addi(t0, cnt, -8); + __ subi(t0, cnt, 8); __ bgez(t0, copy8_loop, is_far); __ j(copy_small); } else { @@ -985,7 +985,7 @@ class StubGenerator: public StubCodeGenerator { __ addi(src, src, step); __ addi(dst, dst, step); } - __ addi(cnt, cnt, -granularity); + __ subi(cnt, cnt, granularity); __ beqz(cnt, done, is_far); __ j(same_aligned); @@ -996,8 +996,8 @@ class StubGenerator: public StubCodeGenerator { __ bind(copy32_loop); if (is_backwards) { - __ addi(src, src, -wordSize * 4); - __ addi(dst, dst, -wordSize * 4); + __ subi(src, src, wordSize * 4); + __ subi(dst, dst, wordSize * 4); } // we first load 32 bytes, then write it, so the direction here doesn't matter bs_asm->copy_load_at(_masm, decorators, type, 8, tmp3, Address(src), gct1); @@ -1014,19 +1014,19 @@ class StubGenerator: public StubCodeGenerator { __ addi(src, src, wordSize * 4); __ addi(dst, dst, wordSize * 4); } - __ addi(t0, cnt, -(32 + wordSize * 4)); - __ addi(cnt, cnt, -wordSize * 4); + __ subi(t0, cnt, 32 + wordSize * 4); + __ subi(cnt, cnt, wordSize * 4); __ bgez(t0, copy32_loop); // cnt >= 32, do next loop __ beqz(cnt, done); // if that's all - done - __ addi(t0, cnt, -8); // if not - copy the reminder + __ subi(t0, cnt, 8); // if not - copy the reminder __ bltz(t0, copy_small); // cnt < 8, go to copy_small, else fall through to copy8_loop __ bind(copy8_loop); if (is_backwards) { - __ addi(src, src, -wordSize); - __ addi(dst, dst, -wordSize); + __ subi(src, src, wordSize); + __ subi(dst, dst, wordSize); } bs_asm->copy_load_at(_masm, decorators, type, 8, tmp3, Address(src), gct1); bs_asm->copy_store_at(_masm, decorators, type, 8, Address(dst), tmp3, gct1, gct2, gct3); @@ -1035,8 +1035,8 @@ class StubGenerator: public StubCodeGenerator { __ addi(src, src, wordSize); __ addi(dst, dst, wordSize); } - __ addi(t0, cnt, -(8 + wordSize)); - __ addi(cnt, cnt, -wordSize); + __ subi(t0, cnt, 8 + wordSize); + __ subi(cnt, cnt, wordSize); __ bgez(t0, copy8_loop); // cnt >= 8, do next loop __ beqz(cnt, done); // if that's all - done @@ -1054,7 +1054,7 @@ class StubGenerator: public StubCodeGenerator { __ addi(src, src, step); __ addi(dst, dst, step); } - __ addi(cnt, cnt, -granularity); + __ subi(cnt, cnt, granularity); __ bgtz(cnt, copy_small); __ bind(done); @@ -1578,8 +1578,8 @@ class StubGenerator: public StubCodeGenerator { bs->copy_store_at(_masm, decorators, T_OBJECT, element_size, Address(to, 0), copied_oop, gct1, gct2, gct3); - __ add(to, to, UseCompressedOops ? 4 : 8); - __ sub(count, count, 1); + __ addi(to, to, UseCompressedOops ? 4 : 8); + __ subi(count, count, 1); __ beqz(count, L_do_card_marks); // ======== loop entry is here ======== @@ -1587,7 +1587,7 @@ class StubGenerator: public StubCodeGenerator { bs->copy_load_at(_masm, decorators, T_OBJECT, element_size, copied_oop, Address(from, 0), gct1); - __ add(from, from, UseCompressedOops ? 4 : 8); + __ addi(from, from, UseCompressedOops ? 4 : 8); __ beqz(copied_oop, L_store_element); __ load_klass(r9_klass, copied_oop);// query the object klass @@ -1929,9 +1929,9 @@ class StubGenerator: public StubCodeGenerator { t1, L_failed); __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); - __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ addi(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); - __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ addi(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); __ sext(count, scratch_length, 32); // length __ BIND(L_plain_copy); __ j(RuntimeAddress(oop_copy_entry)); @@ -1952,9 +1952,9 @@ class StubGenerator: public StubCodeGenerator { // Marshal the base address arguments now, freeing registers. __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); - __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ addi(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); - __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ addi(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); __ sext(count, length, 32); // length (reloaded) const Register sco_temp = c_rarg3; // this register is free now assert_different_registers(from, to, count, sco_temp, @@ -2068,7 +2068,7 @@ class StubGenerator: public StubCodeGenerator { __ beqz(t0, L_skip_align1); __ sb(value, Address(to, 0)); __ addi(to, to, 1); - __ addiw(count, count, -1); + __ subiw(count, count, 1); __ bind(L_skip_align1); // Fallthrough case T_SHORT: @@ -2077,7 +2077,7 @@ class StubGenerator: public StubCodeGenerator { __ beqz(t0, L_skip_align2); __ sh(value, Address(to, 0)); __ addi(to, to, 2); - __ addiw(count, count, -(2 >> shift)); + __ subiw(count, count, 2 >> shift); __ bind(L_skip_align2); // Fallthrough case T_INT: @@ -2086,7 +2086,7 @@ class StubGenerator: public StubCodeGenerator { __ beqz(t0, L_skip_align4); __ sw(value, Address(to, 0)); __ addi(to, to, 4); - __ addiw(count, count, -(4 >> shift)); + __ subiw(count, count, 4 >> shift); __ bind(L_skip_align4); break; default: ShouldNotReachHere(); @@ -2500,7 +2500,7 @@ class StubGenerator: public StubCodeGenerator { __ mv(isLU ? tmp1 : tmp2, tmp3); __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); __ addi(str2, str2, isLU ? wordSize : wordSize / 2); - __ sub(cnt2, cnt2, wordSize / 2); // Already loaded 4 symbols + __ subi(cnt2, cnt2, wordSize / 2); // Already loaded 4 symbols __ xorr(tmp3, tmp1, tmp2); __ bnez(tmp3, CALCULATE_DIFFERENCE); @@ -2523,10 +2523,10 @@ class StubGenerator: public StubCodeGenerator { __ addi(cnt2, cnt2, -wordSize / 2); // we are now 8-bytes aligned on strL - __ sub(cnt2, cnt2, wordSize * 2); + __ subi(cnt2, cnt2, wordSize * 2); __ bltz(cnt2, TAIL); __ bind(SMALL_LOOP); // smaller loop - __ sub(cnt2, cnt2, wordSize * 2); + __ subi(cnt2, cnt2, wordSize * 2); compare_string_8_x_LU(tmpL, tmpU, strL, strU, CALCULATE_DIFFERENCE); compare_string_8_x_LU(tmpL, tmpU, strL, strU, CALCULATE_DIFFERENCE); __ bgez(cnt2, SMALL_LOOP); @@ -2540,11 +2540,11 @@ class StubGenerator: public StubCodeGenerator { __ bltz(t0, LOAD_LAST); // remaining characters are greater than or equals to 8, we can do one compare_string_8_x_LU compare_string_8_x_LU(tmpL, tmpU, strL, strU, CALCULATE_DIFFERENCE); - __ addi(cnt2, cnt2, -wordSize); + __ subi(cnt2, cnt2, wordSize); __ beqz(cnt2, DONE); // no character left __ bind(LOAD_LAST); // cnt2 = 1..7 characters left - __ addi(cnt2, cnt2, -wordSize); // cnt2 is now an offset in strL which points to last 8 bytes + __ subi(cnt2, cnt2, wordSize); // cnt2 is now an offset in strL which points to last 8 bytes __ slli(t0, cnt2, 1); // t0 is now an offset in strU which points to last 16 bytes __ add(strL, strL, cnt2); // Address of last 8 bytes in Latin1 string __ add(strU, strU, t0); // Address of last 16 bytes in UTF-16 string @@ -2609,9 +2609,9 @@ class StubGenerator: public StubCodeGenerator { __ set_last_Java_frame(sp, fp, ra); __ enter(); - __ add(t1, sp, wordSize); + __ addi(t1, sp, wordSize); - __ sub(sp, sp, 4 * wordSize); + __ subi(sp, sp, 4 * wordSize); __ push_call_clobbered_registers(); @@ -2664,22 +2664,22 @@ class StubGenerator: public StubCodeGenerator { // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used // update cnt2 counter with already loaded 8 bytes - __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2)); + __ subi(cnt2, cnt2, wordSize / (isLL ? 1 : 2)); // update pointers, because of previous read - __ add(str1, str1, wordSize); - __ add(str2, str2, wordSize); + __ addi(str1, str1, wordSize); + __ addi(str2, str2, wordSize); // less than 16 bytes left? - __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ subi(cnt2, cnt2, isLL ? 16 : 8); __ push_reg(spilled_regs, sp); __ bltz(cnt2, TAIL); __ bind(SMALL_LOOP); compare_string_16_bytes_same(DIFF, DIFF2); - __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ subi(cnt2, cnt2, isLL ? 16 : 8); __ bgez(cnt2, SMALL_LOOP); __ bind(TAIL); __ addi(cnt2, cnt2, isLL ? 16 : 8); __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); - __ sub(cnt2, cnt2, isLL ? 8 : 4); + __ subi(cnt2, cnt2, isLL ? 8 : 4); __ blez(cnt2, CHECK_LAST); __ xorr(tmp4, tmp1, tmp2); __ bnez(tmp4, DIFF); @@ -2687,7 +2687,7 @@ class StubGenerator: public StubCodeGenerator { __ addi(str1, str1, 8); __ ld(tmp2, Address(str2)); __ addi(str2, str2, 8); - __ sub(cnt2, cnt2, isLL ? 8 : 4); + __ subi(cnt2, cnt2, isLL ? 8 : 4); __ bind(CHECK_LAST); if (!isLL) { __ add(cnt2, cnt2, cnt2); // now in bytes @@ -2798,7 +2798,7 @@ class StubGenerator: public StubCodeGenerator { if (needle_isL != haystack_isL) { __ mv(tmp, ch1); } - __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1); + __ subi(haystack_len, haystack_len, wordSize / haystack_chr_size - 1); __ blez(haystack_len, L_SMALL); if (needle_isL != haystack_isL) { @@ -2814,9 +2814,9 @@ class StubGenerator: public StubCodeGenerator { // search first char of needle, if success, goto L_HAS_ZERO; __ bnez(match_mask, L_HAS_ZERO); - __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); - __ add(result, result, wordSize / haystack_chr_size); - __ add(haystack, haystack, wordSize); + __ subi(haystack_len, haystack_len, wordSize / haystack_chr_size); + __ addi(result, result, wordSize / haystack_chr_size); + __ addi(haystack, haystack, wordSize); __ bltz(haystack_len, L_POST_LOOP); __ bind(L_LOOP); @@ -2825,9 +2825,9 @@ class StubGenerator: public StubCodeGenerator { __ bnez(match_mask, L_HAS_ZERO); __ bind(L_LOOP_PROCEED); - __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); - __ add(haystack, haystack, wordSize); - __ add(result, result, wordSize / haystack_chr_size); + __ subi(haystack_len, haystack_len, wordSize / haystack_chr_size); + __ addi(haystack, haystack, wordSize); + __ addi(result, result, wordSize / haystack_chr_size); __ bgez(haystack_len, L_LOOP); __ bind(L_POST_LOOP); @@ -2875,7 +2875,7 @@ class StubGenerator: public StubCodeGenerator { __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); - __ add(trailing_zeros, trailing_zeros, 1); + __ addi(trailing_zeros, trailing_zeros, 1); __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); __ beq(first, ch2, L_SMALL_CMP_LOOP); @@ -2883,8 +2883,8 @@ class StubGenerator: public StubCodeGenerator { __ beqz(match_mask, NOMATCH); __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); - __ add(result, result, 1); - __ add(haystack, haystack, haystack_chr_size); + __ addi(result, result, 1); + __ addi(haystack, haystack, haystack_chr_size); __ j(L_SMALL_HAS_ZERO_LOOP); __ align(OptoLoopAlignment); @@ -2904,7 +2904,7 @@ class StubGenerator: public StubCodeGenerator { __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) - __ sub(result, result, 1); // array index from 0, so result -= 1 + __ subi(result, result, 1); // array index from 0, so result -= 1 __ bind(L_HAS_ZERO_LOOP); __ mv(needle_len, wordSize / haystack_chr_size); @@ -2912,7 +2912,7 @@ class StubGenerator: public StubCodeGenerator { __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); // load next 8 bytes from haystack, and increase result index __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); - __ add(result, result, 1); + __ addi(result, result, 1); __ mv(trailing_zeros, wordSize / haystack_chr_size); __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); @@ -2922,7 +2922,7 @@ class StubGenerator: public StubCodeGenerator { needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); - __ add(trailing_zeros, trailing_zeros, 1); // next char index + __ addi(trailing_zeros, trailing_zeros, 1); // next char index __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); __ beq(needle_len, ch2, L_CMP_LOOP); @@ -2931,7 +2931,7 @@ class StubGenerator: public StubCodeGenerator { __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); - __ add(haystack, haystack, haystack_chr_size); + __ addi(haystack, haystack, haystack_chr_size); __ j(L_HAS_ZERO_LOOP); __ align(OptoLoopAlignment); @@ -2942,7 +2942,7 @@ class StubGenerator: public StubCodeGenerator { __ align(OptoLoopAlignment); __ bind(L_CMP_LOOP_LAST_CMP2); __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); - __ add(result, result, 1); + __ addi(result, result, 1); __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); __ j(DONE); @@ -3303,7 +3303,7 @@ class StubGenerator: public StubCodeGenerator { (this->*block)(); bind(odd); (this->*block)(); - addi(count, count, -2); + subi(count, count, 2); bgtz(count, loop); bind(end); } @@ -3319,7 +3319,7 @@ class StubGenerator: public StubCodeGenerator { (this->*block)(d, s, tmp); bind(odd); (this->*block)(d, s, tmp); - addi(count, count, -2); + subi(count, count, 2); bgtz(count, loop); bind(end); } @@ -3371,7 +3371,7 @@ class StubGenerator: public StubCodeGenerator { mul(Rlo_ab, Ra, Rb); addi(Pa, Pa, wordSize); ld(Ra, Address(Pa)); - addi(Pb, Pb, -wordSize); + subi(Pb, Pb, wordSize); ld(Rb, Address(Pb)); acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the // previous iteration. @@ -3382,7 +3382,7 @@ class StubGenerator: public StubCodeGenerator { mul(Rlo_mn, Rm, Rn); addi(Pm, Pm, wordSize); ld(Rm, Address(Pm)); - addi(Pn, Pn, -wordSize); + subi(Pn, Pn, wordSize); ld(Rn, Address(Pn)); acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); } @@ -3426,7 +3426,7 @@ class StubGenerator: public StubCodeGenerator { // // mul(Rlo_mn, Rm, Rn); // cad(zr, tmp0, Rlo_mn); - addi(t0, tmp0, -1); + subi(t0, tmp0, 1); sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero cadc(tmp0, tmp1, Rhi_mn, t0); adc(tmp1, tmp2, zr, t0); @@ -3455,13 +3455,13 @@ class StubGenerator: public StubCodeGenerator { // Rb = *--Pb; // Rm = *++Pm; // Rn = *--Pn; - add(Pa, Pa, wordSize); + addi(Pa, Pa, wordSize); ld(Ra, Address(Pa)); - add(Pb, Pb, -wordSize); + subi(Pb, Pb, wordSize); ld(Rb, Address(Pb)); - add(Pm, Pm, wordSize); + addi(Pm, Pm, wordSize); ld(Rm, Address(Pm)); - add(Pn, Pn, -wordSize); + subi(Pn, Pn, wordSize); ld(Rn, Address(Pn)); mv(Rhi_mn, zr); @@ -3516,15 +3516,15 @@ class StubGenerator: public StubCodeGenerator { slli(Rn, i, LogBytesPerWord); // Rn as temp register add(Rn, Pm_base, Rn); sd(Rm, Address(Rn)); - add(i, i, 1); + addi(i, i, 1); slli(Rn, i, LogBytesPerWord); add(Rm, Pm_base, Rn); ld(Rm, Address(Rm)); add(Rn, Pn_base, Rn); ld(Rn, Address(Rn)); - sub(cnt, cnt, 1); + subi(cnt, cnt, 1); } bnez(cnt, loop); - addi(tmp0, tmp0, -1); + subi(tmp0, tmp0, 1); add(tmp0, tmp0, t0); } bnez(tmp0, again); } bind(post); @@ -3547,7 +3547,7 @@ class StubGenerator: public StubCodeGenerator { } // [63...0] -> [31...0][63...32] void reverse1(Register d, Register s, Register tmp) { - addi(s, s, -wordSize); + subi(s, s, wordSize); ld(tmp, Address(s)); ror_imm(tmp, tmp, 32, t0); sd(tmp, Address(d)); @@ -3584,7 +3584,7 @@ class StubGenerator: public StubCodeGenerator { mul(Rlo_mn, Rm, Rn); addi(Pm, Pm, wordSize); ld(Rm, Address(Pm)); - addi(Pn, Pn, -wordSize); + subi(Pn, Pn, wordSize); ld(Rn, Address(Pn)); } @@ -3619,7 +3619,7 @@ class StubGenerator: public StubCodeGenerator { // // mul(Rlo_mn, Rm, Rn); // cad(zr, tmp, Rlo_mn); - addi(t0, tmp0, -1); + subi(t0, tmp0, 1); sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero cadc(tmp0, tmp1, Rhi_mn, t0); adc(tmp1, tmp2, zr, t0); @@ -3725,7 +3725,7 @@ class StubGenerator: public StubCodeGenerator { } block_comment(" } // j"); post1(); - addw(Ri, Ri, 1); + addiw(Ri, Ri, 1); blt(Ri, Rlen, loop); bind(end); block_comment("} // i"); @@ -3743,12 +3743,12 @@ class StubGenerator: public StubCodeGenerator { block_comment(" for (j = len*2-i-1; j; j--) {"); { slliw(Rj, Rlen, 1); subw(Rj, Rj, Ri); - subw(Rj, Rj, 1); + subiw(Rj, Rj, 1); unroll_2(Rj, &MontgomeryMultiplyGenerator::step); } block_comment(" } // j"); post2(Ri, Rlen); - addw(Ri, Ri, 1); + addiw(Ri, Ri, 1); slli(t0, Rlen, 1); blt(Ri, t0, loop); bind(end); @@ -3859,7 +3859,7 @@ class StubGenerator: public StubCodeGenerator { block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { slli(Rj, Rlen, 1); sub(Rj, Rj, Ri); - sub(Rj, Rj, 1); + subi(Rj, Rj, 1); srliw(Rj, Rj, 1); unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); } block_comment(" } // j"); @@ -3921,7 +3921,7 @@ class StubGenerator: public StubCodeGenerator { if (return_barrier) { // preserve possible return value from a method returning to the return barrier - __ sub(sp, sp, 2 * wordSize); + __ subi(sp, sp, 2 * wordSize); __ fsd(f10, Address(sp, 0 * wordSize)); __ sd(x10, Address(sp, 1 * wordSize)); } @@ -3934,7 +3934,7 @@ class StubGenerator: public StubCodeGenerator { // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK) __ ld(x10, Address(sp, 1 * wordSize)); __ fld(f10, Address(sp, 0 * wordSize)); - __ add(sp, sp, 2 * wordSize); + __ addi(sp, sp, 2 * wordSize); } #ifndef PRODUCT @@ -3959,7 +3959,7 @@ class StubGenerator: public StubCodeGenerator { if (return_barrier) { // save original return value -- again - __ sub(sp, sp, 2 * wordSize); + __ subi(sp, sp, 2 * wordSize); __ fsd(f10, Address(sp, 0 * wordSize)); __ sd(x10, Address(sp, 1 * wordSize)); } @@ -3974,14 +3974,14 @@ class StubGenerator: public StubCodeGenerator { // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK) __ ld(x10, Address(sp, 1 * wordSize)); __ fld(f10, Address(sp, 0 * wordSize)); - __ add(sp, sp, 2 * wordSize); + __ addi(sp, sp, 2 * wordSize); } else { __ mv(x10, zr); // return 0 (success) from doYield } // we're now on the yield frame (which is in an address above us b/c sp has been pushed down) __ mv(fp, t1); - __ sub(sp, t1, 2 * wordSize); // now pointing to fp spill + __ subi(sp, t1, 2 * wordSize); // now pointing to fp spill if (return_barrier_exception) { __ ld(c_rarg1, Address(fp, -1 * wordSize)); // return address @@ -4438,7 +4438,7 @@ class StubGenerator: public StubCodeGenerator { if (multi_block) { int total_adds = vset_sew == Assembler::e32 ? 240 : 608; __ addi(consts, consts, -total_adds); - __ add(ofs, ofs, vset_sew == Assembler::e32 ? 64 : 128); + __ addi(ofs, ofs, vset_sew == Assembler::e32 ? 64 : 128); __ ble(ofs, limit, multi_block_loop); __ mv(c_rarg0, ofs); // return ofs } @@ -4932,7 +4932,7 @@ class StubGenerator: public StubCodeGenerator { chacha20_quarter_round(work_vrs[2], work_vrs[7], work_vrs[8], work_vrs[13], tmp_vr); chacha20_quarter_round(work_vrs[3], work_vrs[4], work_vrs[9], work_vrs[14], tmp_vr); - __ sub(loop, loop, 1); + __ subi(loop, loop, 1); __ bnez(loop, L_Rounds); } @@ -5512,7 +5512,7 @@ class StubGenerator: public StubCodeGenerator { __ sb(byte2, Address(dst, 2)); __ sb(combined24Bits, Address(dst, 3)); - __ sub(length, length, 3); + __ subi(length, length, 3); __ addi(dst, dst, 4); // loop back __ bnez(length, ScalarLoop); @@ -5789,7 +5789,7 @@ class StubGenerator: public StubCodeGenerator { __ sb(byte1, Address(dst, 1)); __ sb(combined32Bits, Address(dst, 2)); - __ sub(length, length, 4); + __ subi(length, length, 4); __ addi(dst, dst, 3); // loop back __ bnez(length, ScalarLoop); @@ -5972,23 +5972,23 @@ class StubGenerator: public StubCodeGenerator { __ beqz(len, L_combine); // Jumping to L_by1_loop - __ sub(len, len, step_1); + __ subi(len, len, step_1); __ j(L_by1_loop); __ bind(L_nmax); __ sub(len, len, nmax); - __ sub(count, nmax, 16); + __ subi(count, nmax, 16); __ bltz(len, L_by16); // Align L_nmax loop by 64 __ bind(L_nmax_loop_entry); - __ sub(count, count, 32); + __ subi(count, count, 32); __ bind(L_nmax_loop); adler32_process_bytes(buff, s1, s2, vtable_64, vzero, vbytes, vs1acc, vs2acc, temp0, temp1, temp2, temp3, vtemp1, vtemp2, step_64, Assembler::m4); - __ sub(count, count, step_64); + __ subi(count, count, step_64); __ bgtz(count, L_nmax_loop); // There are three iterations left to do @@ -6005,7 +6005,7 @@ class StubGenerator: public StubCodeGenerator { __ remuw(s2, s2, base); __ sub(len, len, nmax); - __ sub(count, nmax, 16); + __ subi(count, nmax, 16); __ bgez(len, L_nmax_loop_entry); __ bind(L_by16); @@ -6019,7 +6019,7 @@ class StubGenerator: public StubCodeGenerator { adler32_process_bytes(buff, s1, s2, vtable_64, vzero, vbytes, vs1acc, vs2acc, temp0, temp1, temp2, temp3, vtemp1, vtemp2, step_64, Assembler::m4); - __ sub(len, len, step_64); + __ subi(len, len, step_64); // By now the temp3 should still be 64 __ bge(len, temp3, L_by16_loop_unroll); @@ -6027,11 +6027,11 @@ class StubGenerator: public StubCodeGenerator { adler32_process_bytes(buff, s1, s2, vtable_16, vzero, vbytes, vs1acc, vs2acc, temp0, temp1, temp2, temp3, vtemp1, vtemp2, step_16, Assembler::m1); - __ sub(len, len, step_16); + __ subi(len, len, step_16); __ bgez(len, L_by16_loop); __ bind(L_by1); - __ add(len, len, 15); + __ addi(len, len, 15); __ bltz(len, L_do_mod); __ bind(L_by1_loop); @@ -6039,7 +6039,7 @@ class StubGenerator: public StubCodeGenerator { __ addi(buff, buff, step_1); __ add(s1, temp0, s1); __ add(s2, s2, s1); - __ sub(len, len, step_1); + __ subi(len, len, step_1); __ bgez(len, L_by1_loop); __ bind(L_do_mod); @@ -6221,7 +6221,7 @@ static const int64_t right_3_bits = right_n_bits(3); // U_2:U_1:U_0: += (U_2 >> 2) * 5 poly1305_reduce(U_2, U_1, U_0, t1, t2); - __ sub(length, length, BLOCK_LENGTH); + __ subi(length, length, BLOCK_LENGTH); __ addi(input_start, input_start, BLOCK_LENGTH); __ mv(t1, BLOCK_LENGTH); __ bge(length, t1, LOOP); diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp index bc67de54c4bc6..38cc88cbd35d8 100644 --- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp @@ -85,8 +85,8 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() { // c_rarg3: first stack arg - wordSize // adjust sp - __ addi(sp, c_rarg3, -18 * wordSize); - __ addi(sp, sp, -2 * wordSize); + __ subi(sp, c_rarg3, 18 * wordSize); + __ subi(sp, sp, 2 * wordSize); __ sd(ra, Address(sp, 0)); __ call_VM(noreg, @@ -742,8 +742,8 @@ void TemplateInterpreterGenerator::lock_method() { // add space for monitor & lock __ check_extended_sp(); - __ add(sp, sp, - entry_size); // add space for a monitor entry - __ add(esp, esp, - entry_size); + __ sub(sp, sp, entry_size); // add space for a monitor entry + __ sub(esp, esp, entry_size); __ sub(t0, sp, fp); __ srai(t0, t0, Interpreter::logStackElementSize); __ sd(t0, Address(fp, frame::interpreter_frame_extended_sp_offset * wordSize)); @@ -768,17 +768,17 @@ void TemplateInterpreterGenerator::lock_method() { void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { // initialize fixed part of activation frame if (native_call) { - __ add(esp, sp, - 14 * wordSize); + __ subi(esp, sp, 14 * wordSize); __ mv(xbcp, zr); - __ add(sp, sp, - 14 * wordSize); + __ subi(sp, sp, 14 * wordSize); // add 2 zero-initialized slots for native calls __ sd(zr, Address(sp, 13 * wordSize)); __ sd(zr, Address(sp, 12 * wordSize)); } else { - __ add(esp, sp, - 12 * wordSize); + __ subi(esp, sp, 12 * wordSize); __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase - __ add(sp, sp, - 12 * wordSize); + __ subi(sp, sp, 12 * wordSize); } __ sd(xbcp, Address(sp, wordSize)); __ mv(t0, frame::interpreter_frame_initial_sp_offset); @@ -833,7 +833,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { } else { // Make sure there is room for the exception oop pushed in case method throws // an exception (see TemplateInterpreterGenerator::generate_throw_exception()) - __ sub(t0, sp, 2 * wordSize); + __ subi(t0, sp, 2 * wordSize); __ sub(t1, t0, fp); __ srai(t1, t1, Interpreter::logStackElementSize); __ sd(t1, Address(sp, 5 * wordSize)); @@ -1018,7 +1018,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // compute beginning of parameters (xlocals) __ shadd(xlocals, x12, esp, xlocals, 3); - __ addi(xlocals, xlocals, -wordSize); + __ subi(xlocals, xlocals, wordSize); // Pull SP back to minimum size: this avoids holes in the stack __ andi(sp, esp, -16); @@ -1175,7 +1175,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { { Label L; __ lwu(t, Address(xthread, JavaThread::thread_state_offset())); - __ addi(t0, zr, (u1)_thread_in_Java); + __ mv(t0, (u1)_thread_in_Java); __ beq(t, t0, L); __ stop("Wrong thread state in native stub"); __ bind(L); @@ -1202,7 +1202,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ restore_cpu_control_state_after_jni(t0); // make room for the pushes we're about to do - __ sub(t0, esp, 4 * wordSize); + __ subi(t0, esp, 4 * wordSize); __ andi(sp, t0, -16); // NOTE: The order of these pushes is known to frame::interpreter_frame_result @@ -1308,7 +1308,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { { Label no_reguard; __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); - __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); + __ mv(t1, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); __ bne(t0, t1, no_reguard); __ push_call_clobbered_registers(); @@ -1440,7 +1440,7 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { // compute beginning of parameters (xlocals) __ shadd(xlocals, x12, esp, t1, 3); - __ add(xlocals, xlocals, -wordSize); + __ subi(xlocals, xlocals, wordSize); // Make room for additional locals __ slli(t1, x13, 3); @@ -1458,8 +1458,8 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { __ blez(x13, exit); // do nothing if x13 <= 0 __ bind(loop); __ sd(zr, Address(t0)); - __ add(t0, t0, wordSize); - __ add(x13, x13, -1); // until everything initialized + __ addi(t0, t0, wordSize); + __ subi(x13, x13, 1); // until everything initialized __ bnez(x13, loop); __ bind(exit); } @@ -1650,7 +1650,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { __ slli(x10, x10, Interpreter::logStackElementSize); __ restore_locals(); __ sub(xlocals, xlocals, x10); - __ add(xlocals, xlocals, wordSize); + __ addi(xlocals, xlocals, wordSize); // Save these arguments __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization:: @@ -1745,7 +1745,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // sp: expression stack of caller // fp: fp of caller // FIXME: There's no point saving ra here because VM calls don't trash it - __ sub(sp, sp, 2 * wordSize); + __ subi(sp, sp, 2 * wordSize); __ sd(x10, Address(sp, 0)); // save exception __ sd(ra, Address(sp, wordSize)); // save return address __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, @@ -1754,7 +1754,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { __ mv(x11, x10); // save exception handler __ ld(x10, Address(sp, 0)); // restore exception __ ld(ra, Address(sp, wordSize)); // restore return address - __ add(sp, sp, 2 * wordSize); + __ addi(sp, sp, 2 * wordSize); // We might be returning to a deopt handler that expects x13 to // contain the exception pc __ mv(x13, ra); diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp index e51604569f688..0bc0f6461571b 100644 --- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp @@ -197,7 +197,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, Label L_fast_patch; // if a breakpoint is present we can't rewrite the stream directly __ load_unsigned_byte(temp_reg, at_bcp(0)); - __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register. + __ sub(temp_reg, temp_reg, Bytecodes::_breakpoint); // temp_reg is temporary register. __ bnez(temp_reg, L_fast_patch); // Let breakpoint table handling rewrite to quicker bytecode __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg); @@ -209,7 +209,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, Label L_okay; __ load_unsigned_byte(temp_reg, at_bcp(0)); __ beq(temp_reg, bc_reg, L_okay); - __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc)); + __ sub(temp_reg, temp_reg, (int)Bytecodes::java_code(bc)); __ beqz(temp_reg, L_okay); __ stop("patching the wrong bytecode"); __ bind(L_okay); @@ -737,7 +737,7 @@ void TemplateTable::iaload() { // x10: array // x11: index index_check(x10, x11); // leaves index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); __ shadd(x10, x11, x10, t0, 2); __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); __ sext(x10, x10, 32); @@ -750,7 +750,7 @@ void TemplateTable::laload() { // x10: array // x11: index index_check(x10, x11); // leaves index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); __ shadd(x10, x11, x10, t0, 3); __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -762,7 +762,7 @@ void TemplateTable::faload() { // x10: array // x11: index index_check(x10, x11); // leaves index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); __ shadd(x10, x11, x10, t0, 2); __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -774,7 +774,7 @@ void TemplateTable::daload() { // x10: array // x11: index index_check(x10, x11); // leaves index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); __ shadd(x10, x11, x10, t0, 3); __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -786,7 +786,7 @@ void TemplateTable::aaload() { // x10: array // x11: index index_check(x10, x11); // leaves index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); do_oop_load(_masm, Address(x10), x10, IS_ARRAY); } @@ -798,7 +798,7 @@ void TemplateTable::baload() { // x10: array // x11: index index_check(x10, x11); // leaves index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); __ shadd(x10, x11, x10, t0, 0); __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -810,7 +810,7 @@ void TemplateTable::caload() { // x10: array // x11: index index_check(x10, x11); // leaves index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); __ shadd(x10, x11, x10, t0, 1); __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -826,7 +826,7 @@ void TemplateTable::fast_icaload() { // x10: array // x11: index index_check(x10, x11); // leaves index in x11, kills t0 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 __ shadd(x10, x11, x10, t0, 1); __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -838,7 +838,7 @@ void TemplateTable::saload() { // x10: array // x11: index index_check(x10, x11); // leaves index in x11, kills t0 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); __ shadd(x10, x11, x10, t0, 1); __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -1017,7 +1017,7 @@ void TemplateTable::iastore() { // x11: index // x13: array index_check(x13, x11); // prefer index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); __ shadd(t0, x11, x13, t0, 2); __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); } @@ -1030,7 +1030,7 @@ void TemplateTable::lastore() { // x11: index // x13: array index_check(x13, x11); // prefer index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); __ shadd(t0, x11, x13, t0, 3); __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); } @@ -1043,7 +1043,7 @@ void TemplateTable::fastore() { // x11: index // x13: array index_check(x13, x11); // prefer index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); __ shadd(t0, x11, x13, t0, 2); __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg, noreg); } @@ -1056,7 +1056,7 @@ void TemplateTable::dastore() { // x11: index // x13: array index_check(x13, x11); // prefer index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); __ shadd(t0, x11, x13, t0, 3); __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg, noreg); } @@ -1070,7 +1070,7 @@ void TemplateTable::aastore() { __ ld(x13, at_tos_p2()); // array index_check(x13, x12); // kills x11 - __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + __ addi(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); Address element_address(x14, 0); @@ -1134,7 +1134,7 @@ void TemplateTable::bastore() { __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 __ bind(L_skip); - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); __ add(x11, x13, x11); __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg, noreg); @@ -1148,7 +1148,7 @@ void TemplateTable::castore() { // x11: index // x13: array index_check(x13, x11); // prefer index in x11 - __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); __ shadd(t0, x11, x13, t0, 1); __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); } @@ -1185,12 +1185,12 @@ void TemplateTable::astore(int n) { void TemplateTable::pop() { transition(vtos, vtos); - __ addi(esp, esp, Interpreter::stackElementSize); + __ add(esp, esp, Interpreter::stackElementSize); } void TemplateTable::pop2() { transition(vtos, vtos); - __ addi(esp, esp, 2 * Interpreter::stackElementSize); + __ add(esp, esp, 2 * Interpreter::stackElementSize); } void TemplateTable::dup() { @@ -1883,7 +1883,7 @@ void TemplateTable::ret() { __ profile_ret(x11, x12); __ ld(xbcp, Address(xmethod, Method::const_offset())); __ add(xbcp, xbcp, x11); - __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); + __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); __ dispatch_next(vtos, 0, /*generate_poll*/true); } @@ -1958,7 +1958,7 @@ void TemplateTable::fast_linearswitch() { __ lw(t0, Address(t0, 2 * BytesPerInt)); __ beq(x10, t0, found); __ bind(loop_entry); - __ addi(x11, x11, -1); + __ subi(x11, x11, 1); __ bgez(x11, loop); // default case __ profile_switch_default(x10); @@ -2544,7 +2544,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ j(Done); __ bind(notByte); - __ sub(t0, tos_state, (u1)ztos); + __ subi(t0, tos_state, (u1)ztos); __ bnez(t0, notBool); // ztos (same code as btos) @@ -2558,7 +2558,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ j(Done); __ bind(notBool); - __ sub(t0, tos_state, (u1)atos); + __ subi(t0, tos_state, (u1)atos); __ bnez(t0, notObj); // atos do_oop_load(_masm, field, x10, IN_HEAP); @@ -2569,7 +2569,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ j(Done); __ bind(notObj); - __ sub(t0, tos_state, (u1)itos); + __ subi(t0, tos_state, (u1)itos); __ bnez(t0, notInt); // itos __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); @@ -2582,7 +2582,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ j(Done); __ bind(notInt); - __ sub(t0, tos_state, (u1)ctos); + __ subi(t0, tos_state, (u1)ctos); __ bnez(t0, notChar); // ctos __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); @@ -2594,7 +2594,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ j(Done); __ bind(notChar); - __ sub(t0, tos_state, (u1)stos); + __ subi(t0, tos_state, (u1)stos); __ bnez(t0, notShort); // stos __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); @@ -2606,7 +2606,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ j(Done); __ bind(notShort); - __ sub(t0, tos_state, (u1)ltos); + __ subi(t0, tos_state, (u1)ltos); __ bnez(t0, notLong); // ltos __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); @@ -2618,7 +2618,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ j(Done); __ bind(notLong); - __ sub(t0, tos_state, (u1)ftos); + __ subi(t0, tos_state, (u1)ftos); __ bnez(t0, notFloat); // ftos __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); @@ -2631,7 +2631,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ bind(notFloat); #ifdef ASSERT - __ sub(t0, tos_state, (u1)dtos); + __ subi(t0, tos_state, (u1)dtos); __ bnez(t0, notDouble); #endif // dtos @@ -2696,9 +2696,9 @@ void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is __ load_unsigned_byte(c_rarg3, Address(c_rarg2, in_bytes(ResolvedFieldEntry::type_offset()))); Label nope2, done, ok; __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue - __ sub(t0, c_rarg3, ltos); + __ subi(t0, c_rarg3, (u1)ltos); __ beqz(t0, ok); - __ sub(t0, c_rarg3, dtos); + __ subi(t0, c_rarg3, (u1)dtos); __ bnez(t0, nope2); __ bind(ok); __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue); @@ -2772,7 +2772,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr } __ bind(notByte); - __ sub(t0, tos_state, (u1)ztos); + __ subi(t0, tos_state, (u1)ztos); __ bnez(t0, notBool); // ztos @@ -2792,7 +2792,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr } __ bind(notBool); - __ sub(t0, tos_state, (u1)atos); + __ subi(t0, tos_state, (u1)atos); __ bnez(t0, notObj); // atos @@ -2813,7 +2813,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr } __ bind(notObj); - __ sub(t0, tos_state, (u1)itos); + __ subi(t0, tos_state, (u1)itos); __ bnez(t0, notInt); // itos @@ -2833,7 +2833,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr } __ bind(notInt); - __ sub(t0, tos_state, (u1)ctos); + __ subi(t0, tos_state, (u1)ctos); __ bnez(t0, notChar); // ctos @@ -2853,7 +2853,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr } __ bind(notChar); - __ sub(t0, tos_state, (u1)stos); + __ subi(t0, tos_state, (u1)stos); __ bnez(t0, notShort); // stos @@ -2873,7 +2873,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr } __ bind(notShort); - __ sub(t0, tos_state, (u1)ltos); + __ subi(t0, tos_state, (u1)ltos); __ bnez(t0, notLong); // ltos @@ -2893,7 +2893,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr } __ bind(notLong); - __ sub(t0, tos_state, (u1)ftos); + __ subi(t0, tos_state, (u1)ftos); __ bnez(t0, notFloat); // ftos @@ -2914,7 +2914,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr __ bind(notFloat); #ifdef ASSERT - __ sub(t0, tos_state, (u1)dtos); + __ subi(t0, tos_state, (u1)dtos); __ bnez(t0, notDouble); #endif @@ -3207,7 +3207,7 @@ void TemplateTable::fast_xaccess(TosState state) { __ bind(notVolatile); } - __ sub(xbcp, xbcp, 1); + __ subi(xbcp, xbcp, 1); } //----------------------------------------------------------------------------- @@ -3521,7 +3521,7 @@ void TemplateTable::_new() { __ la(t0, Address(t0, tags_offset)); __ lbu(t0, t0); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); - __ sub(t1, t0, (u1)JVM_CONSTANT_Class); + __ subi(t1, t0, (u1)JVM_CONSTANT_Class); __ bnez(t1, slow_case); // get InstanceKlass @@ -3558,9 +3558,9 @@ void TemplateTable::_new() { // zero, go directly to the header initialization. if (UseCompactObjectHeaders) { assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned"); - __ sub(x13, x13, oopDesc::base_offset_in_bytes()); + __ subi(x13, x13, oopDesc::base_offset_in_bytes()); } else { - __ sub(x13, x13, sizeof(oopDesc)); + __ subi(x13, x13, sizeof(oopDesc)); } __ beqz(x13, initialize_header); @@ -3568,15 +3568,15 @@ void TemplateTable::_new() { { if (UseCompactObjectHeaders) { assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned"); - __ add(x12, x10, oopDesc::base_offset_in_bytes()); + __ addi(x12, x10, oopDesc::base_offset_in_bytes()); } else { - __ add(x12, x10, sizeof(oopDesc)); + __ addi(x12, x10, sizeof(oopDesc)); } Label loop; __ bind(loop); __ sd(zr, Address(x12)); - __ add(x12, x12, BytesPerLong); - __ sub(x13, x13, BytesPerLong); + __ addi(x12, x12, BytesPerLong); + __ subi(x13, x13, BytesPerLong); __ bnez(x13, loop); } @@ -3649,11 +3649,11 @@ void TemplateTable::checkcast() { __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index // See if bytecode has already been quicked - __ add(t0, x13, Array::base_offset_in_bytes()); + __ addi(t0, x13, Array::base_offset_in_bytes()); __ add(x11, t0, x9); __ lbu(x11, x11); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); - __ sub(t0, x11, (u1)JVM_CONSTANT_Class); + __ subi(t0, x11, (u1)JVM_CONSTANT_Class); __ beqz(t0, quicked); __ push(atos); // save receiver for result, and for GC @@ -3704,11 +3704,11 @@ void TemplateTable::instanceof() { __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index // See if bytecode has already been quicked - __ add(t0, x13, Array::base_offset_in_bytes()); + __ addi(t0, x13, Array::base_offset_in_bytes()); __ add(x11, t0, x9); __ lbu(x11, x11); __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); - __ sub(t0, x11, (u1)JVM_CONSTANT_Class); + __ subi(t0, x11, (u1)JVM_CONSTANT_Class); __ beqz(t0, quicked); __ push(atos); // save receiver for result, and for GC @@ -3884,7 +3884,7 @@ void TemplateTable::monitorenter() { __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack // word from old location __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location - __ add(c_rarg3, c_rarg3, wordSize); // advance to next word + __ addi(c_rarg3, c_rarg3, wordSize); // advance to next word __ bind(entry); __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached.if not at bottom // then copy next word @@ -3979,7 +3979,7 @@ void TemplateTable::multianewarray() { // last dim is on top of stack; we want address of first one: // first_addr = last_addr + (ndims - 1) * wordSize __ shadd(c_rarg1, x10, esp, c_rarg1, 3); - __ sub(c_rarg1, c_rarg1, wordSize); + __ subi(c_rarg1, c_rarg1, wordSize); call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), c_rarg1); From 4f8a66625c48a9b61139bd1ae28d4e03cd38c1da Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Thu, 19 Dec 2024 23:44:13 +0800 Subject: [PATCH 2/5] Revert unnecessary changes --- src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 4 ++-- src/hotspot/cpu/riscv/templateTable_riscv.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index 4299d040b8330..953d162deee91 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -266,7 +266,7 @@ void InterpreterMacroAssembler::pop_i(Register r) { void InterpreterMacroAssembler::pop_l(Register r) { ld(r, Address(esp, 0)); - add(esp, esp, 2 * Interpreter::stackElementSize); + addi(esp, esp, 2 * Interpreter::stackElementSize); } void InterpreterMacroAssembler::push_ptr(Register r) { @@ -293,7 +293,7 @@ void InterpreterMacroAssembler::pop_f(FloatRegister r) { void InterpreterMacroAssembler::pop_d(FloatRegister r) { fld(r, Address(esp, 0)); - add(esp, esp, 2 * Interpreter::stackElementSize); + addi(esp, esp, 2 * Interpreter::stackElementSize); } void InterpreterMacroAssembler::push_f(FloatRegister r) { diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp index 0bc0f6461571b..8e05a63d0e99d 100644 --- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp @@ -1112,7 +1112,7 @@ void TemplateTable::aastore() { // Pop stack arguments __ bind(done); - __ add(esp, esp, 3 * Interpreter::stackElementSize); + __ addi(esp, esp, 3 * Interpreter::stackElementSize); } void TemplateTable::bastore() { @@ -1185,12 +1185,12 @@ void TemplateTable::astore(int n) { void TemplateTable::pop() { transition(vtos, vtos); - __ add(esp, esp, Interpreter::stackElementSize); + __ addi(esp, esp, Interpreter::stackElementSize); } void TemplateTable::pop2() { transition(vtos, vtos); - __ add(esp, esp, 2 * Interpreter::stackElementSize); + __ addi(esp, esp, 2 * Interpreter::stackElementSize); } void TemplateTable::dup() { From b27fc6238756c93c0b1ab079b9cb156f533444d3 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Sat, 21 Dec 2024 12:06:20 +0800 Subject: [PATCH 3/5] Improve naming for rotate routines --- src/hotspot/cpu/riscv/assembler_riscv.hpp | 8 ++--- .../cpu/riscv/macroAssembler_riscv.cpp | 34 +++++++++---------- .../cpu/riscv/macroAssembler_riscv.hpp | 4 +-- src/hotspot/cpu/riscv/riscv_b.ad | 8 ++--- src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 12 +++---- 5 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index 31713d7362a18..a9d601edbbef5 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -2015,10 +2015,10 @@ enum Nf { } INSN(add_uw, 0b0111011, 0b000, 0b0000100); - INSN(rol, 0b0110011, 0b001, 0b0110000); - INSN(rolw, 0b0111011, 0b001, 0b0110000); - INSN(ror, 0b0110011, 0b101, 0b0110000); - INSN(rorw, 0b0111011, 0b101, 0b0110000); + INSN(rolr, 0b0110011, 0b001, 0b0110000); + INSN(rolrw, 0b0111011, 0b001, 0b0110000); + INSN(rorr, 0b0110011, 0b101, 0b0110000); + INSN(rorrw, 0b0111011, 0b101, 0b0110000); INSN(sh1add, 0b0110011, 0b010, 0b0010000); INSN(sh2add, 0b0110011, 0b100, 0b0010000); INSN(sh3add, 0b0110011, 0b110, 0b0010000); diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index 2575c9cfe9802..17735032e008e 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -2870,7 +2870,7 @@ void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2 } // rotate right with shift bits -void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) +void MacroAssembler::ror(Register dst, Register src, uint32_t shift, Register tmp) { if (UseZbb) { rori(dst, src, shift); @@ -2886,7 +2886,7 @@ void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Registe } // rotate left with shift bits, 32-bit version -void MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) { +void MacroAssembler::rolw(Register dst, Register src, uint32_t shift, Register tmp) { if (UseZbb) { // no roliw available roriw(dst, src, 32 - shift); @@ -4359,7 +4359,7 @@ bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, // Linear probe. if (bit != 0) { - ror_imm(r_bitmap, r_bitmap, bit); + ror(r_bitmap, r_bitmap, bit); } // The slot we just inspected is at secondary_supers[r_array_index - 1]. @@ -4440,7 +4440,7 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl test_bit(t0, r_bitmap, 2); // look-ahead check (Bit 2); result is non-zero beqz(t0, L_fallthrough); - ror_imm(r_bitmap, r_bitmap, 1); + ror(r_bitmap, r_bitmap, 1); addi(r_array_index, r_array_index, 1); j(L_loop); } @@ -5054,7 +5054,7 @@ void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register shadd(t0, xstart, x, t0, LogBytesPerInt); ld(x_xstart, Address(t0, 0)); - ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + ror(x_xstart, x_xstart, 32); // convert big-endian to little-endian bind(L_first_loop); subiw(idx, idx, 1); @@ -5064,7 +5064,7 @@ void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register shadd(t0, idx, y, t0, LogBytesPerInt); ld(y_idx, Address(t0, 0)); - ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian + ror(y_idx, y_idx, 32); // convert big-endian to little-endian bind(L_multiply); mulhu(t0, x_xstart, y_idx); @@ -5073,7 +5073,7 @@ void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register adc(carry, t0, zr, t1); subiw(kdx, kdx, 2); - ror_imm(product, product, 32); // back to big-endian + ror(product, product, 32); // back to big-endian shadd(t0, kdx, z, t0, LogBytesPerInt); sd(product, Address(t0, 0)); @@ -5134,8 +5134,8 @@ void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, shadd(tmp6, idx, z, t0, LogBytesPerInt); - ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian - ror_imm(yz_idx2, yz_idx2, 32); + ror(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian + ror(yz_idx2, yz_idx2, 32); ld(t1, Address(tmp6, 0)); ld(t0, Address(tmp6, wordSize)); @@ -5143,8 +5143,8 @@ void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 mulhu(tmp4, product_hi, yz_idx1); - ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian - ror_imm(t1, t1, 32, tmp); + ror(t0, t0, 32, tmp); // convert big-endian to little-endian + ror(t1, t1, 32, tmp); mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp mulhu(carry2, product_hi, yz_idx2); @@ -5157,8 +5157,8 @@ void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, cad(tmp4, tmp4, t1, carry2); adc(carry, carry, zr, carry2); - ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian - ror_imm(tmp4, tmp4, 32); + ror(tmp3, tmp3, 32); // convert little-endian to big-endian + ror(tmp4, tmp4, 32); sd(tmp4, Address(tmp6, 0)); sd(tmp3, Address(tmp6, wordSize)); @@ -5175,18 +5175,18 @@ void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, shadd(t0, idx, y, t0, LogBytesPerInt); ld(yz_idx1, Address(t0, 0)); - ror_imm(yz_idx1, yz_idx1, 32); + ror(yz_idx1, yz_idx1, 32); mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 mulhu(tmp4, product_hi, yz_idx1); shadd(t0, idx, z, t0, LogBytesPerInt); ld(yz_idx2, Address(t0, 0)); - ror_imm(yz_idx2, yz_idx2, 32, tmp); + ror(yz_idx2, yz_idx2, 32, tmp); add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); - ror_imm(tmp3, tmp3, 32, tmp); + ror(tmp3, tmp3, 32, tmp); sd(tmp3, Address(t0, 0)); bind(L_check_1); @@ -5358,7 +5358,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi shadd(t0, xstart, x, t0, LogBytesPerInt); ld(product_hi, Address(t0, 0)); - ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + ror(product_hi, product_hi, 32); // convert big-endian to little-endian Label L_third_loop_prologue; bind(L_third_loop_prologue); diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 8d2e0dffc2738..b2e79edd96341 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -926,8 +926,8 @@ class MacroAssembler: public Assembler { void revbw(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in lower word, sign-extend void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword - void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); - void rolw_imm(Register dst, Register src, uint32_t, Register tmp = t0); + void ror(Register dst, Register src, uint32_t shift, Register tmp = t0); + void rolw(Register dst, Register src, uint32_t shift, Register tmp = t0); void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad index 535c0fd534ddf..990d9eab87c7b 100644 --- a/src/hotspot/cpu/riscv/riscv_b.ad +++ b/src/hotspot/cpu/riscv/riscv_b.ad @@ -60,7 +60,7 @@ instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_b" %} ins_cost(ALU_COST); ins_encode %{ - __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + __ rorrw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); %} ins_pipe(ialu_reg_reg); %} @@ -72,7 +72,7 @@ instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ format %{ "ror $dst, $src, $shift\t#@rorL_reg_b" %} ins_cost(ALU_COST); ins_encode %{ - __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + __ rorr(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); %} ins_pipe(ialu_reg_reg); %} @@ -84,7 +84,7 @@ instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_b" %} ins_cost(ALU_COST); ins_encode %{ - __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + __ rolrw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); %} ins_pipe(ialu_reg_reg); %} @@ -96,7 +96,7 @@ instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ format %{ "rol $dst, $src, $shift\t#@rolL_reg_b" %} ins_cost(ALU_COST); ins_encode %{ - __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + __ rolr(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); %} ins_pipe(ialu_reg_reg); %} diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp index 134669f982fee..439162f01f105 100644 --- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -3556,7 +3556,7 @@ class StubGenerator: public StubCodeGenerator { void reverse1(Register d, Register s, Register tmp) { subi(s, s, wordSize); ld(tmp, Address(s)); - ror_imm(tmp, tmp, 32, t0); + ror(tmp, tmp, 32, t0); sd(tmp, Address(d)); addi(d, d, wordSize); } @@ -4523,7 +4523,7 @@ class StubGenerator: public StubCodeGenerator { __ addw(a, a, value); // a = Integer.rotateLeft(a, s) + b; - __ rolw_imm(a, a, s); + __ rolw(a, a, s); __ addw(a, a, b); } @@ -5028,7 +5028,7 @@ class StubGenerator: public StubCodeGenerator { __ xorr(cur_w, cur_w, t1); __ xorr(cur_w, cur_w, t0); - __ rolw_imm(cur_w, cur_w, 1, t0); + __ rolw(cur_w, cur_w, 1, t0); // copy the cur_w value to ws[8]. // now, valid w't values are at: @@ -5048,7 +5048,7 @@ class StubGenerator: public StubCodeGenerator { __ xorr(cur_w, ws[(idx-16)/2], ws[(idx-14)/2]); __ xorr(cur_w, cur_w, t0); - __ rolw_imm(cur_w, cur_w, 1, t0); + __ rolw(cur_w, cur_w, 1, t0); // copy the cur_w value to ws[8] __ zext(cur_w, cur_w, 32); @@ -5113,7 +5113,7 @@ class StubGenerator: public StubCodeGenerator { Register tmp3 = e; __ add(tmp2, cur_k, tmp2); __ add(tmp3, tmp3, tmp2); - __ rolw_imm(tmp2, a, 5, t0); + __ rolw(tmp2, a, 5, t0); sha1_f(tmp, b, c, d, round); @@ -5128,7 +5128,7 @@ class StubGenerator: public StubCodeGenerator { __ mv(e, d); __ mv(d, c); - __ rolw_imm(c, b, 30); + __ rolw(c, b, 30); __ mv(b, a); __ mv(a, tmp2); } From 55eaaac654bf74395c710635a5f003c54800de40 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Sat, 21 Dec 2024 14:20:59 +0800 Subject: [PATCH 4/5] Revert unnecessary change --- src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index 17735032e008e..f0e5992b04b65 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -6142,10 +6142,10 @@ void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) { } int64_t imm = (int64_t)(1UL << bit_pos); if (is_simm12(imm)) { - andi(Rd, Rs, imm); + and_imm12(Rd, Rs, imm); } else { srli(Rd, Rs, bit_pos); - andi(Rd, Rd, 1); + and_imm12(Rd, Rd, 1); } } From ba5f37cc96816f9b9d4c2cf99c9b4ae90ebaa2bd Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Mon, 23 Dec 2024 11:19:14 +0800 Subject: [PATCH 5/5] Review comments --- src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 6 +++--- src/hotspot/cpu/riscv/templateTable_riscv.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index 953d162deee91..56a3ce6e0cc56 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -1752,7 +1752,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca profile_obj_type(tmp, mdo_arg_addr, t1); int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); - add(off_to_args, off_to_args, to_add); + addi(off_to_args, off_to_args, to_add); // increment index by 1 addi(index, index, 1); @@ -1849,7 +1849,7 @@ void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register t // mdo start + parameters offset + array length - 1 add(mdp, mdp, tmp1); ld(tmp1, Address(mdp, ArrayData::array_len_offset())); - sub(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); + subi(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); Label loop; bind(loop); @@ -1875,7 +1875,7 @@ void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register t profile_obj_type(tmp2, arg_type, tmp3); // go to next parameter - sub(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); + subi(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); bgez(tmp1, loop); bind(profile_continue); diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp index 8e05a63d0e99d..59e9ab95d94b5 100644 --- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp @@ -197,7 +197,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, Label L_fast_patch; // if a breakpoint is present we can't rewrite the stream directly __ load_unsigned_byte(temp_reg, at_bcp(0)); - __ sub(temp_reg, temp_reg, Bytecodes::_breakpoint); // temp_reg is temporary register. + __ subi(temp_reg, temp_reg, Bytecodes::_breakpoint); // temp_reg is temporary register. __ bnez(temp_reg, L_fast_patch); // Let breakpoint table handling rewrite to quicker bytecode __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg); @@ -209,7 +209,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, Label L_okay; __ load_unsigned_byte(temp_reg, at_bcp(0)); __ beq(temp_reg, bc_reg, L_okay); - __ sub(temp_reg, temp_reg, (int)Bytecodes::java_code(bc)); + __ subi(temp_reg, temp_reg, (int)Bytecodes::java_code(bc)); __ beqz(temp_reg, L_okay); __ stop("patching the wrong bytecode"); __ bind(L_okay);