RISC-V: rename MacroAssembler::ctzc_bit to ctzc_bits and simplify it.

* Zbb path: the andi/sub pair that rounded the ctz result down to a
  multiple of the char width (8 or 16 bits) becomes a single
  andi(Rd, Rd, -step); tmp1 is no longer written on this path.
* Fallback path: Rs is copied to tmp2 before Rd is initialised, and the
  register-distinctness assert no longer lists Rs.
* Fallback path: the per-char mask uses zext(tmp1, tmp2, step) instead of
  andi with ((1 << step) - 1).
* The call sites touched by this patch are renamed and given a uniform
  comment.

diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index a075895cd6c95..798804269d4ae 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -787,7 +787,8 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
   j(NOMATCH);
 
   bind(HIT);
-  ctzc_bit(trailing_char, match_mask, isL, ch1, result);
+  // count bits of trailing zero chars
+  ctzc_bits(trailing_char, match_mask, isL, ch1, result);
   srli(trailing_char, trailing_char, 3);
   addi(cnt1, cnt1, 8);
   ble(cnt1, trailing_char, NOMATCH);
@@ -1536,7 +1537,8 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
   // compute their difference.
   bind(DIFFERENCE);
   xorr(tmp3, tmp1, tmp2);
-  ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb
+  // count bits of trailing zero chars
+  ctzc_bits(result, tmp3, isLL);
   srl(tmp1, tmp1, result);
   srl(tmp2, tmp2, result);
   if (isLL) {
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index 27452e7a6842c..804407fd80e68 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -5395,28 +5395,26 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
 }
 #endif
 
-// Count bits of trailing zero chars from lsb to msb until first non-zero element.
-// For LL case, one byte for one element, so shift 8 bits once, and for other case,
-// shift 16 bits once.
-void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) {
+// Count bits of trailing zero chars from lsb to msb until first non-zero
+// char seen. For the LL case, shift 8 bits once as there is only one byte
+// per each char. For other cases, shift 16 bits once.
+void MacroAssembler::ctzc_bits(Register Rd, Register Rs, bool isLL,
+                               Register tmp1, Register tmp2) {
+  int step = isLL ? 8 : 16;
   if (UseZbb) {
-    assert_different_registers(Rd, Rs, tmp1);
-    int step = isLL ? 8 : 16;
     ctz(Rd, Rs);
-    andi(tmp1, Rd, step - 1);
-    sub(Rd, Rd, tmp1);
+    andi(Rd, Rd, -step);
     return;
   }
 
-  assert_different_registers(Rd, Rs, tmp1, tmp2);
+  assert_different_registers(Rd, tmp1, tmp2);
   Label Loop;
-  int step = isLL ? 8 : 16;
-  mv(Rd, -step);
   mv(tmp2, Rs);
+  mv(Rd, -step);
 
   bind(Loop);
   addi(Rd, Rd, step);
-  andi(tmp1, tmp2, ((1 << step) - 1));
+  zext(tmp1, tmp2, step);
   srli(tmp2, tmp2, step);
   beqz(tmp1, Loop);
 }
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index 1f5a47bf65acb..4cfc1c5525480 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -1362,7 +1362,8 @@ class MacroAssembler: public Assembler {
   void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
   void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
 
-  void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);
+  void ctzc_bits(Register Rd, Register Rs, bool isLL = false,
+                 Register tmp1 = t0, Register tmp2 = t1);
 
   void zero_words(Register base, uint64_t cnt);
   address zero_words(Register ptr, Register cnt);
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 1afa3545afcf9..0b619923b06b1 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -2568,7 +2568,8 @@ class StubGenerator: public StubCodeGenerator {
     // Find the first different characters in the longwords and
     // compute their difference.
     __ bind(CALCULATE_DIFFERENCE);
-    __ ctzc_bit(tmp4, tmp3);
+    // count bits of trailing zero chars
+    __ ctzc_bits(tmp4, tmp3);
     __ srl(tmp1, tmp1, tmp4);
     __ srl(tmp2, tmp2, tmp4);
     __ zext(tmp1, tmp1, 16);
@@ -2703,7 +2704,8 @@ class StubGenerator: public StubCodeGenerator {
     // Find the first different characters in the longwords and
     // compute their difference.
     __ bind(DIFF2);
-    __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb
+    // count bits of trailing zero chars
+    __ ctzc_bits(tmp3, tmp4, isLL);
     __ srl(tmp5, tmp5, tmp3);
     __ srl(cnt1, cnt1, tmp3);
     if (isLL) {
@@ -2716,7 +2718,8 @@ class StubGenerator: public StubCodeGenerator {
     __ sub(result, tmp5, cnt1);
     __ j(LENGTH_DIFF);
     __ bind(DIFF);
-    __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb
+    // count bits of trailing zero chars
+    __ ctzc_bits(tmp3, tmp4, isLL);
     __ srl(tmp1, tmp1, tmp3);
     __ srl(tmp2, tmp2, tmp3);
     if (isLL) {
@@ -2862,7 +2865,8 @@ class StubGenerator: public StubCodeGenerator {
     __ beqz(match_mask, NOMATCH);
 
     __ bind(L_SMALL_HAS_ZERO_LOOP);
-    __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros
+    // count bits of trailing zero chars
+    __ ctzc_bits(trailing_zeros, match_mask, haystack_isL, ch2, tmp);
     __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
     __ mv(ch2, wordSize / haystack_chr_size);
     __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2);
@@ -2881,7 +2885,8 @@ class StubGenerator: public StubCodeGenerator {
 
     __ bind(L_SMALL_CMP_LOOP_NOMATCH);
     __ beqz(match_mask, NOMATCH);
-    __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2);
+    // count bits of trailing zero chars
+    __ ctzc_bits(trailing_zeros, match_mask, haystack_isL, tmp, ch2);
     __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
     __ add(result, result, 1);
     __ add(haystack, haystack, haystack_chr_size);
@@ -2900,7 +2905,8 @@ class StubGenerator: public StubCodeGenerator {
 
     __ align(OptoLoopAlignment);
     __ bind(L_HAS_ZERO);
-    __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2);
+    // count bits of trailing zero chars
+    __ ctzc_bits(trailing_zeros, match_mask, haystack_isL, tmp, ch2);
     __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
     __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2);
     __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits)
@@ -2929,7 +2935,8 @@ class StubGenerator: public StubCodeGenerator {
 
     __ bind(L_CMP_LOOP_NOMATCH);
     __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH);
-    __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index
+    // count bits of trailing zero chars
+    __ ctzc_bits(trailing_zeros, match_mask, haystack_isL, needle_len, ch2);
     __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
     __ add(haystack, haystack, haystack_chr_size);
     __ j(L_HAS_ZERO_LOOP);