Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 50 additions & 11 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,9 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
}
bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char
add(result, haystack, isLL ? nlen_tmp : ch2);
ld(ch2, Address(result)); // load 8 bytes from source string
// load 8 bytes from source string
// if isLL is false then read granularity can be 2
load_long_misaligned(ch2, Address(result), ch1, isLL ? 1 : 2); // can use ch1 as temp register here as it will be trashed by next mv anyway
mv(ch1, tmp6);
if (isLL) {
j(BMLOOPSTR1_AFTER_LOAD);
Expand Down Expand Up @@ -679,10 +681,30 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp
add(haystack, haystack, tmp3);
neg(hlen_neg, tmp3);
if (AvoidUnalignedAccesses) {
// preload first value, then we will read by 1 character per loop, instead of four
// just shifting previous ch2 right by size of character in bits
add(tmp3, haystack, hlen_neg);
(this->*load_4chr)(ch2, Address(tmp3), noreg);
if (isLL) {
// need to erase 1 most significant byte in 32-bit value of ch2
slli(ch2, ch2, 40);
srli(ch2, ch2, 32);
} else {
slli(ch2, ch2, 16); // 2 most significant bytes will be erased by this operation
}
}

bind(CH1_LOOP);
add(ch2, haystack, hlen_neg);
(this->*load_4chr)(ch2, Address(ch2), noreg);
add(tmp3, haystack, hlen_neg);
if (AvoidUnalignedAccesses) {
srli(ch2, ch2, isLL ? 8 : 16);
(this->*haystack_load_1chr)(tmp3, Address(tmp3, isLL ? 3 : 6), noreg);
slli(tmp3, tmp3, isLL ? 24 : 48);
add(ch2, ch2, tmp3);
} else {
(this->*load_4chr)(ch2, Address(tmp3), noreg);
}
beq(ch1, ch2, MATCH);
add(hlen_neg, hlen_neg, haystack_chr_size);
blez(hlen_neg, CH1_LOOP);
Expand All @@ -700,10 +722,23 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
slli(tmp3, result_tmp, haystack_chr_shift);
add(haystack, haystack, tmp3);
neg(hlen_neg, tmp3);

if (AvoidUnalignedAccesses) {
// preload first value, then we will read by 1 character per loop, instead of two
// just shifting previous ch2 right by size of character in bits
add(tmp3, haystack, hlen_neg);
(this->*haystack_load_1chr)(ch2, Address(tmp3), noreg);
slli(ch2, ch2, isLL ? 8 : 16);
}
bind(CH1_LOOP);
add(tmp3, haystack, hlen_neg);
(this->*load_2chr)(ch2, Address(tmp3), noreg);
if (AvoidUnalignedAccesses) {
srli(ch2, ch2, isLL ? 8 : 16);
(this->*haystack_load_1chr)(tmp3, Address(tmp3, isLL ? 1 : 2), noreg);
slli(tmp3, tmp3, isLL ? 8 : 16);
add(ch2, ch2, tmp3);
} else {
(this->*load_2chr)(ch2, Address(tmp3), noreg);
}
beq(ch1, ch2, MATCH);
add(hlen_neg, hlen_neg, haystack_chr_size);
blez(hlen_neg, CH1_LOOP);
Expand All @@ -727,7 +762,14 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne

bind(FIRST_LOOP);
add(ch2, haystack, hlen_neg);
(this->*load_2chr)(ch2, Address(ch2), noreg);
if (AvoidUnalignedAccesses) {
(this->*haystack_load_1chr)(tmp2, Address(ch2, isLL ? 1 : 2), noreg); // we need a temp register, we can safely use hlen_tmp here, which is a synonym for tmp2
(this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
slli(tmp2, tmp2, isLL ? 8 : 16);
add(ch2, ch2, tmp2);
} else {
(this->*load_2chr)(ch2, Address(ch2), noreg);
}
beq(first, ch2, STR1_LOOP);

bind(STR2_NEXT);
Expand All @@ -751,10 +793,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
bind(DO1);
(this->*needle_load_1chr)(ch1, Address(needle), noreg);
sub(result_tmp, haystack_len, 1);
mv(tmp3, result_tmp);
if (haystack_chr_shift) {
slli(tmp3, result_tmp, haystack_chr_shift);
}
slli(tmp3, result_tmp, haystack_chr_shift);
add(haystack, haystack, tmp3);
neg(hlen_neg, tmp3);

Expand Down Expand Up @@ -1918,4 +1957,4 @@ void C2_MacroAssembler::extract_fp_v(FloatRegister dst, VectorRegister src, Basi
vslidedown_vx(tmp, src, t0);
vfmv_f_s(dst, tmp);
}
}
}
34 changes: 28 additions & 6 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1700,12 +1700,29 @@ void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in
}
}

// granularity is 1, 2 bytes per load
// granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register
void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
if (granularity != 1 && granularity != 2) {
ShouldNotReachHere();
}
if (AvoidUnalignedAccesses && (granularity != 2)) {
assert_different_registers(dst, tmp);
assert_different_registers(tmp, src.base());
is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1));
slli(tmp, tmp, 8);
lbu(dst, src);
add(dst, dst, tmp);
} else {
is_signed ? lh(dst, src) : lhu(dst, src);
}
}

// granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register
void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
if (AvoidUnalignedAccesses && (granularity != 4)) {
assert_different_registers(dst, tmp, src.base());
switch(granularity) {
case 1:
assert_different_registers(dst, tmp, src.base());
lbu(dst, src);
lbu(tmp, Address(src.base(), src.offset() + 1));
slli(tmp, tmp, 8);
Expand All @@ -1718,9 +1735,11 @@ void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp
add(dst, dst, tmp);
break;
case 2:
lhu(dst, src);
assert_different_registers(dst, tmp);
assert_different_registers(tmp, src.base());
is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2));
slli(tmp, tmp, 16);
lhu(dst, src);
add(dst, dst, tmp);
break;
default:
Expand All @@ -1731,12 +1750,12 @@ void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp
}
}

// granularity is 1, 2 or 4 bytes per load
// granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register
void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) {
if (AvoidUnalignedAccesses && (granularity != 8)) {
assert_different_registers(dst, tmp, src.base());
switch(granularity){
case 1:
assert_different_registers(dst, tmp, src.base());
lbu(dst, src);
lbu(tmp, Address(src.base(), src.offset() + 1));
slli(tmp, tmp, 8);
Expand All @@ -1761,6 +1780,7 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
add(dst, dst, tmp);
break;
case 2:
assert_different_registers(dst, tmp, src.base());
lhu(dst, src);
lhu(tmp, Address(src.base(), src.offset() + 2));
slli(tmp, tmp, 16);
Expand All @@ -1773,9 +1793,11 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
add(dst, dst, tmp);
break;
case 4:
lwu(dst, src);
assert_different_registers(dst, tmp);
assert_different_registers(tmp, src.base());
lwu(tmp, Address(src.base(), src.offset() + 4));
slli(tmp, tmp, 32);
lwu(dst, src);
add(dst, dst, tmp);
break;
default:
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ class MacroAssembler: public Assembler {
void store_sized_value(Address dst, Register src, size_t size_in_bytes);

// Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);

Expand Down