Skip to content

Commit

Permalink
8309502: RISC-V: String.indexOf intrinsic may produce misaligned memory loads
Browse files Browse the repository at this point in the history

Reviewed-by: vkempik
Backport-of: 6b942893868fa1a64977288bdbdb1bbff8bd9d9c
  • Loading branch information
Olga Mikhaltsova committed Oct 10, 2023
1 parent 943ee59 commit 8e4f66a
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 16 deletions.
59 changes: 49 additions & 10 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
Expand Up @@ -489,7 +489,9 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
}
bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char
add(result, haystack, isLL ? nlen_tmp : ch2);
ld(ch2, Address(result)); // load 8 bytes from source string
// load 8 bytes from source string
// if isLL is false then read granularity can be 2
load_long_misaligned(ch2, Address(result), ch1, isLL ? 1 : 2); // can use ch1 as temp register here as it will be trashed by next mv anyway
mv(ch1, tmp6);
if (isLL) {
j(BMLOOPSTR1_AFTER_LOAD);
Expand Down Expand Up @@ -672,10 +674,30 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp
add(haystack, haystack, tmp3);
neg(hlen_neg, tmp3);
if (AvoidUnalignedAccesses) {
// preload first value, then we will read by 1 character per loop, instead of four
// just shifting previous ch2 right by size of character in bits
add(tmp3, haystack, hlen_neg);
(this->*load_4chr)(ch2, Address(tmp3), noreg);
if (isLL) {
// need to erase 1 most significant byte in 32-bit value of ch2
slli(ch2, ch2, 40);
srli(ch2, ch2, 32);
} else {
slli(ch2, ch2, 16); // 2 most significant bytes will be erased by this operation
}
}

bind(CH1_LOOP);
add(ch2, haystack, hlen_neg);
(this->*load_4chr)(ch2, Address(ch2), noreg);
add(tmp3, haystack, hlen_neg);
if (AvoidUnalignedAccesses) {
srli(ch2, ch2, isLL ? 8 : 16);
(this->*haystack_load_1chr)(tmp3, Address(tmp3, isLL ? 3 : 6), noreg);
slli(tmp3, tmp3, isLL ? 24 : 48);
add(ch2, ch2, tmp3);
} else {
(this->*load_4chr)(ch2, Address(tmp3), noreg);
}
beq(ch1, ch2, MATCH);
add(hlen_neg, hlen_neg, haystack_chr_size);
blez(hlen_neg, CH1_LOOP);
Expand All @@ -693,10 +715,23 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
slli(tmp3, result_tmp, haystack_chr_shift);
add(haystack, haystack, tmp3);
neg(hlen_neg, tmp3);

if (AvoidUnalignedAccesses) {
// preload first value, then we will read by 1 character per loop, instead of two
// just shifting previous ch2 right by size of character in bits
add(tmp3, haystack, hlen_neg);
(this->*haystack_load_1chr)(ch2, Address(tmp3), noreg);
slli(ch2, ch2, isLL ? 8 : 16);
}
bind(CH1_LOOP);
add(tmp3, haystack, hlen_neg);
(this->*load_2chr)(ch2, Address(tmp3), noreg);
if (AvoidUnalignedAccesses) {
srli(ch2, ch2, isLL ? 8 : 16);
(this->*haystack_load_1chr)(tmp3, Address(tmp3, isLL ? 1 : 2), noreg);
slli(tmp3, tmp3, isLL ? 8 : 16);
add(ch2, ch2, tmp3);
} else {
(this->*load_2chr)(ch2, Address(tmp3), noreg);
}
beq(ch1, ch2, MATCH);
add(hlen_neg, hlen_neg, haystack_chr_size);
blez(hlen_neg, CH1_LOOP);
Expand All @@ -720,7 +755,14 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne

bind(FIRST_LOOP);
add(ch2, haystack, hlen_neg);
(this->*load_2chr)(ch2, Address(ch2), noreg);
if (AvoidUnalignedAccesses) {
(this->*haystack_load_1chr)(tmp2, Address(ch2, isLL ? 1 : 2), noreg); // we need a temp register, we can safely use hlen_tmp here, which is a synonym for tmp2
(this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
slli(tmp2, tmp2, isLL ? 8 : 16);
add(ch2, ch2, tmp2);
} else {
(this->*load_2chr)(ch2, Address(ch2), noreg);
}
beq(first, ch2, STR1_LOOP);

bind(STR2_NEXT);
Expand All @@ -744,10 +786,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
bind(DO1);
(this->*needle_load_1chr)(ch1, Address(needle), noreg);
sub(result_tmp, haystack_len, 1);
mv(tmp3, result_tmp);
if (haystack_chr_shift) {
slli(tmp3, result_tmp, haystack_chr_shift);
}
slli(tmp3, result_tmp, haystack_chr_shift);
add(haystack, haystack, tmp3);
neg(hlen_neg, tmp3);

Expand Down
34 changes: 28 additions & 6 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Expand Up @@ -1616,12 +1616,29 @@ void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in
}
}

// granularity is 1, 2 bytes per load
// Emits code that loads a 16-bit value from src into dst.
//
// granularity must be 1 or 2 (bytes per emitted load); any other value ends in
// ShouldNotReachHere(). When AvoidUnalignedAccesses is set and granularity is
// not 2, the value is assembled from two single-byte loads; otherwise one
// halfword load (lh / lhu, chosen by is_signed) is emitted.
//
// Register constraints on the byte-wise path: tmp must differ from both dst and
// src.base(). dst and src.base() may be the same register, because the last
// read through src.base() is the very instruction that overwrites dst.
void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
  if (granularity != 1 && granularity != 2) {
    ShouldNotReachHere();
  }

  if (!AvoidUnalignedAccesses || granularity == 2) {
    // A single halfword access is acceptable here.
    if (is_signed) {
      lh(dst, src);
    } else {
      lhu(dst, src);
    }
    return;
  }

  assert_different_registers(dst, tmp);
  assert_different_registers(tmp, src.base());

  // Upper byte first, into tmp; only this byte carries the sign when is_signed.
  Address upper_byte(src.base(), src.offset() + 1);
  if (is_signed) {
    lb(tmp, upper_byte);
  } else {
    lbu(tmp, upper_byte);
  }
  slli(tmp, tmp, 8);

  // Lower byte is always zero-extended, then merged with the shifted upper byte.
  lbu(dst, src);
  add(dst, dst, tmp);
}

// granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register
void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
if (AvoidUnalignedAccesses && (granularity != 4)) {
assert_different_registers(dst, tmp, src.base());
switch(granularity) {
case 1:
assert_different_registers(dst, tmp, src.base());
lbu(dst, src);
lbu(tmp, Address(src.base(), src.offset() + 1));
slli(tmp, tmp, 8);
Expand All @@ -1634,9 +1651,11 @@ void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp
add(dst, dst, tmp);
break;
case 2:
lhu(dst, src);
assert_different_registers(dst, tmp);
assert_different_registers(tmp, src.base());
is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2));
slli(tmp, tmp, 16);
lhu(dst, src);
add(dst, dst, tmp);
break;
default:
Expand All @@ -1647,12 +1666,12 @@ void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp
}
}

// granularity is 1, 2 or 4 bytes per load
// granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register
void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) {
if (AvoidUnalignedAccesses && (granularity != 8)) {
assert_different_registers(dst, tmp, src.base());
switch(granularity){
case 1:
assert_different_registers(dst, tmp, src.base());
lbu(dst, src);
lbu(tmp, Address(src.base(), src.offset() + 1));
slli(tmp, tmp, 8);
Expand All @@ -1677,6 +1696,7 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
add(dst, dst, tmp);
break;
case 2:
assert_different_registers(dst, tmp, src.base());
lhu(dst, src);
lhu(tmp, Address(src.base(), src.offset() + 2));
slli(tmp, tmp, 16);
Expand All @@ -1689,9 +1709,11 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
add(dst, dst, tmp);
break;
case 4:
lwu(dst, src);
assert_different_registers(dst, tmp);
assert_different_registers(tmp, src.base());
lwu(tmp, Address(src.base(), src.offset() + 4));
slli(tmp, tmp, 32);
lwu(dst, src);
add(dst, dst, tmp);
break;
default:
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
Expand Up @@ -408,6 +408,7 @@ class MacroAssembler: public Assembler {
void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);

// Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);

Expand Down

1 comment on commit 8e4f66a

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.