Skip to content

Commit

Permalink
8309502: RISC-V: String.indexOf intrinsic may produce misaligned memo…
Browse files Browse the repository at this point in the history
…ry loads

Reviewed-by: luhenry, fjiang, fyang
  • Loading branch information
Vladimir Kempik committed Jun 15, 2023
1 parent 181845a commit 6b94289
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 17 deletions.
61 changes: 50 additions & 11 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,9 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
}
bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char
add(result, haystack, isLL ? nlen_tmp : ch2);
ld(ch2, Address(result)); // load 8 bytes from source string
// load 8 bytes from source string
// if isLL is false then read granularity can be 2
load_long_misaligned(ch2, Address(result), ch1, isLL ? 1 : 2); // can use ch1 as temp register here as it will be trashed by next mv anyway
mv(ch1, tmp6);
if (isLL) {
j(BMLOOPSTR1_AFTER_LOAD);
Expand Down Expand Up @@ -679,10 +681,30 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp
add(haystack, haystack, tmp3);
neg(hlen_neg, tmp3);
if (AvoidUnalignedAccesses) {
// preload first value, then we will read by 1 character per loop, instead of four
// just shifting previous ch2 right by size of character in bits
add(tmp3, haystack, hlen_neg);
(this->*load_4chr)(ch2, Address(tmp3), noreg);
if (isLL) {
// need to erase 1 most significant byte in 32-bit value of ch2
slli(ch2, ch2, 40);
srli(ch2, ch2, 32);
} else {
slli(ch2, ch2, 16); // 2 most significant bytes will be erased by this operation
}
}

bind(CH1_LOOP);
add(ch2, haystack, hlen_neg);
(this->*load_4chr)(ch2, Address(ch2), noreg);
add(tmp3, haystack, hlen_neg);
if (AvoidUnalignedAccesses) {
srli(ch2, ch2, isLL ? 8 : 16);
(this->*haystack_load_1chr)(tmp3, Address(tmp3, isLL ? 3 : 6), noreg);
slli(tmp3, tmp3, isLL ? 24 : 48);
add(ch2, ch2, tmp3);
} else {
(this->*load_4chr)(ch2, Address(tmp3), noreg);
}
beq(ch1, ch2, MATCH);
add(hlen_neg, hlen_neg, haystack_chr_size);
blez(hlen_neg, CH1_LOOP);
Expand All @@ -700,10 +722,23 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
slli(tmp3, result_tmp, haystack_chr_shift);
add(haystack, haystack, tmp3);
neg(hlen_neg, tmp3);

if (AvoidUnalignedAccesses) {
// preload first value, then we will read by 1 character per loop, instead of two
// just shifting previous ch2 right by size of character in bits
add(tmp3, haystack, hlen_neg);
(this->*haystack_load_1chr)(ch2, Address(tmp3), noreg);
slli(ch2, ch2, isLL ? 8 : 16);
}
bind(CH1_LOOP);
add(tmp3, haystack, hlen_neg);
(this->*load_2chr)(ch2, Address(tmp3), noreg);
if (AvoidUnalignedAccesses) {
srli(ch2, ch2, isLL ? 8 : 16);
(this->*haystack_load_1chr)(tmp3, Address(tmp3, isLL ? 1 : 2), noreg);
slli(tmp3, tmp3, isLL ? 8 : 16);
add(ch2, ch2, tmp3);
} else {
(this->*load_2chr)(ch2, Address(tmp3), noreg);
}
beq(ch1, ch2, MATCH);
add(hlen_neg, hlen_neg, haystack_chr_size);
blez(hlen_neg, CH1_LOOP);
Expand All @@ -727,7 +762,14 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne

bind(FIRST_LOOP);
add(ch2, haystack, hlen_neg);
(this->*load_2chr)(ch2, Address(ch2), noreg);
if (AvoidUnalignedAccesses) {
(this->*haystack_load_1chr)(tmp2, Address(ch2, isLL ? 1 : 2), noreg); // we need a temp register, we can safely use hlen_tmp here, which is a synonym for tmp2
(this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
slli(tmp2, tmp2, isLL ? 8 : 16);
add(ch2, ch2, tmp2);
} else {
(this->*load_2chr)(ch2, Address(ch2), noreg);
}
beq(first, ch2, STR1_LOOP);

bind(STR2_NEXT);
Expand All @@ -751,10 +793,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
bind(DO1);
(this->*needle_load_1chr)(ch1, Address(needle), noreg);
sub(result_tmp, haystack_len, 1);
mv(tmp3, result_tmp);
if (haystack_chr_shift) {
slli(tmp3, result_tmp, haystack_chr_shift);
}
slli(tmp3, result_tmp, haystack_chr_shift);
add(haystack, haystack, tmp3);
neg(hlen_neg, tmp3);

Expand Down Expand Up @@ -1932,4 +1971,4 @@ void C2_MacroAssembler::extract_fp_v(FloatRegister dst, VectorRegister src, Basi
vslidedown_vx(tmp, src, t0);
vfmv_f_s(dst, tmp);
}
}
}
34 changes: 28 additions & 6 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1700,12 +1700,29 @@ void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in
}
}

// granularity is 1, 2 bytes per load
// granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register
void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
if (granularity != 1 && granularity != 2) {
ShouldNotReachHere();
}
if (AvoidUnalignedAccesses && (granularity != 2)) {
assert_different_registers(dst, tmp);
assert_different_registers(tmp, src.base());
is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1));
slli(tmp, tmp, 8);
lbu(dst, src);
add(dst, dst, tmp);
} else {
is_signed ? lh(dst, src) : lhu(dst, src);
}
}

// granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register
void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
if (AvoidUnalignedAccesses && (granularity != 4)) {
assert_different_registers(dst, tmp, src.base());
switch(granularity) {
case 1:
assert_different_registers(dst, tmp, src.base());
lbu(dst, src);
lbu(tmp, Address(src.base(), src.offset() + 1));
slli(tmp, tmp, 8);
Expand All @@ -1718,9 +1735,11 @@ void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp
add(dst, dst, tmp);
break;
case 2:
lhu(dst, src);
assert_different_registers(dst, tmp);
assert_different_registers(tmp, src.base());
is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2));
slli(tmp, tmp, 16);
lhu(dst, src);
add(dst, dst, tmp);
break;
default:
Expand All @@ -1731,12 +1750,12 @@ void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp
}
}

// granularity is 1, 2 or 4 bytes per load
// granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register
void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) {
if (AvoidUnalignedAccesses && (granularity != 8)) {
assert_different_registers(dst, tmp, src.base());
switch(granularity){
case 1:
assert_different_registers(dst, tmp, src.base());
lbu(dst, src);
lbu(tmp, Address(src.base(), src.offset() + 1));
slli(tmp, tmp, 8);
Expand All @@ -1761,6 +1780,7 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
add(dst, dst, tmp);
break;
case 2:
assert_different_registers(dst, tmp, src.base());
lhu(dst, src);
lhu(tmp, Address(src.base(), src.offset() + 2));
slli(tmp, tmp, 16);
Expand All @@ -1773,9 +1793,11 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
add(dst, dst, tmp);
break;
case 4:
lwu(dst, src);
assert_different_registers(dst, tmp);
assert_different_registers(tmp, src.base());
lwu(tmp, Address(src.base(), src.offset() + 4));
slli(tmp, tmp, 32);
lwu(dst, src);
add(dst, dst, tmp);
break;
default:
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ class MacroAssembler: public Assembler {
void store_sized_value(Address dst, Register src, size_t size_in_bytes);

// Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);

Expand Down

5 comments on commit 6b94289

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@VladimirKempik
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk21

@openjdk
Copy link

@openjdk openjdk bot commented on 6b94289 Jun 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@VladimirKempik the backport was successfully created on the branch VladimirKempik-backport-6b942893 in my personal fork of openjdk/jdk21. To create a pull request with this backport targeting openjdk/jdk21:master, just click the following link:

➡️ Create pull request

The title of the pull request is automatically filled in correctly and below you find a suggestion for the pull request body:

Hi all,

This pull request contains a backport of commit 6b942893 from the openjdk/jdk repository.

The commit being backported was authored by Vladimir Kempik on 15 Jun 2023 and was reviewed by Ludovic Henry, Feilong Jiang and Fei Yang.

Thanks!

If you need to update the source branch of the pull then run the following commands in a local clone of your personal fork of openjdk/jdk21:

$ git fetch https://github.com/openjdk-bots/jdk21.git VladimirKempik-backport-6b942893:VladimirKempik-backport-6b942893
$ git checkout VladimirKempik-backport-6b942893
# make changes
$ git add paths/to/changed/files
$ git commit --message 'Describe additional changes made'
$ git push https://github.com/openjdk-bots/jdk21.git VladimirKempik-backport-6b942893

@VladimirKempik
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk21u

@openjdk
Copy link

@openjdk openjdk bot commented on 6b94289 Jun 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@VladimirKempik the backport was successfully created on the branch VladimirKempik-backport-6b942893 in my personal fork of openjdk/jdk21u. To create a pull request with this backport targeting openjdk/jdk21u:master, just click the following link:

➡️ Create pull request

The title of the pull request is automatically filled in correctly and below you find a suggestion for the pull request body:

Hi all,

This pull request contains a backport of commit 6b942893 from the openjdk/jdk repository.

The commit being backported was authored by Vladimir Kempik on 15 Jun 2023 and was reviewed by Ludovic Henry, Feilong Jiang and Fei Yang.

Thanks!

If you need to update the source branch of the pull then run the following commands in a local clone of your personal fork of openjdk/jdk21u:

$ git fetch https://github.com/openjdk-bots/jdk21u.git VladimirKempik-backport-6b942893:VladimirKempik-backport-6b942893
$ git checkout VladimirKempik-backport-6b942893
# make changes
$ git add paths/to/changed/files
$ git commit --message 'Describe additional changes made'
$ git push https://github.com/openjdk-bots/jdk21u.git VladimirKempik-backport-6b942893

Please sign in to comment.