Skip to content

Commit

Permalink
8268231: Aarch64: Use Ldp in intrinsics for String.compareTo
Browse files Browse the repository at this point in the history
Backport-of: 6d1d4d52928ed38bbc73ddcbede5389995a8e65f
  • Loading branch information
Dmitry Chuyko authored and Paul Hohensee committed Mar 3, 2022
1 parent 4631f4a commit dd83c4e
Showing 1 changed file with 66 additions and 67 deletions.
133 changes: 66 additions & 67 deletions src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5103,18 +5103,6 @@ class StubGenerator: public StubCodeGenerator {
return entry;
}

// code for comparing 16 bytes of strings with same encoding
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, tmp1 = r10, tmp2 = r11;
__ ldr(rscratch1, Address(__ post(str1, 8)));
__ eor(rscratch2, tmp1, tmp2);
__ ldr(cnt1, Address(__ post(str2, 8)));
__ cbnz(rscratch2, DIFF1);
__ ldr(tmp1, Address(__ post(str1, 8)));
__ eor(rscratch2, rscratch1, cnt1);
__ ldr(tmp2, Address(__ post(str2, 8)));
__ cbnz(rscratch2, DIFF2);
}

// code for comparing 16 characters of strings with Latin1 and Utf16 encoding
void compare_string_16_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
Expand Down Expand Up @@ -5321,15 +5309,18 @@ class StubGenerator: public StubCodeGenerator {
: "compare_long_string_same_encoding UU");
address entry = __ pc();
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
tmp1 = r10, tmp2 = r11;
Label SMALL_LOOP, LARGE_LOOP_PREFETCH, CHECK_LAST, DIFF2, TAIL,
LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF,
DIFF_LAST_POSITION, DIFF_LAST_POSITION2;
tmp1 = r10, tmp2 = r11, tmp1h = rscratch1, tmp2h = rscratch2;

Label LARGE_LOOP_PREFETCH, LOOP_COMPARE16, DIFF, LESS16, LESS8, CAL_DIFFERENCE, LENGTH_DIFF;

// exit from large loop when less than 64 bytes left to read or we're about
// to prefetch memory behind array border
int largeLoopExitCondition = MAX2(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
// cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used
// update cnt2 counter with already loaded 8 bytes

// before jumping to stub, pre-load 8 bytes already, so do comparison directly
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, CAL_DIFFERENCE);

__ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2));
// update pointers, because of previous read
__ add(str1, str1, wordSize);
Expand All @@ -5338,80 +5329,88 @@ class StubGenerator: public StubCodeGenerator {
__ bind(LARGE_LOOP_PREFETCH);
__ prfm(Address(str1, SoftwarePrefetchHintDistance));
__ prfm(Address(str2, SoftwarePrefetchHintDistance));
compare_string_16_bytes_same(DIFF, DIFF2);
compare_string_16_bytes_same(DIFF, DIFF2);

__ align(OptoLoopAlignment);
for (int i = 0; i < 4; i++) {
__ ldp(tmp1, tmp1h, Address(str1, i * 16));
__ ldp(tmp2, tmp2h, Address(str2, i * 16));
__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(Assembler::NE, DIFF);
}
__ sub(cnt2, cnt2, isLL ? 64 : 32);
compare_string_16_bytes_same(DIFF, DIFF2);
__ add(str1, str1, 64);
__ add(str2, str2, 64);
__ subs(rscratch2, cnt2, largeLoopExitCondition);
compare_string_16_bytes_same(DIFF, DIFF2);
__ br(__ GT, LARGE_LOOP_PREFETCH);
__ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
__ br(Assembler::GE, LARGE_LOOP_PREFETCH);
__ cbz(cnt2, LENGTH_DIFF); // no more chars left?
}
// less than 16 bytes left?
__ subs(cnt2, cnt2, isLL ? 16 : 8);
__ br(__ LT, TAIL);

__ subs(rscratch1, cnt2, isLL ? 16 : 8);
__ br(Assembler::LE, LESS16);
__ align(OptoLoopAlignment);
__ bind(SMALL_LOOP);
compare_string_16_bytes_same(DIFF, DIFF2);
__ subs(cnt2, cnt2, isLL ? 16 : 8);
__ br(__ GE, SMALL_LOOP);
__ bind(TAIL);
__ adds(cnt2, cnt2, isLL ? 16 : 8);
__ br(__ EQ, LAST_CHECK_AND_LENGTH_DIFF);
__ bind(LOOP_COMPARE16);
__ ldp(tmp1, tmp1h, Address(__ post(str1, 16)));
__ ldp(tmp2, tmp2h, Address(__ post(str2, 16)));
__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(Assembler::NE, DIFF);
__ sub(cnt2, cnt2, isLL ? 16 : 8);
__ subs(rscratch2, cnt2, isLL ? 16 : 8);
__ br(Assembler::LT, LESS16);

__ ldp(tmp1, tmp1h, Address(__ post(str1, 16)));
__ ldp(tmp2, tmp2h, Address(__ post(str2, 16)));
__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(Assembler::NE, DIFF);
__ sub(cnt2, cnt2, isLL ? 16 : 8);
__ subs(rscratch2, cnt2, isLL ? 16 : 8);
__ br(Assembler::GE, LOOP_COMPARE16);
__ cbz(cnt2, LENGTH_DIFF);

__ bind(LESS16);
// each 8 compare
__ subs(cnt2, cnt2, isLL ? 8 : 4);
__ br(__ LE, CHECK_LAST);
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, DIFF);
__ br(Assembler::LE, LESS8);
__ ldr(tmp1, Address(__ post(str1, 8)));
__ ldr(tmp2, Address(__ post(str2, 8)));
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, CAL_DIFFERENCE);
__ sub(cnt2, cnt2, isLL ? 8 : 4);
__ bind(CHECK_LAST);

__ bind(LESS8); // directly load last 8 bytes
if (!isLL) {
__ add(cnt2, cnt2, cnt2); // now in bytes
__ add(cnt2, cnt2, cnt2);
}
__ ldr(tmp1, Address(str1, cnt2));
__ ldr(tmp2, Address(str2, cnt2));
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, DIFF);
__ ldr(rscratch1, Address(str1, cnt2));
__ ldr(cnt1, Address(str2, cnt2));
__ eor(rscratch2, rscratch1, cnt1);
__ cbz(rscratch2, LENGTH_DIFF);
// Find the first different characters in the longwords and
// compute their difference.
__ bind(DIFF2);
__ rev(rscratch2, rscratch2);
__ clz(rscratch2, rscratch2);
__ andr(rscratch2, rscratch2, isLL ? -8 : -16);
__ lsrv(rscratch1, rscratch1, rscratch2);
if (isLL) {
__ lsrv(cnt1, cnt1, rscratch2);
__ uxtbw(rscratch1, rscratch1);
__ uxtbw(cnt1, cnt1);
} else {
__ lsrv(cnt1, cnt1, rscratch2);
__ uxthw(rscratch1, rscratch1);
__ uxthw(cnt1, cnt1);
}
__ subw(result, rscratch1, cnt1);
__ b(LENGTH_DIFF);
__ b(CAL_DIFFERENCE);

__ bind(DIFF);
__ cmp(tmp1, tmp2);
__ csel(tmp1, tmp1, tmp1h, Assembler::NE);
__ csel(tmp2, tmp2, tmp2h, Assembler::NE);
// reuse rscratch2 register for the result of eor instruction
__ eor(rscratch2, tmp1, tmp2);

__ bind(CAL_DIFFERENCE);
__ rev(rscratch2, rscratch2);
__ clz(rscratch2, rscratch2);
__ andr(rscratch2, rscratch2, isLL ? -8 : -16);
__ lsrv(tmp1, tmp1, rscratch2);
__ lsrv(tmp2, tmp2, rscratch2);
if (isLL) {
__ lsrv(tmp2, tmp2, rscratch2);
__ uxtbw(tmp1, tmp1);
__ uxtbw(tmp2, tmp2);
} else {
__ lsrv(tmp2, tmp2, rscratch2);
__ uxthw(tmp1, tmp1);
__ uxthw(tmp2, tmp2);
}
__ subw(result, tmp1, tmp2);
__ b(LENGTH_DIFF);
__ bind(LAST_CHECK_AND_LENGTH_DIFF);
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, DIFF);

__ bind(LENGTH_DIFF);
__ ret(lr);
return entry;
Expand Down

1 comment on commit dd83c4e

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.