Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8268231: Aarch64: Use ldp in intrinsics for String.compareTo #4722

Closed
wants to merge 8 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -4656,18 +4656,6 @@ class StubGenerator: public StubCodeGenerator {
return entry;
}

// Emits code for comparing 16 bytes of strings with the same encoding.
//
// The sequence is pipelined: each 8-byte comparison is interleaved with the
// loads that feed the next comparison, so the statement order matters.
//
// Register contract, as visible from the sequence below:
//  - tmp1 (r10) / tmp2 (r11) are compared before they are reloaded here, so
//    the caller is expected to have loaded them already.
//  - On fall-through, tmp1 / tmp2 hold newly loaded words that have not been
//    compared yet; the next invocation (or the caller) must compare them.
//  - rscratch1, rscratch2 and cnt1 (r2) are overwritten.
//  - str1 and str2 are each used by two loads with post(…, 8) addressing.
//    NOTE(review): assumes post() is post-indexed addressing, i.e. each
//    pointer advances by 16 bytes in total — confirm against Address/post.
//
// DIFF1: branch target taken when tmp1 != tmp2 (rscratch2 = tmp1 ^ tmp2).
// DIFF2: branch target taken when rscratch1 != cnt1
//        (rscratch2 = rscratch1 ^ cnt1).
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
// NOTE: `result` is declared for symmetry with the callers' register map but
// is not used in this helper.
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, tmp1 = r10, tmp2 = r11;
// Start loading the next str1 word while the previous pair is compared.
__ ldr(rscratch1, Address(__ post(str1, 8)));
// Compare the pair that was loaded before this helper was entered.
__ eor(rscratch2, tmp1, tmp2);
// cnt1 is used as a scratch register for the next str2 word.
__ ldr(cnt1, Address(__ post(str2, 8)));
// Non-zero xor => tmp1 and tmp2 differ; they still hold the differing words.
__ cbnz(rscratch2, DIFF1);
// Refill tmp1 for the following comparison step.
__ ldr(tmp1, Address(__ post(str1, 8)));
// Compare the pair loaded at the top of this helper.
__ eor(rscratch2, rscratch1, cnt1);
// Refill tmp2 for the following comparison step.
__ ldr(tmp2, Address(__ post(str2, 8)));
// Non-zero xor => rscratch1 and cnt1 hold the differing words.
__ cbnz(rscratch2, DIFF2);
}

// code for comparing 16 characters of strings with Latin1 and Utf16 encoding
void compare_string_16_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
@@ -4874,15 +4862,18 @@ class StubGenerator: public StubCodeGenerator {
: "compare_long_string_same_encoding UU");
address entry = __ pc();
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
tmp1 = r10, tmp2 = r11;
Label SMALL_LOOP, LARGE_LOOP_PREFETCH, CHECK_LAST, DIFF2, TAIL,
LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF,
DIFF_LAST_POSITION, DIFF_LAST_POSITION2;
tmp1 = r10, tmp2 = r11, tmp1h = rscratch1, tmp2h = rscratch2;

Label LARGE_LOOP_PREFETCH, LOOP_COMPARE16, DIFF, LESS16, LESS8, CAL_DIFFERENCE, LENGTH_DIFF;

// exit from the large loop when fewer than 64 bytes are left to read or we're
// about to prefetch memory beyond the array border
int largeLoopExitCondition = MAX2(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
// cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used
// update cnt2 counter with already loaded 8 bytes

// 8 bytes of each string were already loaded before jumping to this stub, so compare them directly
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, CAL_DIFFERENCE);

__ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2));
// update pointers, because of previous read
__ add(str1, str1, wordSize);
@@ -4891,80 +4882,88 @@ class StubGenerator: public StubCodeGenerator {
__ bind(LARGE_LOOP_PREFETCH);
__ prfm(Address(str1, SoftwarePrefetchHintDistance));
__ prfm(Address(str2, SoftwarePrefetchHintDistance));
compare_string_16_bytes_same(DIFF, DIFF2);
compare_string_16_bytes_same(DIFF, DIFF2);

__ align(OptoLoopAlignment);
for (int i = 0; i < 4; i++) {
__ ldp(tmp1, tmp1h, Address(str1, i * 16));
__ ldp(tmp2, tmp2h, Address(str2, i * 16));
__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(Assembler::NE, DIFF);
}
__ sub(cnt2, cnt2, isLL ? 64 : 32);
compare_string_16_bytes_same(DIFF, DIFF2);
__ add(str1, str1, 64);
__ add(str2, str2, 64);
__ subs(rscratch2, cnt2, largeLoopExitCondition);
compare_string_16_bytes_same(DIFF, DIFF2);
__ br(__ GT, LARGE_LOOP_PREFETCH);
__ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
__ br(Assembler::GE, LARGE_LOOP_PREFETCH);
__ cbz(cnt2, LENGTH_DIFF); // no more chars left?
}
// less than 16 bytes left?
__ subs(cnt2, cnt2, isLL ? 16 : 8);
__ br(__ LT, TAIL);

__ subs(rscratch1, cnt2, isLL ? 16 : 8);
__ br(Assembler::LE, LESS16);
__ align(OptoLoopAlignment);
__ bind(SMALL_LOOP);
compare_string_16_bytes_same(DIFF, DIFF2);
__ subs(cnt2, cnt2, isLL ? 16 : 8);
__ br(__ GE, SMALL_LOOP);
__ bind(TAIL);
__ adds(cnt2, cnt2, isLL ? 16 : 8);
__ br(__ EQ, LAST_CHECK_AND_LENGTH_DIFF);
__ bind(LOOP_COMPARE16);
__ ldp(tmp1, tmp1h, Address(__ post(str1, 16)));
__ ldp(tmp2, tmp2h, Address(__ post(str2, 16)));
__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(Assembler::NE, DIFF);
__ sub(cnt2, cnt2, isLL ? 16 : 8);
__ subs(rscratch2, cnt2, isLL ? 16 : 8);
__ br(Assembler::LT, LESS16);

__ ldp(tmp1, tmp1h, Address(__ post(str1, 16)));
__ ldp(tmp2, tmp2h, Address(__ post(str2, 16)));
__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(Assembler::NE, DIFF);
__ sub(cnt2, cnt2, isLL ? 16 : 8);
__ subs(rscratch2, cnt2, isLL ? 16 : 8);
__ br(Assembler::GE, LOOP_COMPARE16);
__ cbz(cnt2, LENGTH_DIFF);

__ bind(LESS16);
// compare 8 bytes at a time
__ subs(cnt2, cnt2, isLL ? 8 : 4);
__ br(__ LE, CHECK_LAST);
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, DIFF);
__ br(Assembler::LE, LESS8);
__ ldr(tmp1, Address(__ post(str1, 8)));
__ ldr(tmp2, Address(__ post(str2, 8)));
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, CAL_DIFFERENCE);
__ sub(cnt2, cnt2, isLL ? 8 : 4);
__ bind(CHECK_LAST);

__ bind(LESS8); // directly load last 8 bytes
if (!isLL) {
__ add(cnt2, cnt2, cnt2); // now in bytes
__ add(cnt2, cnt2, cnt2);
}
__ ldr(tmp1, Address(str1, cnt2));
__ ldr(tmp2, Address(str2, cnt2));
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, DIFF);
__ ldr(rscratch1, Address(str1, cnt2));
__ ldr(cnt1, Address(str2, cnt2));
__ eor(rscratch2, rscratch1, cnt1);
__ cbz(rscratch2, LENGTH_DIFF);
// Find the first different characters in the longwords and
// compute their difference.
__ bind(DIFF2);
__ rev(rscratch2, rscratch2);
__ clz(rscratch2, rscratch2);
__ andr(rscratch2, rscratch2, isLL ? -8 : -16);
__ lsrv(rscratch1, rscratch1, rscratch2);
if (isLL) {
__ lsrv(cnt1, cnt1, rscratch2);
__ uxtbw(rscratch1, rscratch1);
__ uxtbw(cnt1, cnt1);
} else {
__ lsrv(cnt1, cnt1, rscratch2);
__ uxthw(rscratch1, rscratch1);
__ uxthw(cnt1, cnt1);
}
__ subw(result, rscratch1, cnt1);
__ b(LENGTH_DIFF);
__ b(CAL_DIFFERENCE);

__ bind(DIFF);
__ cmp(tmp1, tmp2);
__ csel(tmp1, tmp1, tmp1h, Assembler::NE);
__ csel(tmp2, tmp2, tmp2h, Assembler::NE);
// reuse rscratch2 register for the result of eor instruction
__ eor(rscratch2, tmp1, tmp2);

__ bind(CAL_DIFFERENCE);
__ rev(rscratch2, rscratch2);
__ clz(rscratch2, rscratch2);
__ andr(rscratch2, rscratch2, isLL ? -8 : -16);
__ lsrv(tmp1, tmp1, rscratch2);
__ lsrv(tmp2, tmp2, rscratch2);
if (isLL) {
__ lsrv(tmp2, tmp2, rscratch2);
__ uxtbw(tmp1, tmp1);
__ uxtbw(tmp2, tmp2);
} else {
__ lsrv(tmp2, tmp2, rscratch2);
__ uxthw(tmp1, tmp1);
__ uxthw(tmp2, tmp2);
}
__ subw(result, tmp1, tmp2);
__ b(LENGTH_DIFF);
__ bind(LAST_CHECK_AND_LENGTH_DIFF);
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, DIFF);

__ bind(LENGTH_DIFF);
__ ret(lr);
return entry;