Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8268231: Aarch64: Use ldp in intrinsics for String.compareTo #4722

Closed
wants to merge 8 commits into from
Closed
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -93,6 +93,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Use SIMD instructions in generated array equals code") \
product(bool, UseSimpleArrayEquals, false, \
"Use simpliest and shortest implementation for array equals") \
product(bool, UseStringCompareWithLdp, false, \
"Use ldp enhance string compare intrinsic") \
product(bool, UseSIMDForBigIntegerShiftIntrinsics, true, \
"Use SIMD instructions for left/right shift of BigInteger") \
product(bool, AvoidUnalignedAccesses, false, \
@@ -4860,6 +4860,149 @@ class StubGenerator: public StubCodeGenerator {
return start;
}

// r0 = result
// r1 = str1
// r2 = cnt1
// r3 = str2
// r4 = cnt2
// r10 = tmp1
// r11 = tmp2
address generate_compare_long_string_same_encoding_withLdp(bool isLL) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", isLL
? "compare_long_string_same_encoding LL"
: "compare_long_string_same_encoding UU");

address entry = __ pc();
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
tmp1 = r10, tmp2 = r11, tmp1h = rscratch1, tmp2h = rscratch2;


Label LOOP_COMPARE64, LOOP_COMPARE16, DIFF, LESS16, LESS8, CAL_DIFFERENCE, LENGTH_DIFF;

int largeLoopSize = 64/(isLL ? 1 : 2);

// before jumping to stub, pre-load 8 bytes already, so do comparison directly
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, CAL_DIFFERENCE);

__ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2));
// update pointers, because of previous read
__ add(str1, str1, wordSize);
__ add(str2, str2, wordSize);

__ align(OptoLoopAlignment);
__ bind(LOOP_COMPARE64);
__ ldp(tmp1, tmp1h, Address(str1));
__ ldp(tmp2, tmp2h, Address(str2));
__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(__ NE, DIFF);

__ ldp(tmp1, tmp1h, Address(str1,16));
__ ldp(tmp2, tmp2h, Address(str2,16));
__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(__ NE, DIFF);

__ ldp(tmp1, tmp1h, Address(str1, 32));
__ ldp(tmp2, tmp2h, Address(str2, 32));
__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(__ NE, DIFF);

__ ldp(tmp1, tmp1h, Address(str1,48));
__ ldp(tmp2, tmp2h, Address(str2,48));
__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(__ NE, DIFF);

__ sub(cnt2, cnt2, isLL ? 64 : 32);
__ add(str1, str1, 64);
__ add(str2, str2, 64);
__ subs(rscratch2, cnt2, largeLoopSize);
__ br(__ GE, LOOP_COMPARE64);
__ cbz(cnt2, LENGTH_DIFF); // no more chars left?


__ subs(rscratch1, cnt2, isLL ? 16 : 8);
__ br(__ LE, LESS16);
__ align(OptoLoopAlignment);

__ bind(LOOP_COMPARE16);
__ ldp(tmp1, tmp1h, Address(__ post(str1, 16)));
__ ldp(tmp2, tmp2h, Address(__ post(str2, 16)));

__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(__ NE, DIFF);
__ sub(cnt2, cnt2, isLL ? 16 : 8);
__ subs(rscratch2, cnt2, isLL ? 16 : 8);
__ br(__ LT, LESS16);

__ ldp(tmp1, tmp1h, Address(__ post(str1, 16)));
__ ldp(tmp2, tmp2h, Address(__ post(str2, 16)));

__ cmp(tmp1, tmp2);
__ ccmp(tmp1h, tmp2h, 0, Assembler::EQ);
__ br(__ NE, DIFF);
__ sub(cnt2, cnt2, isLL ? 16 : 8);

__ subs(rscratch2, cnt2, isLL ? 16 : 8);
__ br(__ GE, LOOP_COMPARE16);
__ cbz(cnt2, LENGTH_DIFF);

__ bind(LESS16);
// each 8 compare
__ subs(cnt2, cnt2, isLL ? 8 : 4);
__ br(__ LE, LESS8);
__ ldr(tmp1, Address(__ post(str1, 8)));
__ ldr(tmp2, Address(__ post(str2, 8)));
__ eor(rscratch2, tmp1, tmp2);
__ cbnz(rscratch2, CAL_DIFFERENCE);
__ sub(cnt2, cnt2, isLL ? 8 : 4);

__ bind(LESS8); // directly load last 8 bytes
if (!isLL) {
__ add(cnt2, cnt2, cnt2);
}
__ ldr(tmp1, Address(str1, cnt2));
__ ldr(tmp2, Address(str2, cnt2));
__ eor(rscratch2, tmp1, tmp2);
__ cbz(rscratch2, LENGTH_DIFF);
__ b(CAL_DIFFERENCE);


__ bind(DIFF);
__ cmp(tmp1, tmp2);
__ csel(tmp1, tmp1, tmp1h, Assembler::NE);
__ csel(tmp2, tmp2, tmp2h, Assembler::NE);
// reuse rscratch2 register for the result of eor instruction
__ eor(rscratch2, tmp1, tmp2);

__ bind(CAL_DIFFERENCE);
__ rev(rscratch2, rscratch2);
__ clz(rscratch2, rscratch2);
__ andr(rscratch2, rscratch2, isLL ? -8 : -16);
__ lsrv(tmp1, tmp1, rscratch2);
__ lsrv(tmp2, tmp2, rscratch2);
if (isLL) {
__ uxtbw(tmp1, tmp1);
__ uxtbw(tmp2, tmp2);
} else {
__ uxthw(tmp1, tmp1);
__ uxthw(tmp2, tmp2);
}

__ subw(result, tmp1, tmp2);
__ b(LENGTH_DIFF);

__ bind(LENGTH_DIFF);
__ ret(lr);
return entry;
}


// r0 = result
// r1 = str1
// r2 = cnt1
@@ -4971,10 +5114,13 @@ class StubGenerator: public StubCodeGenerator {
}

void generate_compare_long_strings() {
StubRoutines::aarch64::_compare_long_string_LL
= generate_compare_long_string_same_encoding(true);
StubRoutines::aarch64::_compare_long_string_UU
= generate_compare_long_string_same_encoding(false);
if (UseStringCompareWithLdp) {
StubRoutines::aarch64::_compare_long_string_LL = generate_compare_long_string_same_encoding_withLdp(true);
StubRoutines::aarch64::_compare_long_string_UU = generate_compare_long_string_same_encoding_withLdp(false);
} else {
StubRoutines::aarch64::_compare_long_string_LL = generate_compare_long_string_same_encoding(true);
StubRoutines::aarch64::_compare_long_string_UU = generate_compare_long_string_same_encoding(false);
}
StubRoutines::aarch64::_compare_long_string_LU
= generate_compare_long_string_different_encoding(true);
StubRoutines::aarch64::_compare_long_string_UL
@@ -0,0 +1,128 @@
/*
* Copyright (c) 2021, Huawei Technologies Co. Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.lang;

import org.openjdk.jmh.annotations.*;
import java.util.concurrent.TimeUnit;

/*
* This benchmark naively explores String::compare performance
*/
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Benchmark)
public class StringCompare {
@Param({"77", "128"})
int size;

@Param({"7", "15", "31", "47", "63", "71"})
int diff_pos;


private String str1;
private String str2;

@Setup(Level.Trial)
public void init() {
str1 = newString(size, 'c', diff_pos, '1');
str2 = newString(size, 'c', diff_pos, '2');
}

public String newString(int length, char charToFill, int diff_pos, char diff_char) {
if (length > 0) {
char[] array = new char[length];
for (int i = 0; i < length; i++) {
array[i] = charToFill;
}
array[diff_pos] = diff_char;
return new String(array);
}
return "";
}

@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int compareLLDiffStrings() {
int result = 0;
for (int i = 0; i < 1000; i++) {
result ^= str1.compareTo(str2);
}
return result;
}

@Benchmark
@Fork(jvmArgsAppend = { "-XX:+UseStringCompareWithLdp"})
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int compareLLDiffStringsWithLdp() {
int result = 0;
for (int i = 0; i < 1000; i++) {
result ^= str1.compareTo(str2);
}
return result;
}

@Benchmark
@Fork(jvmArgsAppend = {"-XX:-CompactStrings"})
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int compareUUDiffStrings() {
int result = 0;
for (int i = 0; i < 1000; i++) {
result ^= str1.compareTo(str2);
}
return result;
}

@Benchmark
@Fork(jvmArgsAppend = {"-XX:-CompactStrings", "-XX:+UseStringCompareWithLdp"})
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int compareUUDiffStringsWithLdp() {
int result = 0;
for (int i = 0; i < 1000; i++) {
result ^= str1.compareTo(str2);
}
return result;
}

@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
@Fork(jvmArgsAppend = {"-XX:-CompactStrings", "-XX:-UseCompressedClassPointers"})
public int compareUUDiffStringsTurnOffCCP() {
int result = 0;
for (int i = 0; i < 1000; i++) {
result ^= str1.compareTo(str2);
}
return result;
}

@Benchmark
@Fork(jvmArgsAppend = {"-XX:-CompactStrings", "-XX:-UseCompressedClassPointers", "-XX:+UseStringCompareWithLdp"})
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int compareUUDiffStringsTurnOffCCPWithLdp() {
int result = 0;
for (int i = 0; i < 1000; i++) {
result ^= str1.compareTo(str2);
}
return result;
}
}