Skip to content
Permalink
Browse files

8229351: AArch64: the const STUB_THRESHOLD in macroAssembler_aarch64.cpp needs to be tuned

Optimize the stub thresholds of string_compare intrinsics

Reviewed-by: adinn, aph, avoitylov
  • Loading branch information
Patrick Zhang
Patrick Zhang committed Aug 12, 2019
1 parent 27e0cdf commit 8c1efbe38b000da71cc87e769e33edfcdb0199f4
@@ -4919,11 +4919,15 @@ void MacroAssembler::string_compare(Register str1, Register str2,
DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
SHORT_LOOP_START, TAIL_CHECK;

const u1 STUB_THRESHOLD = 64 + 8;
bool isLL = ae == StrIntrinsicNode::LL;
bool isLU = ae == StrIntrinsicNode::LU;
bool isUL = ae == StrIntrinsicNode::UL;

// The stub threshold for LL strings is: 72 (64 + 8) chars
// UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
// LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);

bool str1_isL = isLL || isLU;
bool str2_isL = isLL || isUL;

@@ -4964,7 +4968,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
cmp(str1, str2);
br(Assembler::EQ, DONE);
ldr(tmp2, Address(str2));
cmp(cnt2, STUB_THRESHOLD);
cmp(cnt2, stub_threshold);
br(GE, STUB);
subsw(cnt2, cnt2, minCharsInWord);
br(EQ, TAIL_CHECK);
@@ -4976,7 +4980,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
cmp(str1, str2);
br(Assembler::EQ, DONE);
ldr(tmp2, Address(str2));
cmp(cnt2, STUB_THRESHOLD);
cmp(cnt2, stub_threshold);
br(GE, STUB);
subw(cnt2, cnt2, 4);
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
@@ -4992,7 +4996,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
cmp(str1, str2);
br(Assembler::EQ, DONE);
ldrs(vtmp, Address(str2));
cmp(cnt2, STUB_THRESHOLD);
cmp(cnt2, stub_threshold);
br(GE, STUB);
subw(cnt2, cnt2, 4);
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
@@ -4110,6 +4110,7 @@ class StubGenerator: public StubCodeGenerator {
__ bind(NO_PREFETCH);
__ subs(cnt2, cnt2, 16);
__ br(__ LT, TAIL);
__ align(OptoLoopAlignment);
__ bind(SMALL_LOOP); // smaller loop
__ subs(cnt2, cnt2, 16);
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
@@ -4199,6 +4200,7 @@ class StubGenerator: public StubCodeGenerator {
// less than 16 bytes left?
__ subs(cnt2, cnt2, isLL ? 16 : 8);
__ br(__ LT, TAIL);
__ align(OptoLoopAlignment);
__ bind(SMALL_LOOP);
compare_string_16_bytes_same(DIFF, DIFF2);
__ subs(cnt2, cnt2, isLL ? 16 : 8);
@@ -32,12 +32,12 @@
* parameters: <string length>, <maximum string length delta>
* Input parameters for this test are set according to Aarch64
* String::compareTo intrinsic implementation specifics. Aarch64
* implementation has 1, 4, 8 -characters loops for length < 72 and
* implementation has 1, 4, 8 -bytes loops for length < 72 and
* 16, 32, 64 -characters loops for length >= 72. Code is also affected
* by SoftwarePrefetchHintDistance vm flag value.
* @run main/othervm -XX:SoftwarePrefetchHintDistance=192 compiler.intrinsics.string.TestStringCompareToDifferentLength 4 2 5 10 13 17 20 25 71 72 73 88 90 192 193 208 209
* @run main/othervm -XX:SoftwarePrefetchHintDistance=16 compiler.intrinsics.string.TestStringCompareToDifferentLength 4 2 5 10 13 17 20 25 71 72 73 88 90
* @run main/othervm -XX:SoftwarePrefetchHintDistance=-1 compiler.intrinsics.string.TestStringCompareToDifferentLength 4 2 5 10 13 17 20 25 71 72 73 88 90
* @run main/othervm -XX:SoftwarePrefetchHintDistance=192 compiler.intrinsics.string.TestStringCompareToDifferentLength 4 2 5 10 13 17 20 23 24 25 71 72 73 88 90 192 193 208 209
* @run main/othervm -XX:SoftwarePrefetchHintDistance=16 compiler.intrinsics.string.TestStringCompareToDifferentLength 4 2 5 10 13 17 20 23 24 25 71 72 73 88 90
* @run main/othervm -XX:SoftwarePrefetchHintDistance=-1 compiler.intrinsics.string.TestStringCompareToDifferentLength 4 2 5 10 13 17 20 23 24 25 71 72 73 88 90
*/

package compiler.intrinsics.string;
@@ -32,16 +32,16 @@
* String size is specified via commandline. Various size values can
* be specified during intrinsic development in order to test cases
* specific for new or modified intrinsic implementation. Aarch64
* implementation has 1, 4, 8 -characters loops for length < 72 and
* 16, 32, 64 -characters loops for string length >= 72. Code is also
* implementation has 1, 4, 8 -bytes loops for length < 72 and
* 16, 32, 64 -bytes loops for string length >= 72. Code is also
* affected by SoftwarePrefetchHintDistance flag value.
* Test class can also accept "-fullmode" parameter
* with maxLength parameter after it. Then it will iterate through all
* string length values up to maxLength parameter (inclusive). It takes
* a lot of time but is useful for development.
* @run main/othervm -XX:SoftwarePrefetchHintDistance=192 compiler.intrinsics.string.TestStringCompareToSameLength 2 5 10 13 17 20 25 71 72 73 88 90 192 193 208 209
* @run main/othervm -XX:SoftwarePrefetchHintDistance=16 compiler.intrinsics.string.TestStringCompareToSameLength 2 5 10 13 17 20 25 71 72 73 88 90
* @run main/othervm -XX:SoftwarePrefetchHintDistance=-1 compiler.intrinsics.string.TestStringCompareToSameLength 2 5 10 13 17 20 25 71 72 73 88 90
* @run main/othervm -XX:SoftwarePrefetchHintDistance=192 compiler.intrinsics.string.TestStringCompareToSameLength 2 5 10 13 17 20 25 35 36 37 71 72 73 88 90 192 193 208 209
* @run main/othervm -XX:SoftwarePrefetchHintDistance=16 compiler.intrinsics.string.TestStringCompareToSameLength 2 5 10 13 17 20 25 35 36 37 71 72 73 88 90
* @run main/othervm -XX:SoftwarePrefetchHintDistance=-1 compiler.intrinsics.string.TestStringCompareToSameLength 2 5 10 13 17 20 25 35 36 37 71 72 73 88 90
*/

package compiler.intrinsics.string;

0 comments on commit 8c1efbe

Please sign in to comment.
You can’t perform that action at this time.