From a0b09c9f1d18ea2abdd792ae07c449b66fab7bb2 Mon Sep 17 00:00:00 2001 From: redestad Date: Fri, 10 Feb 2023 13:36:03 +0100 Subject: [PATCH 1/7] Speed up various String comparison methods with ArraysSupport.mismatch --- .../share/classes/java/lang/String.java | 60 ++++++++----------- .../share/classes/java/lang/StringLatin1.java | 8 +-- .../openjdk/bench/java/lang/StringOther.java | 7 +++ 3 files changed, 33 insertions(+), 42 deletions(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 1897a06cd6008..3a845a5c08532 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -51,6 +51,7 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; +import jdk.internal.util.ArraysSupport; import jdk.internal.util.Preconditions; import jdk.internal.vm.annotation.ForceInline; import jdk.internal.vm.annotation.IntrinsicCandidate; @@ -1272,8 +1273,7 @@ private static void throwUnmappable(int off) { } private static void throwUnmappable(byte[] val) { - int dp = 0; - while (dp < val.length && val[dp] >=0) { dp++; } + int dp = StringCoding.countPositives(val, 0, val.length); throwUnmappable(dp); } @@ -1870,23 +1870,17 @@ private boolean nonSyncContentEquals(AbstractStringBuilder sb) { if (len != sb.length()) { return false; } - byte v1[] = value; - byte v2[] = sb.getValue(); + byte[] v1 = value; + byte[] v2 = sb.getValue(); byte coder = coder(); if (coder == sb.getCoder()) { - int n = v1.length; - for (int i = 0; i < n; i++) { - if (v1[i] != v2[i]) { - return false; - } - } + return v1.length <= v2.length && ArraysSupport.mismatch(v1, v2, v1.length) < 0; } else { if (coder != LATIN1) { // utf16 str and latin1 abs can never be "equal" return false; } return StringUTF16.contentEquals(v1, v2, len); } - return true; } /** @@ -2024,8 +2018,8 @@ public boolean equalsIgnoreCase(String anotherString) { * lexicographically greater than the string argument. */ public int compareTo(String anotherString) { - byte v1[] = value; - byte v2[] = anotherString.value; + byte[] v1 = value; + byte[] v2 = anotherString.value; byte coder = coder(); if (coder == anotherString.coder()) { return coder == LATIN1 ? StringLatin1.compareTo(v1, v2) @@ -2060,8 +2054,8 @@ private static class CaseInsensitiveComparator private static final long serialVersionUID = 8575799808933029326L; public int compare(String s1, String s2) { - byte v1[] = s1.value; - byte v2[] = s2.value; + byte[] v1 = s1.value; + byte[] v2 = s2.value; byte coder = s1.coder(); if (coder == s2.coder()) { return coder == LATIN1 ? StringLatin1.compareToCI(v1, v2) @@ -2136,25 +2130,22 @@ public int compareToIgnoreCase(String str) { * {@code false} otherwise. */ public boolean regionMatches(int toffset, String other, int ooffset, int len) { - byte tv[] = value; - byte ov[] = other.value; // Note: toffset, ooffset, or len might be near -1>>>1. if ((ooffset < 0) || (toffset < 0) || (toffset > (long)length() - len) || (ooffset > (long)other.length() - len)) { return false; } + byte[] tv = value; + byte[] ov = other.value; byte coder = coder(); if (coder == other.coder()) { - if (!isLatin1() && (len > 0)) { - toffset = toffset << 1; - ooffset = ooffset << 1; - len = len << 1; - } - while (len-- > 0) { - if (tv[toffset++] != ov[ooffset++]) { - return false; - } + if (coder == LATIN1) { + return ArraysSupport.mismatch(tv, toffset, + ov, ooffset, len) < 0; + } else { + return ArraysSupport.mismatch(tv, toffset << UTF16, + ov, ooffset << UTF16, len << UTF16) < 0; } } else { if (coder == LATIN1) { @@ -2235,8 +2226,8 @@ public boolean regionMatches(boolean ignoreCase, int toffset, || (ooffset > (long)other.length() - len)) { return false; } - byte tv[] = value; - byte ov[] = other.value; + byte[] tv = value; + byte[] ov = other.value; byte coder = coder(); if (coder == other.coder()) { return coder == LATIN1 @@ -2270,18 +2261,15 @@ public boolean startsWith(String prefix, int toffset) { if (toffset < 0 || toffset > length() - prefix.length()) { return false; } - byte ta[] = value; - byte pa[] = prefix.value; + byte[] ta = value; + byte[] pa = prefix.value; int po = 0; int pc = pa.length; byte coder = coder(); if (coder == prefix.coder()) { - int to = (coder == LATIN1) ? toffset : toffset << 1; - while (po < pc) { - if (ta[to++] != pa[po++]) { - return false; - } - } + toffset <<= coder; + return ArraysSupport.mismatch(ta, toffset, + pa, 0, pc) < 0; } else { if (coder == LATIN1) { // && pcoder == UTF16 return false; diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java index 526d752f05833..7749597a24a59 100644 --- a/src/java.base/share/classes/java/lang/StringLatin1.java +++ b/src/java.base/share/classes/java/lang/StringLatin1.java @@ -109,12 +109,8 @@ public static int compareTo(byte[] value, byte[] other) { public static int compareTo(byte[] value, byte[] other, int len1, int len2) { int lim = Math.min(len1, len2); - for (int k = 0; k < lim; k++) { - if (value[k] != other[k]) { - return getChar(value, k) - getChar(other, k); - } - } - return len1 - len2; + int k = ArraysSupport.mismatch(value, other, lim); + return (k < 0) ? len1 - len2 : getChar(value, k) - getChar(other, k); } @IntrinsicCandidate diff --git a/test/micro/org/openjdk/bench/java/lang/StringOther.java b/test/micro/org/openjdk/bench/java/lang/StringOther.java index e6bee3f037690..d08327fe1b064 100644 --- a/test/micro/org/openjdk/bench/java/lang/StringOther.java +++ b/test/micro/org/openjdk/bench/java/lang/StringOther.java @@ -100,4 +100,11 @@ public void regionMatchesLatin1(Blackhole bh) { bh.consume(str2.regionMatches(true, 16, str1UP, 0, 8)); bh.consume(str3.regionMatches(true, 6, str4, 1, 2)); } + + @Benchmark + public void regionMatchesLatin1CaseSensitive(Blackhole bh) { + bh.consume(str1.regionMatches(false, 0, str2, 0, str1.length())); + bh.consume(str2.regionMatches(false, 16, str1UP, 0, 8)); + bh.consume(str3.regionMatches(false, 6, str4, 1, 2)); + } } From 6e1af0da59942e0256627aa31c170e3f320dd227 Mon Sep 17 00:00:00 2001 From: redestad Date: Fri, 10 Feb 2023 14:25:02 +0100 Subject: [PATCH 2/7] Add a few micros, apply optimization to StringUTF16.compareValues --- .../share/classes/java/lang/StringUTF16.java | 8 ++++---- .../org/openjdk/bench/java/lang/StringBuilders.java | 13 +++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/java.base/share/classes/java/lang/StringUTF16.java b/src/java.base/share/classes/java/lang/StringUTF16.java index c65435c0ac1ac..a1bd3eb08fca7 100644 --- a/src/java.base/share/classes/java/lang/StringUTF16.java +++ b/src/java.base/share/classes/java/lang/StringUTF16.java @@ -299,12 +299,12 @@ public static int compareTo(byte[] value, byte[] other, int len1, int len2) { private static int compareValues(byte[] value, byte[] other, int len1, int len2) { int lim = Math.min(len1, len2); - for (int k = 0; k < lim; k++) { + int k = ArraysSupport.mismatch(value, other, lim << 1); + if (k >= 0) { + k >>= 1; char c1 = getChar(value, k); char c2 = getChar(other, k); - if (c1 != c2) { - return c1 - c2; - } + return c1 - c2; } return len1 - len2; } diff --git a/test/micro/org/openjdk/bench/java/lang/StringBuilders.java b/test/micro/org/openjdk/bench/java/lang/StringBuilders.java index e2a72a9ecb89c..40f41659e7c00 100644 --- a/test/micro/org/openjdk/bench/java/lang/StringBuilders.java +++ b/test/micro/org/openjdk/bench/java/lang/StringBuilders.java @@ -51,7 +51,9 @@ public class StringBuilders { private String[] str3p9p8; private String[] str22p40p31; private StringBuilder sbLatin1; + private StringBuilder sbLatin2; private StringBuilder sbUtf16; + private StringBuilder sbUtf17; @Setup public void setup() { @@ -64,7 +66,9 @@ public void setup() { str3p9p8 = new String[]{"123", "123456789", "12345678"}; str22p40p31 = new String[]{"1234567890123456789012", "1234567890123456789012345678901234567890", "1234567890123456789012345678901"}; sbLatin1 = new StringBuilder("Latin1 string"); + sbLatin2 = new StringBuilder("Latin1 string"); sbUtf16 = new StringBuilder("UTF-\uFF11\uFF16 string"); + sbUtf17 = new StringBuilder("UTF-\uFF11\uFF16 string"); } @Benchmark @@ -250,6 +254,15 @@ public String toStringCharWithFloat8() { return result.toString(); } + @Benchmark + public int compareToLatin1() { + return sbLatin1.compareTo(sbLatin2); + } + + @Benchmark + public int compareToUTF16() { + return sbUtf16.compareTo(sbUtf17); + } @Benchmark public String toStringCharWithMixed8() { From 372f1377709f207e83b73b637e89d5d9b53d667a Mon Sep 17 00:00:00 2001 From: redestad Date: Fri, 10 Feb 2023 16:49:26 +0100 Subject: [PATCH 3/7] Revert UTF16.compareValues --- src/java.base/share/classes/java/lang/StringUTF16.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/java.base/share/classes/java/lang/StringUTF16.java b/src/java.base/share/classes/java/lang/StringUTF16.java index a1bd3eb08fca7..c65435c0ac1ac 100644 --- a/src/java.base/share/classes/java/lang/StringUTF16.java +++ b/src/java.base/share/classes/java/lang/StringUTF16.java @@ -299,12 +299,12 @@ public static int compareTo(byte[] value, byte[] other, int len1, int len2) { private static int compareValues(byte[] value, byte[] other, int len1, int len2) { int lim = Math.min(len1, len2); - int k = ArraysSupport.mismatch(value, other, lim << 1); - if (k >= 0) { - k >>= 1; + for (int k = 0; k < lim; k++) { char c1 = getChar(value, k); char c2 = getChar(other, k); - return c1 - c2; + if (c1 != c2) { + return c1 - c2; + } } return len1 - len2; } From db97c8a794ab3da6184bb6688d88a933374acb01 Mon Sep 17 00:00:00 2001 From: redestad Date: Fri, 10 Feb 2023 16:56:11 +0100 Subject: [PATCH 4/7] Add micro from @eirbjo --- .../bench/java/lang/StringComparisons.java | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 test/micro/org/openjdk/bench/java/lang/StringComparisons.java diff --git a/test/micro/org/openjdk/bench/java/lang/StringComparisons.java b/test/micro/org/openjdk/bench/java/lang/StringComparisons.java new file mode 100644 index 0000000000000..dbc93406403ac --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/StringComparisons.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.lang; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +/* + * This benchmark naively explores String::startsWith and other String comparison + * methods performance + */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Benchmark) +@Warmup(iterations = 5, time = 1) +@Measurement(iterations = 5, time = 1) +@Fork(value = 3) +public class StringComparisons { + + public String longString = new String("jkljayeiksdhsdkjkdjkldfnbmnbdgfaddfflsdhbdkj"); + public String equallyLongString = new String("jkljayeiksdhsdkjkdjkldfnbmnbdgfaddfflsdhbdkj"); + + public String longerString = new String("jkljayeiksdhsdkjkdjkldfnbmnbdgfaddfflsdhbdkj_"); + + public String endsWithA= new String("jkljayeiksdhsdkjkdjkldfnbmnbdgfaddfflsdhbdkjA"); + public String endsWithB= new String("jkljayeiksdhsdkjkdjkldfnbmnbdgfaddfflsdhbdkjB"); + + @Benchmark + public void startsWith(Blackhole blackhole) { + blackhole.consume(longerString.startsWith(longString)); + } + + @Benchmark + public void compareTo(Blackhole blackhole) { + blackhole.consume(endsWithA.compareTo(endsWithB)); + } + + @Benchmark + public void regionMatches(Blackhole blackhole) { + blackhole.consume(endsWithA.regionMatches( 0, endsWithB, 0, endsWithB.length())); + } + + @Benchmark + public void stringEquals(Blackhole blackhole) { + blackhole.consume(endsWithA.equals( equallyLongString)); + } +} From f5b3b09612977d1c23e9e95b39af2276d738f926 Mon Sep 17 00:00:00 2001 From: redestad Date: Fri, 10 Feb 2023 17:41:03 +0100 Subject: [PATCH 5/7] Expand micro coverage --- .../bench/java/lang/StringComparisons.java | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/test/micro/org/openjdk/bench/java/lang/StringComparisons.java b/test/micro/org/openjdk/bench/java/lang/StringComparisons.java index dbc93406403ac..2cf29c533a41a 100644 --- a/test/micro/org/openjdk/bench/java/lang/StringComparisons.java +++ b/test/micro/org/openjdk/bench/java/lang/StringComparisons.java @@ -23,13 +23,12 @@ package org.openjdk.bench.java.lang; import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.infra.Blackhole; import java.util.concurrent.TimeUnit; /* - * This benchmark naively explores String::startsWith and other String comparison - * methods performance + * This benchmark naively explores String::startsWith and other String + * comparison methods */ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @@ -39,31 +38,44 @@ @Fork(value = 3) public class StringComparisons { - public String longString = new String("jkljayeiksdhsdkjkdjkldfnbmnbdgfaddfflsdhbdkj"); - public String equallyLongString = new String("jkljayeiksdhsdkjkdjkldfnbmnbdgfaddfflsdhbdkj"); + @Param({"6", "15", "1024"}) + public int size; - public String longerString = new String("jkljayeiksdhsdkjkdjkldfnbmnbdgfaddfflsdhbdkj_"); + @Param({"true", "false"}) + public boolean utf16; - public String endsWithA= new String("jkljayeiksdhsdkjkdjkldfnbmnbdgfaddfflsdhbdkjA"); - public String endsWithB= new String("jkljayeiksdhsdkjkdjkldfnbmnbdgfaddfflsdhbdkjB"); + public String string; + public String equalString; + + public String endsWithA; + public String endsWithB; + + @Setup + public void setup() { + String c = utf16 ? "\uff11" : "c"; + string = c.repeat(size); + equalString = c.repeat(size); + endsWithA = c.repeat(size).concat("A"); + endsWithB = c.repeat(size).concat("B"); + } @Benchmark - public void startsWith(Blackhole blackhole) { - blackhole.consume(longerString.startsWith(longString)); + public boolean startsWith() { + return endsWithA.startsWith(string); } @Benchmark - public void compareTo(Blackhole blackhole) { - blackhole.consume(endsWithA.compareTo(endsWithB)); + public int compareTo() { + return endsWithA.compareTo(endsWithB); } @Benchmark - public void regionMatches(Blackhole blackhole) { - blackhole.consume(endsWithA.regionMatches( 0, endsWithB, 0, endsWithB.length())); + public boolean regionMatches() { + return endsWithA.regionMatches(0, endsWithB, 0, endsWithB.length()); } @Benchmark - public void stringEquals(Blackhole blackhole) { - blackhole.consume(endsWithA.equals( equallyLongString)); + public boolean stringEquals() { + return endsWithA.equals(endsWithB); } } From 6cac333d8f9f34e16168447c60f28a6b0d31623f Mon Sep 17 00:00:00 2001 From: redestad Date: Mon, 13 Feb 2023 10:12:35 +0100 Subject: [PATCH 6/7] Remove overlapping micros, extend testing to endsWith, regionCI and some minor improvements to String::regionMatches --- .../share/classes/java/lang/String.java | 12 ++++---- .../bench/java/lang/StringComparisons.java | 16 ++++++---- .../openjdk/bench/java/lang/StringOther.java | 30 ------------------- 3 files changed, 17 insertions(+), 41 deletions(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 3a845a5c08532..85c7d130ab00e 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -2140,13 +2140,13 @@ public boolean regionMatches(int toffset, String other, int ooffset, int len) { byte[] ov = other.value; byte coder = coder(); if (coder == other.coder()) { - if (coder == LATIN1) { - return ArraysSupport.mismatch(tv, toffset, - ov, ooffset, len) < 0; - } else { - return ArraysSupport.mismatch(tv, toffset << UTF16, - ov, ooffset << UTF16, len << UTF16) < 0; + if (coder == UTF16) { + toffset <<= UTF16; + ooffset <<= UTF16; + len <<= UTF16; } + return ArraysSupport.mismatch(tv, toffset, + ov, ooffset, len) < 0; } else { if (coder == LATIN1) { while (len-- > 0) { diff --git a/test/micro/org/openjdk/bench/java/lang/StringComparisons.java b/test/micro/org/openjdk/bench/java/lang/StringComparisons.java index 2cf29c533a41a..b18b56bee2a67 100644 --- a/test/micro/org/openjdk/bench/java/lang/StringComparisons.java +++ b/test/micro/org/openjdk/bench/java/lang/StringComparisons.java @@ -46,9 +46,9 @@ public class StringComparisons { public String string; public String equalString; - public String endsWithA; public String endsWithB; + public String startsWithA; @Setup public void setup() { @@ -57,6 +57,7 @@ public void setup() { equalString = c.repeat(size); endsWithA = c.repeat(size).concat("A"); endsWithB = c.repeat(size).concat("B"); + startsWithA = "A" + (c.repeat(size)); } @Benchmark @@ -65,8 +66,8 @@ public boolean startsWith() { } @Benchmark - public int compareTo() { - return endsWithA.compareTo(endsWithB); + public boolean endsWith() { + return startsWithA.endsWith(string); } @Benchmark @@ -75,7 +76,12 @@ public boolean regionMatches() { } @Benchmark - public boolean stringEquals() { - return endsWithA.equals(endsWithB); + public boolean regionMatchesRange() { + return startsWithA.regionMatches(1, endsWithB, 0, endsWithB.length() - 1); + } + + @Benchmark + public boolean regionMatchesCI() { + return endsWithA.regionMatches(true, 0, endsWithB, 0, endsWithB.length()); } } diff --git a/test/micro/org/openjdk/bench/java/lang/StringOther.java b/test/micro/org/openjdk/bench/java/lang/StringOther.java index d08327fe1b064..99ebca8c3d1d1 100644 --- a/test/micro/org/openjdk/bench/java/lang/StringOther.java +++ b/test/micro/org/openjdk/bench/java/lang/StringOther.java @@ -49,17 +49,9 @@ public class StringOther { private String testString; private Random rnd; - private String str1, str2, str3, str4; - private String str1UP; - @Setup public void setup() { testString = "Idealism is what precedes experience; cynicism is what follows."; - str1 = "vm-guld vm-guld vm-guld"; - str1UP = str1.toUpperCase(Locale.ROOT); - str2 = "vm-guld vm-guld vm-guldx"; - str3 = "vm-guld vm-guld vm-guldx"; - str4 = "adadaskasdjierudks"; rnd = new Random(); } @@ -70,15 +62,6 @@ public void charAt(Blackhole bh) { } } - @Benchmark - public int compareTo() { - int total = 0; - total += str1.compareTo(str2); - total += str2.compareTo(str3); - total += str3.compareTo(str4); - return total; - } - /** * Creates (hopefully) unique Strings and internizes them, creating a zillion forgettable strings in the JVMs string * pool. @@ -94,17 +77,4 @@ public String internUnique() { return String.valueOf(rnd.nextInt()).intern(); } - @Benchmark - public void regionMatchesLatin1(Blackhole bh) { - bh.consume(str1.regionMatches(true, 0, str2, 0, str1.length())); - bh.consume(str2.regionMatches(true, 16, str1UP, 0, 8)); - bh.consume(str3.regionMatches(true, 6, str4, 1, 2)); - } - - @Benchmark - public void regionMatchesLatin1CaseSensitive(Blackhole bh) { - bh.consume(str1.regionMatches(false, 0, str2, 0, str1.length())); - bh.consume(str2.regionMatches(false, 16, str1UP, 0, 8)); - bh.consume(str3.regionMatches(false, 6, str4, 1, 2)); - } } From b63954bf505db85210fcab8f492c205741a813a0 Mon Sep 17 00:00:00 2001 From: redestad Date: Mon, 13 Feb 2023 17:01:27 +0100 Subject: [PATCH 7/7] Clarify coder shift in startsWith --- src/java.base/share/classes/java/lang/String.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 85c7d130ab00e..1b5b5efb72b08 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -2267,7 +2267,9 @@ public boolean startsWith(String prefix, int toffset) { int pc = pa.length; byte coder = coder(); if (coder == prefix.coder()) { - toffset <<= coder; + if (coder == UTF16) { + toffset <<= UTF16; + } return ArraysSupport.mismatch(ta, toffset, pa, 0, pc) < 0; } else {