Skip to content

Commit 17e3769

Browse files
eirbjo authored and Alan Bateman committed
8302871: Speed up StringLatin1.regionMatchesCI
Reviewed-by: redestad, martin, alanb
1 parent b4ea807 commit 17e3769

File tree

4 files changed

+161
-18
lines changed

4 files changed

+161
-18
lines changed

src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template

+31-8
Original file line numberDiff line numberDiff line change
@@ -138,10 +138,11 @@ class CharacterDataLatin1 extends CharacterData {
138138
if (ch < 'A') { // Fast path for low code points
139139
return ch;
140140
}
141-
int l = ch | 0x20; // Lowercase using 'oldest ASCII trick in the book'
142-
if (l <= 'z' // In range a-z
143-
|| (l >= 0xE0 && l <= 0xFE && l != 0xF7)) { // ..or agrave-thorn, excluding division
144-
return l;
141+
// ASCII and Latin-1 were designed to optimize case-twiddling operations
142+
int lower = ch | 0x20;
143+
if (lower <= 'z' // In range a-z
144+
|| (lower >= 0xE0 && lower <= 0xFE && lower != 0xF7)) { // ..or agrave-thorn, excluding division
145+
return lower;
145146
}
146147
return ch;
147148
}
@@ -150,10 +151,11 @@ class CharacterDataLatin1 extends CharacterData {
150151
if (ch < 'a') { // Fast path for low code points
151152
return ch;
152153
}
153-
int U = ch & 0xDF; // Uppercase using 'oldest ASCII trick in the book'
154-
if (U <= 'Z' // In range A-Z
155-
|| (U >= 0xC0 && U <= 0xDE && U != 0xD7)) { // ..or Agrave-Thorn, excluding multiplication
156-
return U;
154+
// ASCII and Latin-1 were designed to optimize case-twiddling operations
155+
int upper = ch & 0xDF;
156+
if (upper <= 'Z' // In range A-Z
157+
|| (upper >= 0xC0 && upper <= 0xDE && upper != 0xD7)) { // ..or Agrave-Thorn, not multiplication
158+
return upper;
157159
}
158160

159161
// Special-case for 'y with Diaeresis' which uppercases out of latin1
@@ -167,6 +169,27 @@ class CharacterDataLatin1 extends CharacterData {
167169
return ch;
168170
}
169171

172+
/**
173+
* Compares two latin1 code points, ignoring case considerations.
174+
*
* <p>Identical bytes always match. Otherwise the bytes match only when
* {@code b1 & 0xDF} is in the range A-Z or in 0xC0-0xDE (excluding 0xD7,
* the multiplication sign) and {@code b2 & 0xDF} yields the same value.
*
175+
* @param b1 byte representing a latin1 code point
176+
* @param b2 another byte representing a latin1 code point
177+
* @return true if the two bytes are considered equal ignoring case in latin1
178+
*/
179+
static boolean equalsIgnoreCase(byte b1, byte b2) {
180+
if (b1 == b2) {
181+
return true;
182+
}
183+
// ASCII and Latin-1 were designed to optimize case-twiddling operations
// Clearing bit 0x20 maps a lowercase letter onto its uppercase counterpart; the mask
// also drops the sign-extension bits of the byte, so upper is always in 0x00-0xDF.
184+
int upper = b1 & 0xDF;
185+
if (upper < 'A') {
186+
return false; // Low ASCII
187+
}
// Only values in the letter ranges have a case counterpart: any other pair of bytes
// that merely differ in bit 0x20 must not be reported as equal.
188+
return (upper <= 'Z' // In range A-Z
189+
|| (upper >= 0xC0 && upper <= 0XDE && upper != 0xD7)) // ..or A-grave-Thorn, not multiplication
190+
&& upper == (b2 & 0xDF); // b2 has same uppercase
191+
}
192+
170193
// Title-casing is delegated to toUpperCase(ch).
int toTitleCase(int ch) {
171194
return toUpperCase(ch);
172195
}

src/java.base/share/classes/java/lang/StringLatin1.java

+3-8
Original file line numberDiff line numberDiff line change
@@ -384,14 +384,9 @@ public static boolean regionMatchesCI(byte[] value, int toffset,
384384
byte[] other, int ooffset, int len) {
385385
int last = toffset + len;
386386
while (toffset < last) {
387-
char c1 = (char)(value[toffset++] & 0xff);
388-
char c2 = (char)(other[ooffset++] & 0xff);
389-
if (c1 == c2) {
390-
continue;
391-
}
392-
int u1 = CharacterDataLatin1.instance.toUpperCase(c1);
393-
int u2 = CharacterDataLatin1.instance.toUpperCase(c2);
394-
if (u1 == u2) {
387+
byte b1 = value[toffset++];
388+
byte b2 = other[ooffset++];
389+
if (CharacterDataLatin1.equalsIgnoreCase(b1, b2)) {
395390
continue;
396391
}
397392
return false;

test/jdk/java/lang/String/CompactString/EqualsIgnoreCase.java

+31-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -25,10 +25,12 @@
2525
import org.testng.annotations.Test;
2626

2727
import static org.testng.Assert.assertEquals;
28+
import static org.testng.Assert.assertFalse;
29+
import static org.testng.Assert.assertTrue;
2830

2931
/*
3032
* @test
31-
* @bug 8077559 8248655
33+
* @bug 8077559 8248655 8302871
3234
* @summary Tests Compact String. This one is for String.equalsIgnoreCase.
3335
* @run testng/othervm -XX:+CompactStrings EqualsIgnoreCase
3436
* @run testng/othervm -XX:-CompactStrings EqualsIgnoreCase
@@ -75,4 +77,31 @@ public void testEqualsIgnoreCase(String str, String anotherString,
7577
source));
7678
});
7779
}
80+
81+
/**
82+
* Exhaustively checks all 256x256 latin1 code point pairs: the single-character
83+
* strings must equalsIgnoreCase each other exactly when both characters yield the
* same value for Character.toLowerCase(Character.toUpperCase(c)).
84+
*/
85+
@Test
86+
public void checkConsistencyWithCharacterUppercaseLowerCase() {
87+
for (char a = 0; a < 256; a++) {
88+
for (char b = 0; b < 256; b++) {
89+
90+
// Reference case folding: uppercase first, then lowercase the result
int caseFoldA = Character.toLowerCase(Character.toUpperCase(a));
91+
int caseFoldB = Character.toLowerCase(Character.toUpperCase(b));
92+
93+
// Single-character strings under test
String astr = Character.toString(a);
94+
String bstr = Character.toString(b);
95+
96+
// If characters fold to the same lowercase, their strings should equalsIgnoreCase:
97+
if (caseFoldA == caseFoldB) {
98+
assertTrue(astr.equalsIgnoreCase(bstr),
99+
"Expected %s to equalsIgnoreCase %s".formatted(astr, bstr));
100+
} else {
// Different folds: the strings must not be reported as equal ignoring case
101+
assertFalse(astr.equalsIgnoreCase(bstr),
102+
"Expected %s to not equalsIgnoreCase %s".formatted(astr, bstr));
103+
}
104+
}
105+
}
106+
}
78107
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
package org.openjdk.bench.java.lang;
24+
25+
import org.openjdk.jmh.annotations.*;
26+
27+
import java.util.concurrent.TimeUnit;
28+
29+
/*
30+
* This benchmark naively explores String::regionMatches, ignoring case.
31+
*
* Each scenario builds two strings that repeat a single character "size" times
* and measures one case-insensitive regionMatches call over that repeated region.
*/
32+
33+
public class RegionMatchesIC {
34+
35+
@BenchmarkMode(Mode.AverageTime)
36+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
37+
@State(Scope.Benchmark)
38+
@Warmup(iterations = 5, time = 1)
39+
@Measurement(iterations = 5, time = 1)
40+
@Fork(value = 3)
41+
public static class Latin1 {
42+
43+
// Number of repeated characters compared by the benchmark (prefix excluded)
@Param({"1024"})
44+
public int size;
45+
46+
// Scenario name; selects the character pair used to build the two strings in setup()
@Param({"ascii-match",
47+
"ascii-mismatch",
48+
"number-match",
49+
"number-mismatch",
50+
"lat1-match",
51+
"lat1-mismatch"})
52+
String codePoints;
53+
private String leftString;
54+
private String rightString;
55+
56+
// Builds leftString and rightString for the selected scenario.
@Setup
57+
public void setup() {
58+
59+
switch (codePoints) {
60+
// 'a' vs 'A': differ only by case
case "ascii-match" -> {
61+
leftString = "a".repeat(size);
62+
rightString = "A".repeat(size);
63+
}
64+
// 'a' vs 'b': different letters
case "ascii-mismatch" -> {
65+
leftString = "a".repeat(size);
66+
rightString = "b".repeat(size);
67+
}
68+
// '7' vs '7': identical digits
case "number-match" -> {
69+
leftString = "7".repeat(size);
70+
rightString = "7".repeat(size);
71+
}
72+
// '7' vs '9': different digits
case "number-mismatch" -> {
73+
leftString = "7".repeat(size);
74+
rightString = "9".repeat(size);
75+
}
76+
// U+00E5 vs U+00C5: non-ASCII latin1 pair differing only by case
case "lat1-match" -> {
77+
leftString = "\u00e5".repeat(size);
78+
rightString = "\u00c5".repeat(size);
79+
}
80+
// U+00E5 vs U+00C6: different non-ASCII latin1 letters
case "lat1-mismatch" -> {
81+
leftString = "\u00e5".repeat(size);
82+
rightString = "\u00c6".repeat(size);
83+
}
84+
default -> throw new IllegalArgumentException("Unsupported coding: " + codePoints);
85+
}
86+
// Prepend distinct one-character prefixes so the two strings are never String.equals;
// the benchmark method starts comparing at offset 1, i.e. just past the prefix.
87+
leftString = "l" + leftString;
88+
rightString = "r" + rightString;
89+
}
90+
91+
// Case-insensitive comparison of the "size" characters that follow the prefix.
@Benchmark
92+
public boolean regionMatchesIC() {
93+
return leftString.regionMatches(true, 1, rightString, 1, size);
94+
}
95+
}
96+
}

0 commit comments

Comments
 (0)