Skip to content

Commit ef1f7bd

Browse files
eirbjonaotoj
authored and committed
8302877: Speed up latin1 case conversions
Reviewed-by: naoto, redestad
1 parent 1ea5f9f commit ef1f7bd

File tree

5 files changed

+165
-22
lines changed

5 files changed

+165
-22
lines changed

src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template

+25-19
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -135,30 +135,36 @@ class CharacterDataLatin1 extends CharacterData {
135135
}
136136

137137
int toLowerCase(int ch) {
138-
int mapChar = ch;
139-
int val = getProperties(ch);
140-
141-
if (((val & $$maskLowerCase) != 0) &&
142-
((val & $$maskCaseOffset) != $$maskCaseOffset)) {
143-
int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
144-
mapChar = ch + offset;
138+
if (ch < 'A') { // Fast path for low code points
139+
return ch;
145140
}
146-
return mapChar;
141+
int l = ch | 0x20; // Lowercase using 'oldest ASCII trick in the book'
142+
if (l <= 'z' // In range a-z
143+
|| (l >= 0xE0 && l <= 0xFE && l != 0xF7)) { // ..or agrave-thorn, excluding division
144+
return l;
145+
}
146+
return ch;
147147
}
148148

149149
int toUpperCase(int ch) {
150-
int mapChar = ch;
151-
int val = getProperties(ch);
150+
if (ch < 'a') { // Fast path for low code points
151+
return ch;
152+
}
153+
int U = ch & 0xDF; // Uppercase using 'oldest ASCII trick in the book'
154+
if (U <= 'Z' // In range A-Z
155+
|| (U >= 0xC0 && U <= 0xDE && U != 0xD7)) { // ..or Agrave-Thorn, excluding multiplication
156+
return U;
157+
}
152158

153-
if ((val & $$maskUpperCase) != 0) {
154-
if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
155-
int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
156-
mapChar = ch - offset;
157-
} else if (ch == 0x00B5) {
158-
mapChar = 0x039C;
159-
}
159+
// Special-case for 'y with Diaeresis' which uppercases out of latin1
160+
if (ch == 0xFF) {
161+
return 0x178; // Capital Letter Y with Diaeresis
160162
}
161-
return mapChar;
163+
// Special-case for 'Micro Sign' which uppercases out of latin1
164+
if (ch == 0xB5) {
165+
return 0x39C; // Greek Capital Letter Mu
166+
}
167+
return ch;
162168
}
163169

164170
int toTitleCase(int ch) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
import org.testng.annotations.Test;
25+
26+
import static org.testng.Assert.assertEquals;
27+
import static org.testng.Assert.fail;
28+
29+
/**
30+
* @test
31+
* @bug 8302877
32+
* @summary Provides exhaustive verification of Character.toUpperCase and Character.toLowerCase
33+
* for all code points in the latin1 range 0-255.
34+
* @run testng Latin1CaseConversion
35+
*/
36+
public class Latin1CaseConversion {
37+
38+
@Test
39+
public void shouldUpperCaseAndLowerCaseLatin1() {
40+
for (int c = 0; c < 256; c++) {
41+
int upper = Character.toUpperCase(c);
42+
int lower = Character.toLowerCase(c);
43+
44+
if (c < 0x41) { // Before A
45+
assertUnchanged(upper, lower, c);
46+
} else if (c <= 0x5A) { // A-Z
47+
assertEquals(upper, c);
48+
assertEquals(lower, c + 32);
49+
} else if (c < 0x61) { // Between Z and a
50+
assertUnchanged(upper, lower, c);
51+
} else if (c <= 0x7A) { // a-z
52+
assertEquals(upper, c - 32);
53+
assertEquals(lower, c);
54+
} else if (c < 0xB5) { // Between z and Micro Sign
55+
assertUnchanged(upper, lower, c);
56+
} else if (c == 0xB5) { // Special case for Micro Sign
57+
assertEquals(upper, 0x39C);
58+
assertEquals(lower, c);
59+
} else if (c < 0xC0) { // Between Micro Sign and A-grave
60+
assertUnchanged(upper, lower, c);
61+
} else if (c < 0xD7) { // A-grave - O with Diaeresis
62+
assertEquals(upper, c);
63+
assertEquals(lower, c + 32);
64+
} else if (c == 0xD7) { // Multiplication
65+
assertUnchanged(upper, lower, c);
66+
} else if (c <= 0xDE) { // O with slash - Thorn
67+
assertEquals(upper, c);
68+
assertEquals(lower, c + 32);
69+
} else if (c == 0xDF) { // Sharp s
70+
assertUnchanged(upper, lower, c);
71+
} else if (c < 0xF7) { // a-grave - o with Diaeresis
72+
assertEquals(upper, c - 32);
73+
assertEquals(lower, c);
74+
} else if (c == 0xF7) { // Division
75+
assertUnchanged(upper, lower, c);
76+
} else if (c < 0xFF) { // o with slash - thorn
77+
assertEquals(upper, c - 32);
78+
assertEquals(lower, c);
79+
} else if (c == 0XFF) { // Special case for y with Diaeresis
80+
assertEquals(upper, 0x178);
81+
assertEquals(lower, c);
82+
} else {
83+
fail("Uncovered code point: " + Integer.toHexString(c));
84+
}
85+
}
86+
}
87+
88+
private static void assertUnchanged(int upper, int lower, int c) {
89+
assertEquals(upper, c);
90+
assertEquals(lower, c);
91+
}
92+
}

test/jdk/sun/text/resources/LocaleData

+1-1
Original file line numberDiff line numberDiff line change
@@ -2230,7 +2230,7 @@ FormatData/ar_YE/NumberElements/8=\u2030
22302230
FormatData/ar_YE/NumberElements/9=\u221e
22312231
FormatData/ar_YE/NumberElements/10=\ufffd
22322232

2233-
# bug #4113654 (this is obviously not an exchaustive test; I'm trying it here for a single
2233+
# bug #4113654 (this is obviously not an exhaustive test; I'm trying it here for a single
22342234
# inheritance chain only. This bug fix also gets tested fairly well by the tests for all
22352235
# the other bugs as given above)
22362236
FormatData//NumberPatterns/0=#,##0.###;-#,##0.###

test/jdk/sun/text/resources/LocaleData.cldr

+1-1
Original file line numberDiff line numberDiff line change
@@ -2185,7 +2185,7 @@ FormatData/ar_YE/arab.NumberElements/8=\u0609
21852185
FormatData/ar_YE/arab.NumberElements/9=\u221e
21862186
FormatData/ar_YE/arab.NumberElements/10=\u0644\u064a\u0633\u00a0\u0631\u0642\u0645
21872187

2188-
# bug #4113654 (this is obviously not an exchaustive test; I'm trying it here for a single
2188+
# bug #4113654 (this is obviously not an exhaustive test; I'm trying it here for a single
21892189
# inheritance chain only. This bug fix also gets tested fairly well by the tests for all
21902190
# the other bugs as given above)
21912191
FormatData//latn.NumberPatterns/0=#,##0.###

test/micro/org/openjdk/bench/java/lang/Characters.java

+46-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -27,11 +27,13 @@
2727
import org.openjdk.jmh.annotations.Benchmark;
2828
import org.openjdk.jmh.annotations.BenchmarkMode;
2929
import org.openjdk.jmh.annotations.Fork;
30+
import org.openjdk.jmh.annotations.Level;
3031
import org.openjdk.jmh.annotations.Measurement;
3132
import org.openjdk.jmh.annotations.Mode;
3233
import org.openjdk.jmh.annotations.OutputTimeUnit;
3334
import org.openjdk.jmh.annotations.Param;
3435
import org.openjdk.jmh.annotations.Scope;
36+
import org.openjdk.jmh.annotations.Setup;
3537
import org.openjdk.jmh.annotations.State;
3638
import org.openjdk.jmh.annotations.Warmup;
3739

@@ -80,4 +82,47 @@ public void codePointOf() {
8082
}
8183

8284
}
85+
86+
@BenchmarkMode(Mode.AverageTime)
87+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
88+
@State(Scope.Thread)
89+
@Warmup(iterations = 5, time = 1)
90+
@Measurement(iterations = 5, time = 1)
91+
@Fork(3)
92+
public static class CaseConversions {
93+
@Param({
94+
"low", // 0x09 pre A
95+
"A", // 0x41 uppercase A
96+
"a", // 0x61 lowercase a
97+
"A-grave", // 0xC0 uppercase A-grave
98+
"a-grave", // 0xE0 lowercase a-grave
99+
"micro", // 0xB5 lowercase 'Micro Sign'
100+
"yD" // 0xFF lowercase 'y with Diaeresis'
101+
})
102+
private String codePoint;
103+
private int cp;
104+
105+
@Setup(Level.Trial)
106+
public void setup() {
107+
cp = switch (codePoint) {
108+
case "low" -> 0x09;
109+
case "A" -> 0x41;
110+
case "a" -> 0x61;
111+
case "A-grave" -> 0xC0;
112+
case "a-grave" -> 0xE0;
113+
case "yD" -> 0xE0;
114+
case "micro" -> 0xFF;
115+
default -> Integer.parseInt(codePoint);;
116+
};
117+
}
118+
@Benchmark
119+
public int toUpperCase() {
120+
return Character.toUpperCase(cp);
121+
}
122+
123+
@Benchmark
124+
public int toLowerCase() {
125+
return Character.toLowerCase(cp);
126+
}
127+
}
83128
}

0 commit comments

Comments
 (0)