Skip to content

Commit 27ba8bd

Browse files
committed
8308108: Support Unicode extension for collation settings
Reviewed-by: iris, rriggs, jlu, alanb
1 parent 89b3c37 commit 27ba8bd

File tree

2 files changed

+143
-1
lines changed

2 files changed

+143
-1
lines changed

src/java.base/share/classes/java/text/Collator.java

+58-1
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,43 @@ public static synchronized Collator getInstance() {
226226
}
227227

228228
/**
229-
* Gets the Collator for the desired locale.
229+
* Gets the Collator for the desired locale. If the desired locale
230+
* has the "{@code ks}" and/or the "{@code kk}"
231+
* <a href="https://www.unicode.org/reports/tr35/tr35-collation.html#Setting_Options">
232+
* Unicode collation settings</a>, this method will call {@linkplain #setStrength(int)}
233+
* and/or {@linkplain #setDecomposition(int)} on the created instance, if the specified
234+
* Unicode collation settings are recognized based on the following mappings:
235+
* <table class="striped">
236+
* <caption style="display:none">Strength/Decomposition mappings</caption>
237+
* <thead>
238+
* <tr><th scope="col">BCP 47 values for strength (ks)</th>
239+
* <th scope="col">Collator constants for strength</th></tr>
240+
* </thead>
241+
* <tbody>
242+
* <tr><th scope="row" style="text-align:left">level1</th>
243+
* <td>PRIMARY</td></tr>
244+
* <tr><th scope="row" style="text-align:left">level2</th>
245+
* <td>SECONDARY</td></tr>
246+
* <tr><th scope="row" style="text-align:left">level3</th>
247+
* <td>TERTIARY</td></tr>
248+
* <tr><th scope="row" style="text-align:left">identic</th>
249+
* <td>IDENTICAL</td></tr>
250+
* </tbody>
251+
* <thead>
252+
* <tr><th scope="col">BCP 47 values for normalization (kk)</th>
253+
* <th scope="col">Collator constants for decomposition</th></tr>
254+
* </thead>
255+
* <tbody>
256+
* <tr><th scope="row" style="text-align:left">true</th>
257+
* <td>CANONICAL_DECOMPOSITION</td></tr>
258+
* <tr><th scope="row" style="text-align:left">false</th>
259+
* <td>NO_DECOMPOSITION</td></tr>
260+
* </tbody>
261+
* </table>
262+
* If the specified setting value is not recognized, the strength and/or
263+
* decomposition will not be overridden, as if there were no BCP 47 collation
264+
* options in the desired locale.
265+
*
230266
* @apiNote Implementations of {@code Collator} class may produce
231267
* different instances based on the "{@code co}"
232268
* <a href="https://www.unicode.org/reports/tr35/#UnicodeCollationIdentifier">
@@ -258,6 +294,27 @@ public static Collator getInstance(Locale desiredLocale) {
258294
result = LocaleProviderAdapter.forJRE()
259295
.getCollatorProvider().getInstance(desiredLocale);
260296
}
297+
298+
// Override strength and decomposition with the "ks"/"kk" Unicode extension values of `desiredLocale`, if any
299+
var strength = desiredLocale.getUnicodeLocaleType("ks");
300+
if (strength != null) {
301+
strength = strength.toLowerCase(Locale.ROOT);
302+
switch (strength) {
303+
case "level1" -> result.setStrength(PRIMARY);
304+
case "level2" -> result.setStrength(SECONDARY);
305+
case "level3" -> result.setStrength(TERTIARY);
306+
case "identic" -> result.setStrength(IDENTICAL);
307+
}
308+
}
309+
var norm = desiredLocale.getUnicodeLocaleType("kk");
310+
if (norm != null) {
311+
norm = norm.toLowerCase(Locale.ROOT);
312+
switch (norm) {
313+
case "true" -> result.setDecomposition(CANONICAL_DECOMPOSITION);
314+
case "false" -> result.setDecomposition(NO_DECOMPOSITION);
315+
}
316+
}
317+
261318
while (true) {
262319
if (ref != null) {
263320
// Remove the empty SoftReference if any
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
/*
25+
* @test
26+
* @bug 8308108
27+
* @summary Tests for BCP 47 collation settings
28+
* @run junit CollationSettingsTests
29+
*/
30+
31+
import java.text.Collator;
32+
import java.util.Locale;
33+
import java.util.stream.Stream;
34+
import static java.text.Collator.*;
35+
36+
import org.junit.jupiter.api.Test;
37+
import org.junit.jupiter.params.ParameterizedTest;
38+
import org.junit.jupiter.params.provider.Arguments;
39+
import org.junit.jupiter.params.provider.MethodSource;
40+
import static org.junit.jupiter.api.Assertions.assertEquals;
41+
42+
// Checks that Collator.getInstance(Locale) reflects the "ks" (strength) and
// "kk" (normalization) settings carried in the locale's language tag.
public class CollationSettingsTests {
43+
// Collator for plain Locale.ENGLISH (no "ks"/"kk" setting); its strength and
// decomposition are the expected values for unrecognized setting values below.
private static final Collator ENG_DEF = Collator.getInstance(Locale.ENGLISH);
44+
45+
// Supplies (locale, expected strength) pairs for testStrength.
private static Stream<Arguments> strengthData() {
46+
return Stream.of(
47+
Arguments.of(Locale.forLanguageTag("en-u-ks-level1"), PRIMARY),
48+
Arguments.of(Locale.forLanguageTag("en-u-ks-level2"), SECONDARY),
49+
Arguments.of(Locale.forLanguageTag("en-u-ks-level3"), TERTIARY),
50+
Arguments.of(Locale.forLanguageTag("en-u-ks-identic"), IDENTICAL),
51+
// same setting values spelled in upper case in the language tag
Arguments.of(Locale.forLanguageTag("en-u-ks-LEVEL1"), PRIMARY),
52+
Arguments.of(Locale.forLanguageTag("en-u-ks-LEVEL2"), SECONDARY),
53+
Arguments.of(Locale.forLanguageTag("en-u-ks-LEVEL3"), TERTIARY),
54+
Arguments.of(Locale.forLanguageTag("en-u-ks-IDENTIC"), IDENTICAL),
55+
// unrecognized setting values: expect the same strength as the plain English collator
56+
Arguments.of(Locale.forLanguageTag("en-u-ks-foo"), ENG_DEF.getStrength()),
57+
Arguments.of(Locale.forLanguageTag("en-u-ks-level4"), ENG_DEF.getStrength()),
58+
Arguments.of(Locale.forLanguageTag("en-u-ks-identical"), ENG_DEF.getStrength())
59+
);
60+
}
61+
62+
// Supplies (locale, expected decomposition mode) pairs for testDecomposition.
private static Stream<Arguments> decompData() {
63+
return Stream.of(
64+
Arguments.of(Locale.forLanguageTag("en-u-kk-true"), CANONICAL_DECOMPOSITION),
65+
Arguments.of(Locale.forLanguageTag("en-u-kk-false"), NO_DECOMPOSITION),
66+
// same setting values spelled in upper case in the language tag
Arguments.of(Locale.forLanguageTag("en-u-kk-TRUE"), CANONICAL_DECOMPOSITION),
67+
Arguments.of(Locale.forLanguageTag("en-u-kk-FALSE"), NO_DECOMPOSITION),
68+
// unrecognized setting values: expect the same decomposition as the plain English collator
69+
Arguments.of(Locale.forLanguageTag("en-u-kk-foo"), ENG_DEF.getDecomposition()),
70+
Arguments.of(Locale.forLanguageTag("en-u-kk-truetrue"), ENG_DEF.getDecomposition())
71+
);
72+
}
73+
74+
// The collator obtained for each locale from strengthData must report the expected strength.
@ParameterizedTest
75+
@MethodSource("strengthData")
76+
public void testStrength(Locale l, int expected) {
77+
assertEquals(expected, Collator.getInstance(l).getStrength());
78+
}
79+
80+
// The collator obtained for each locale from decompData must report the expected decomposition mode.
@ParameterizedTest
81+
@MethodSource("decompData")
82+
public void testDecomposition(Locale l, int expected) {
83+
assertEquals(expected, Collator.getInstance(l).getDecomposition());
84+
}
85+
}

0 commit comments

Comments
 (0)