Skip to content

Commit

Permalink
8308108: Support Unicode extension for collation settings
Browse files Browse the repository at this point in the history
Reviewed-by: iris, rriggs, jlu, alanb
  • Loading branch information
naotoj committed May 25, 2023
1 parent 89b3c37 commit 27ba8bd
Show file tree
Hide file tree
Showing 2 changed files with 143 additions and 1 deletion.
59 changes: 58 additions & 1 deletion src/java.base/share/classes/java/text/Collator.java
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,43 @@ public static synchronized Collator getInstance() {
}

/**
* Gets the Collator for the desired locale.
* Gets the Collator for the desired locale. If the desired locale
* has the "{@code ks}" and/or the "{@code kk}"
* <a href="https://www.unicode.org/reports/tr35/tr35-collation.html#Setting_Options">
* Unicode collation settings</a>, this method will call {@linkplain #setStrength(int)}
* and/or {@linkplain #setDecomposition(int)} on the created instance, if the specified
* Unicode collation settings are recognized based on the following mappings:
* <table class="striped">
* <caption style="display:none">Strength/Decomposition mappings</caption>
* <thead>
* <tr><th scope="col">BCP 47 values for strength (ks)</th>
* <th scope="col">Collator constants for strength</th></tr>
* </thead>
* <tbody>
* <tr><th scope="row" style="text-align:left">level1</th>
* <td>PRIMARY</td></tr>
* <tr><th scope="row" style="text-align:left">level2</th>
* <td>SECONDARY</td></tr>
* <tr><th scope="row" style="text-align:left">level3</th>
* <td>TERTIARY</td></tr>
* <tr><th scope="row" style="text-align:left">identic</th>
* <td>IDENTICAL</td></tr>
* </tbody>
* <thead>
* <tr><th scope="col">BCP 47 values for normalization (kk)</th>
* <th scope="col">Collator constants for decomposition</th></tr>
* </thead>
* <tbody>
* <tr><th scope="row" style="text-align:left">true</th>
* <td>CANONICAL_DECOMPOSITION</td></tr>
* <tr><th scope="row" style="text-align:left">false</th>
* <td>NO_DECOMPOSITION</td></tr>
* </tbody>
* </table>
* If the specified setting value is not recognized, the strength and/or
* decomposition will not be overridden, as if there were no BCP 47 collation
* options in the desired locale.
*
* @apiNote Implementations of {@code Collator} class may produce
* different instances based on the "{@code co}"
* <a href="https://www.unicode.org/reports/tr35/#UnicodeCollationIdentifier">
Expand Down Expand Up @@ -258,6 +294,27 @@ public static Collator getInstance(Locale desiredLocale) {
result = LocaleProviderAdapter.forJRE()
.getCollatorProvider().getInstance(desiredLocale);
}

// Override strength and decomposition with `desiredLocale`, if any
var strength = desiredLocale.getUnicodeLocaleType("ks");
if (strength != null) {
strength = strength.toLowerCase(Locale.ROOT);
switch (strength) {
case "level1" -> result.setStrength(PRIMARY);
case "level2" -> result.setStrength(SECONDARY);
case "level3" -> result.setStrength(TERTIARY);
case "identic" -> result.setStrength(IDENTICAL);
}
}
var norm = desiredLocale.getUnicodeLocaleType("kk");
if (norm != null) {
norm = norm.toLowerCase(Locale.ROOT);
switch (norm) {
case "true" -> result.setDecomposition(CANONICAL_DECOMPOSITION);
case "false" -> result.setDecomposition(NO_DECOMPOSITION);
}
}

while (true) {
if (ref != null) {
// Remove the empty SoftReference if any
Expand Down
85 changes: 85 additions & 0 deletions test/jdk/sun/text/resources/Collator/CollationSettingsTests.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

/*
* @test
* @bug 8308108
* @summary Tests for BCP 47 collation settings
* @run junit CollationSettingsTests
*/

import java.text.Collator;
import java.util.Locale;
import java.util.stream.Stream;
import static java.text.Collator.*;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Checks that {@link Collator#getInstance(Locale)} honors the BCP 47
 * "ks" (strength) and "kk" (normalization) Unicode extension values carried
 * by the requested locale, and that unrecognized values leave the collator
 * with the same settings as the plain English collator.
 */
public class CollationSettingsTests {
    /** Plain English collator; its settings are the expected result for unrecognized values. */
    private static final Collator ENG_DEF = Collator.getInstance(Locale.ENGLISH);

    /**
     * Builds one parameter row: the locale parsed from {@code languageTag}
     * paired with the collator constant expected for it.
     */
    private static Arguments row(String languageTag, int expectedSetting) {
        return Arguments.of(Locale.forLanguageTag(languageTag), expectedSetting);
    }

    /** Supplies (locale, expected strength) pairs for {@link #testStrength}. */
    private static Stream<Arguments> strengthData() {
        final int baseline = ENG_DEF.getStrength();
        return Stream.of(
                // lower-case setting values
                row("en-u-ks-level1", PRIMARY),
                row("en-u-ks-level2", SECONDARY),
                row("en-u-ks-level3", TERTIARY),
                row("en-u-ks-identic", IDENTICAL),
                // upper-case setting values are expected to map identically
                row("en-u-ks-LEVEL1", PRIMARY),
                row("en-u-ks-LEVEL2", SECONDARY),
                row("en-u-ks-LEVEL3", TERTIARY),
                row("en-u-ks-IDENTIC", IDENTICAL),
                // unrecognized setting value
                row("en-u-ks-foo", baseline),
                row("en-u-ks-level4", baseline),
                row("en-u-ks-identical", baseline)
        );
    }

    /** Supplies (locale, expected decomposition) pairs for {@link #testDecomposition}. */
    private static Stream<Arguments> decompData() {
        final int baseline = ENG_DEF.getDecomposition();
        return Stream.of(
                // lower-case setting values
                row("en-u-kk-true", CANONICAL_DECOMPOSITION),
                row("en-u-kk-false", NO_DECOMPOSITION),
                // upper-case setting values are expected to map identically
                row("en-u-kk-TRUE", CANONICAL_DECOMPOSITION),
                row("en-u-kk-FALSE", NO_DECOMPOSITION),
                // unrecognized setting value
                row("en-u-kk-foo", baseline),
                row("en-u-kk-truetrue", baseline)
        );
    }

    /** The collator created for {@code l} must report the expected strength. */
    @ParameterizedTest
    @MethodSource("strengthData")
    public void testStrength(Locale l, int expected) {
        Collator collator = Collator.getInstance(l);
        assertEquals(expected, collator.getStrength());
    }

    /** The collator created for {@code l} must report the expected decomposition mode. */
    @ParameterizedTest
    @MethodSource("decompData")
    public void testDecomposition(Locale l, int expected) {
        Collator collator = Collator.getInstance(l);
        assertEquals(expected, collator.getDecomposition());
    }
}

1 comment on commit 27ba8bd

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.