Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
459 changes: 301 additions & 158 deletions src/java.base/share/classes/java/lang/Character.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1996, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1996, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -661,7 +661,7 @@ private int nextContractChar(int ch)

// (the Normalizer is cloned here so that the seeking we do in the next loop
// won't affect our real position in the text)
NormalizerBase tempText = (NormalizerBase)text.clone();
NormalizerBase tempText = text.clone();

// extract the next maxLength characters in the string (we have to do this using the
// Normalizer to ensure that our offsets correspond to those the rest of the
Expand Down Expand Up @@ -732,7 +732,7 @@ private int prevContractChar(int ch)
pair = list.lastElement();
int maxLength = pair.entryName.length();

NormalizerBase tempText = (NormalizerBase)text.clone();
NormalizerBase tempText = text.clone();

tempText.next();
key.setLength(0);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -47,7 +47,7 @@
* @author ram
*/

public class CharacterIteratorWrapper extends UCharacterIterator {
public class CharacterIteratorWrapper extends UCharacterIterator implements Cloneable {

private CharacterIterator iterator;

Expand Down Expand Up @@ -135,7 +135,7 @@ public int getText(char[] fillIn, int offset){
* Creates a clone of this iterator. Clones the underlying character iterator.
* @see UCharacterIterator#clone()
*/
public Object clone(){
public CharacterIteratorWrapper clone(){
try {
CharacterIteratorWrapper result = (CharacterIteratorWrapper) super.clone();
result.iterator = (CharacterIterator)this.iterator.clone();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ private static final class Norm2AllModesSingleton {
private Norm2AllModesSingleton(String name) {
try {
@SuppressWarnings("deprecation")
String DATA_FILE_NAME = "/jdk/internal/icu/impl/data/icudt" +
VersionInfo.ICU_DATA_VERSION_PATH + "/" + name + ".nrm";
String DATA_FILE_NAME = "/jdk/internal/icu/impl/data/icudata/" +
name + ".nrm";
NormalizerImpl impl=new NormalizerImpl().load(DATA_FILE_NAME);
allModes=new Norm2AllModes(impl);
} catch (RuntimeException e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -51,7 +51,7 @@
*
* What are first, last, and getBeginIndex doing here?!?!?!
*/
public class ReplaceableUCharacterIterator extends UCharacterIterator {
public class ReplaceableUCharacterIterator extends UCharacterIterator implements Cloneable {

// public constructor ------------------------------------------------------

Expand Down Expand Up @@ -86,9 +86,9 @@ public ReplaceableUCharacterIterator(StringBuffer buf){
* <code>Replaceable</code>object
* @return copy of this iterator
*/
public Object clone(){
public ReplaceableUCharacterIterator clone(){
try {
return super.clone();
return (ReplaceableUCharacterIterator) super.clone();
} catch (CloneNotSupportedException e) {
return null; // never invoked
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,7 @@ public final int getPairedBracket(int c) {
// data format constants ----------------------------------------------- ***
@SuppressWarnings("deprecation")
private static final String DATA_FILE_NAME =
"/jdk/internal/icu/impl/data/icudt" +
VersionInfo.ICU_DATA_VERSION_PATH +
"/ubidi.icu";
"/jdk/internal/icu/impl/data/icudata/ubidi.icu";

/* format "BiDi" */
private static final int FMT=0x42694469;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,9 +330,7 @@ public int digit(int c) {
*/
@SuppressWarnings("deprecation")
private static final String DATA_FILE_NAME_ =
"/jdk/internal/icu/impl/data/icudt" +
VersionInfo.ICU_DATA_VERSION_PATH +
"/uprops.icu";
"/jdk/internal/icu/impl/data/icudata/uprops.icu";

/**
* Shift value for lead surrogate to form a supplementary character.
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -420,10 +420,10 @@ public NormalizerBase(CharacterIterator iter, Mode mode) {
* iterator's {@code clone} method does so.
* @stable ICU 2.8
*/
public Object clone() {
public NormalizerBase clone() {
try {
NormalizerBase copy = (NormalizerBase) super.clone();
copy.text = (UCharacterIterator) text.clone();
copy.text = text.clone();
copy.mode = mode;
copy.options = options;
copy.norm2 = norm2;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -310,8 +310,8 @@ public int moveCodePointIndex(int delta){
* @return copy of this iterator
* @stable ICU 2.4
*/
public Object clone() throws CloneNotSupportedException{
return super.clone();
public UCharacterIterator clone() throws CloneNotSupportedException{
return (UCharacterIterator) super.clone();
}

}
16 changes: 14 additions & 2 deletions src/java.base/share/classes/jdk/internal/icu/text/UnicodeSet.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -293,7 +293,7 @@
* @author Alan Liu
* @stable ICU 2.0
*/
public class UnicodeSet {
public class UnicodeSet implements Cloneable {

private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
Expand Down Expand Up @@ -385,6 +385,18 @@ public UnicodeSet(String pattern) {
applyPattern(pattern, null);
}

/**
 * Returns a set that is equivalent to this one.
 * <p>
 * If {@code isFrozen()} reports {@code true}, this same instance is
 * returned rather than a copy; otherwise a new {@code UnicodeSet} is
 * constructed from this set.
 *
 * @return this instance when frozen, otherwise a newly constructed
 *         {@code UnicodeSet} built from this one
 * @stable ICU 2.0
 */
@Override
public UnicodeSet clone() {
    // Frozen sets are handed back as-is; only unfrozen sets are copied.
    return isFrozen() ? this : new UnicodeSet(this);
}

/**
* Make this object represent the same set as <code>other</code>.
* @param other a <code>UnicodeSet</code> whose value will be
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -54,7 +54,7 @@ public final class VersionInfo
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static final String ICU_DATA_VERSION_PATH = "76b";
public static final String ICU_DATA_VERSION_PATH = "78b";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that the data path is fixed to "icudata", this constant no longer seems to be used.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. The change was made upstream and has just been merged into our codebase: unicode-org/icu@d93b8bb#diff-2d49053b635edae3d63d88a8b058994e7b156a50d5056ac0fb3bbfe5daa7c4e4L242


// public methods ------------------------------------------------------

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -35,9 +35,9 @@ public final class Grapheme {
* <p>
* See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
* for the extended grapheme cluster boundary rules. The following implementation
* is based on the annex for Unicode version 16.0.
* is based on the annex for Unicode version 17.0.
*
* @spec http://www.unicode.org/reports/tr29/tr29-45.html
* @spec http://www.unicode.org/reports/tr29/tr29-47.html
* @param src the {@code CharSequence} to be scanned
* @param off offset to start looking for the next boundary in the src
* @param limit limit offset in the src (exclusive)
Expand Down Expand Up @@ -283,7 +283,6 @@ static int getType(int cp) {
case 0x113D1:
case 0x1193F:
case 0x11941:
case 0x11A3A:
case 0x11A84:
case 0x11A85:
case 0x11A86:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -49,8 +49,9 @@ final class IndicConjunctBreak {
}

static boolean isConsonant(int cp) {
// fast check - Devanagari to Malayalam
if (cp < 0x0900 || cp > 0x0D7F) {
// fast check - return false for code points below
// the Devanagari range (lowest among Indic scripts)
if (cp < 0x0900) {
return false;
}

Expand Down
14 changes: 11 additions & 3 deletions src/java.base/share/data/unicodedata/Blocks.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Blocks-16.0.0.txt
# Date: 2024-02-02
# Copyright (c) 2024 Unicode, Inc.
# Blocks-17.0.0.txt
# Date: 2025-08-01
# © 2025 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
#
Expand Down Expand Up @@ -228,6 +228,7 @@ FFF0..FFFF; Specials
108E0..108FF; Hatran
10900..1091F; Phoenician
10920..1093F; Lydian
10940..1095F; Sidetic
10980..1099F; Meroitic Hieroglyphs
109A0..109FF; Meroitic Cursive
10A00..10A5F; Kharoshthi
Expand Down Expand Up @@ -279,11 +280,13 @@ FFF0..FFFF; Specials
11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
11AC0..11AFF; Pau Cin Hau
11B00..11B5F; Devanagari Extended-A
11B60..11B7F; Sharada Supplement
11BC0..11BFF; Sunuwar
11C00..11C6F; Bhaiksuki
11C70..11CBF; Marchen
11D00..11D5F; Masaram Gondi
11D60..11DAF; Gunjala Gondi
11DB0..11DEF; Tolong Siki
11EE0..11EFF; Makasar
11F00..11F5F; Kawi
11FB0..11FBF; Lisu Supplement
Expand All @@ -304,12 +307,14 @@ FFF0..FFFF; Specials
16B00..16B8F; Pahawh Hmong
16D40..16D7F; Kirat Rai
16E40..16E9F; Medefaidrin
16EA0..16EDF; Beria Erfe
16F00..16F9F; Miao
16FE0..16FFF; Ideographic Symbols and Punctuation
17000..187FF; Tangut
18800..18AFF; Tangut Components
18B00..18CFF; Khitan Small Script
18D00..18D7F; Tangut Supplement
18D80..18DFF; Tangut Components Supplement
1AFF0..1AFFF; Kana Extended-B
1B000..1B0FF; Kana Supplement
1B100..1B12F; Kana Extended-A
Expand All @@ -318,6 +323,7 @@ FFF0..FFFF; Specials
1BC00..1BC9F; Duployan
1BCA0..1BCAF; Shorthand Format Controls
1CC00..1CEBF; Symbols for Legacy Computing Supplement
1CEC0..1CEFF; Miscellaneous Symbols Supplement
1CF00..1CFCF; Znamenny Musical Notation
1D000..1D0FF; Byzantine Musical Symbols
1D100..1D1FF; Musical Symbols
Expand All @@ -336,6 +342,7 @@ FFF0..FFFF; Specials
1E2C0..1E2FF; Wancho
1E4D0..1E4FF; Nag Mundari
1E5D0..1E5FF; Ol Onal
1E6C0..1E6FF; Tai Yo
1E7E0..1E7FF; Ethiopic Extended-B
1E800..1E8DF; Mende Kikakui
1E900..1E95F; Adlam
Expand Down Expand Up @@ -367,6 +374,7 @@ FFF0..FFFF; Specials
2F800..2FA1F; CJK Compatibility Ideographs Supplement
30000..3134F; CJK Unified Ideographs Extension G
31350..323AF; CJK Unified Ideographs Extension H
323B0..3347F; CJK Unified Ideographs Extension J
E0000..E007F; Tags
E0100..E01EF; Variation Selectors Supplement
F0000..FFFFF; Supplementary Private Use Area-A
Expand Down
42 changes: 35 additions & 7 deletions src/java.base/share/data/unicodedata/CaseFolding.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CaseFolding-16.0.0.txt
# Date: 2024-04-30, 21:48:11 GMT
# © 2024 Unicode®, Inc.
# CaseFolding-17.0.0.txt
# Date: 2025-07-30, 23:54:36 GMT
# © 2025 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
#
Expand All @@ -18,15 +18,15 @@
# The data supports both implementations that require simple case foldings
# (where string lengths don't change), and implementations that allow full case folding
# (where string lengths may grow). Note that where they can be supported, the
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
# full case foldings are superior: for example, they allow "FUSS" and "Fuß" to match.
#
# All code points not listed in this file map to themselves.
#
# NOTE: case folding does not preserve normalization formats!
#
# For information on case folding, including how to have case folding
# preserve normalization formats, see Section 3.13 Default Case Algorithms in
# The Unicode Standard.
# preserve normalization formats, see the
# "Conformance" / "Default Case Algorithms" section of the core specification.
#
# ================================================================================
# Format
Expand Down Expand Up @@ -1243,7 +1243,10 @@ A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN
A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE
A7CE; C; A7CF; # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE
A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G
A7D2; C; A7D3; # LATIN CAPITAL LETTER DOUBLE THORN
A7D4; C; A7D5; # LATIN CAPITAL LETTER DOUBLE WYNN
A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S
A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S
A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA
Expand Down Expand Up @@ -1616,6 +1619,31 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O
16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI
16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y
16EA0; C; 16EBB; # BERIA ERFE CAPITAL LETTER ARKAB
16EA1; C; 16EBC; # BERIA ERFE CAPITAL LETTER BASIGNA
16EA2; C; 16EBD; # BERIA ERFE CAPITAL LETTER DARBAI
16EA3; C; 16EBE; # BERIA ERFE CAPITAL LETTER EH
16EA4; C; 16EBF; # BERIA ERFE CAPITAL LETTER FITKO
16EA5; C; 16EC0; # BERIA ERFE CAPITAL LETTER GOWAY
16EA6; C; 16EC1; # BERIA ERFE CAPITAL LETTER HIRDEABO
16EA7; C; 16EC2; # BERIA ERFE CAPITAL LETTER I
16EA8; C; 16EC3; # BERIA ERFE CAPITAL LETTER DJAI
16EA9; C; 16EC4; # BERIA ERFE CAPITAL LETTER KOBO
16EAA; C; 16EC5; # BERIA ERFE CAPITAL LETTER LAKKO
16EAB; C; 16EC6; # BERIA ERFE CAPITAL LETTER MERI
16EAC; C; 16EC7; # BERIA ERFE CAPITAL LETTER NINI
16EAD; C; 16EC8; # BERIA ERFE CAPITAL LETTER GNA
16EAE; C; 16EC9; # BERIA ERFE CAPITAL LETTER NGAY
16EAF; C; 16ECA; # BERIA ERFE CAPITAL LETTER OI
16EB0; C; 16ECB; # BERIA ERFE CAPITAL LETTER PI
16EB1; C; 16ECC; # BERIA ERFE CAPITAL LETTER ERIGO
16EB2; C; 16ECD; # BERIA ERFE CAPITAL LETTER ERIGO TAMURA
16EB3; C; 16ECE; # BERIA ERFE CAPITAL LETTER SERI
16EB4; C; 16ECF; # BERIA ERFE CAPITAL LETTER SHEP
16EB5; C; 16ED0; # BERIA ERFE CAPITAL LETTER TATASOUE
16EB6; C; 16ED1; # BERIA ERFE CAPITAL LETTER UI
16EB7; C; 16ED2; # BERIA ERFE CAPITAL LETTER WASSE
16EB8; C; 16ED3; # BERIA ERFE CAPITAL LETTER AY
1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM
Expand Down Expand Up @@ -1651,4 +1679,4 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO
1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
#
# EOF
# EOF
Loading