Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions jdk/make/data/characterdata/CharacterData00.java.template
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -106,19 +106,23 @@ class CharacterData00 extends CharacterData {

/**
 * Reports whether {@code ch} is permitted as the first character of a
 * Java identifier.
 *
 * <p>The result strictly conforms to the code points assigned in
 * Unicode 6.2. Code points {32FF}, {20BB..20BF} and {9FCD..9FEF} are not
 * from Unicode 6.2, so they are rejected up front, before the property
 * word is consulted.
 *
 * @param ch the code point to test
 * @return {@code false} for the excluded code points; otherwise the
 *         outcome of comparing the identifier-info bits of
 *         {@code getProperties(ch)} against the Java-start threshold
 */
boolean isJavaIdentifierStart(int ch) {
    // A single guard covering all three post-6.2 ranges. Keeping the
    // older two-range check in front of this one would nest the checks
    // and leave {9FCD..9FEF} unfiltered.
    if (ch == 0x32FF ||
        (ch >= 0x20BB && ch <= 0x20BF) ||
        (ch >= 0x9FCD && ch <= 0x9FEF)) {
        return false;
    }
    // NOTE(review): the $$-prefixed names are presumably placeholders
    // filled in when this template is expanded -- confirm against the
    // generator before renaming them.
    int props = getProperties(ch);
    return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
}

boolean isJavaIdentifierPart(int ch) {
// isJavaIdentifierPart strictly conforms to code points assigned
// in Unicode 6.2. Since code points {32FF} and {20BB..20BF} are not
// from Unicode 6.2, return false.
if(ch == 0x32FF || (ch>= 0x20BB && ch<= 0x20BF))
// in Unicode 6.2. Since code points {32FF}, {20BB..20BF}, and
// {9FCD..9FEF} are not from Unicode 6.2, return false.
if(ch == 0x32FF ||
(ch>= 0x20BB && ch<= 0x20BF) ||
(ch>= 0x9FCD && ch<= 0x9FEF))
return false;
int props = getProperties(ch);
return ((props & $$nonzeroJavaPart) != 0);
Expand Down
2 changes: 1 addition & 1 deletion jdk/make/data/unicodedata/UnicodeData.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11732,7 +11732,7 @@
4DFE;HEXAGRAM FOR AFTER COMPLETION;So;0;ON;;;;;N;;;;;
4DFF;HEXAGRAM FOR BEFORE COMPLETION;So;0;ON;;;;;N;;;;;
4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
9FCC;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
9FEF;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;;
A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;;
A002;YI SYLLABLE I;Lo;0;L;;;;;N;;;;;
Expand Down
16 changes: 10 additions & 6 deletions jdk/src/share/classes/java/lang/Character.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -52,13 +52,17 @@
* <a href="http://www.unicode.org">http://www.unicode.org</a>.
* <p>
* The Java SE 8 Platform uses character information from version 6.2
* of the Unicode Standard, with two extensions. First, the Java SE 8 Platform
* allows an implementation of class {@code Character} to use the Japanese Era
* code point, {@code U+32FF}, from the first version of the Unicode Standard
* after 6.2 that assigns the code point. Second, in recognition of the fact
* of the Unicode Standard, with three extensions. First, in recognition of the fact
* that new currencies appear frequently, the Java SE 8 Platform allows an
* implementation of class {@code Character} to use the Currency Symbols
* block from version 10.0 of the Unicode Standard. Consequently, the
* block from version 10.0 of the Unicode Standard. Second, the Java SE 8 Platform
* allows an implementation of class {@code Character} to use the code points
* in the range of {@code U+9FCD} to {@code U+9FEF} from version 11.0 of the
* Unicode Standard, in order for the class to allow the "Implementation
* Level 1" of the Chinese GB18030-2022 standard. Third, the Java SE 8 Platform
* allows an implementation of class {@code Character} to use the Japanese Era
* code point, {@code U+32FF}, from the Unicode Standard version 12.1.
* Consequently, the
* behavior of fields and methods of class {@code Character} may vary across
* implementations of the Java SE 8 Platform when processing the aforementioned
* code points (outside of version 6.2), except for the following methods
Expand Down
4 changes: 2 additions & 2 deletions jdk/test/java/lang/Character/Scripts.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1433,15 +1433,15 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FCC ; Han # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC
4E00..9FEF ; Han # Lo [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF
F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D

# Total code points: 75963
# Total code points: 75998

# ================================================

Expand Down
99 changes: 52 additions & 47 deletions jdk/test/java/lang/Character/TestIsJavaIdentifierMethods.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -28,22 +28,16 @@
* @bug 8218915
*/

import java.util.List;
import java.util.ArrayList;

public class TestIsJavaIdentifierMethods {

// List of new code points are not present in Unicode 6.2.
private static final List<Integer> UNASSIGNED_CODEPOINTS_IN_6_2
= new ArrayList<Integer>()
{{
add(0x20BB); // NORDIC MARK SIGN
add(0x20BC); // MANAT SIGN
add(0x20BD); // RUBLE SIGN
add(0x20BE); // LARI SIGN
add(0x20BF); // BITCOIN SIGN
add(0x32FF); // SQUARE ERA NAME NEWERA
}};
// Code points that are unassigned in Unicode 6.2 (which Java SE 8 is
// based upon): the currency sign code points (Nordic Mark ... Bitcoin),
// the Japanese Era Square character code point, and the 35 CJK Unified
// Ideograph code points required by GB18030-2022.
private static final int CS_SIGNS_CODEPOINT_START = 0x20BB;
private static final int CS_SIGNS_CODEPOINT_END = 0x20BF;
private static final int JAPANESE_ERA_CODEPOINT = 0x32FF;
private static final int GB18030_2022_CODEPOINT_START = 0x9FCD;
private static final int GB18030_2022_CODEPOINT_END = 0x9FEF;

public static void main(String[] args) {
testIsJavaIdentifierPart_int();
Expand Down Expand Up @@ -75,14 +69,15 @@ public static void main(String[] args) {
public static void testIsJavaIdentifierPart_int() {
for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
boolean expected = false;

// Since Character.isJavaIdentifierPart(int) strictly conforms to
// character information from version 6.2 of the Unicode Standard,
// check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
// list. If the code point is found in list
// "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable
// "expected" is considered false.
if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(cp)) {
// check if code point is one of the extra unassigned
// code points (defined at the beginning of the file). If the code
// point is found to be one of the unassigned code points,
// value of variable "expected" is considered false.
if (cp != JAPANESE_ERA_CODEPOINT &&
!(cp >= CS_SIGNS_CODEPOINT_START && cp <= CS_SIGNS_CODEPOINT_END) &&
!(cp >= GB18030_2022_CODEPOINT_START && cp <= GB18030_2022_CODEPOINT_END)) {
byte type = (byte) Character.getType(cp);
expected = Character.isLetter(cp)
|| type == Character.CURRENCY_SYMBOL
Expand Down Expand Up @@ -124,11 +119,13 @@ public static void testIsJavaIdentifierPart_char() {
boolean expected = false;
// Since Character.isJavaIdentifierPart(char) strictly conforms to
// character information from version 6.2 of the Unicode Standard,
// check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
// list. If the code point is found in list
// "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable
// "expected" is considered false.
if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) {
// check if code point is one of the extra unassigned
// code points (defined at the beginning of the file). If the code
// point is found to be one of the unassigned code points,
// value of variable "expected" is considered false.
if (i != JAPANESE_ERA_CODEPOINT &&
!(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) &&
!(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) {
byte type = (byte) Character.getType(ch);
expected = Character.isLetter(ch)
|| type == Character.CURRENCY_SYMBOL
Expand Down Expand Up @@ -165,11 +162,13 @@ public static void testIsJavaIdentifierStart_int() {
boolean expected = false;
// Since Character.isJavaIdentifierStart(int) strictly conforms to
// character information from version 6.2 of the Unicode Standard,
// check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
// list. If the code point is found in list
// "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable
// "expected" is considered false.
if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(cp)) {
// check if code point is one of the extra unassigned
// code points (defined at the beginning of the file). If the code
// point is found to be one of the unassigned code points,
// value of variable "expected" is considered false.
if (cp != JAPANESE_ERA_CODEPOINT &&
!(cp >= CS_SIGNS_CODEPOINT_START && cp <= CS_SIGNS_CODEPOINT_END) &&
!(cp >= GB18030_2022_CODEPOINT_START && cp <= GB18030_2022_CODEPOINT_END)) {
byte type = (byte) Character.getType(cp);
expected = Character.isLetter(cp)
|| type == Character.LETTER_NUMBER
Expand Down Expand Up @@ -203,11 +202,13 @@ public static void testIsJavaIdentifierStart_char() {
boolean expected = false;
// Since Character.isJavaIdentifierStart(char) strictly conforms to
// character information from version 6.2 of the Unicode Standard,
// check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
// list. If the code point is found in list
// "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable
// "expected" is considered false.
if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) {
// check if code point is one of the extra unassigned
// code points (defined at the beginning of the file). If the code
// point is found to be one of the unassigned code points,
// value of variable "expected" is considered false.
if (i != JAPANESE_ERA_CODEPOINT &&
!(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) &&
!(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) {
byte type = (byte) Character.getType(ch);
expected = Character.isLetter(ch)
|| type == Character.LETTER_NUMBER
Expand Down Expand Up @@ -241,11 +242,13 @@ public static void testIsJavaLetter() {
boolean expected = false;
// Since Character.isJavaLetter(char) strictly conforms to
// character information from version 6.2 of the Unicode Standard,
// check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
// list. If the code point is found in list
// "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable
// "expected" is considered false.
if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) {
// check if code point is one of the extra unassigned
// code points (defined at the beginning of the file). If the code
// point is found to be one of the unassigned code points,
// value of variable "expected" is considered false.
if (i != JAPANESE_ERA_CODEPOINT &&
!(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) &&
!(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) {
byte type = (byte) Character.getType(ch);
expected = Character.isLetter(ch)
|| type == Character.LETTER_NUMBER
Expand Down Expand Up @@ -283,11 +286,13 @@ public static void testIsJavaLetterOrDigit() {
boolean expected = false;
// Since Character.isJavaLetterOrDigit(char) strictly conforms to
// character information from version 6.2 of the Unicode Standard,
// check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
// list. If the code point is found in list
// "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable
// "expected" is considered false.
if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) {
// check if code point is one of the extra unassigned
// code points (defined at the beginning of the file). If the code
// point is found to be one of the unassigned code points,
// value of variable "expected" is considered false.
if (i != JAPANESE_ERA_CODEPOINT &&
!(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) &&
!(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) {
byte type = (byte) Character.getType(ch);
expected = Character.isLetter(ch)
|| type == Character.CURRENCY_SYMBOL
Expand Down