Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions jdk/make/data/characterdata/CharacterData02.java.template
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,21 @@ class CharacterData02 extends CharacterData {
}

boolean isJavaIdentifierStart(int ch) {
    // Identifier-start status must match the code points assigned in
    // Unicode 6.2. CJK Unified Ideographs Extension E comes from a later
    // Unicode version, so every code point in that block is rejected
    // before the property table is consulted.
    Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
    if (block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E) {
        return false;
    }
    // Isolate the identifier-info bits and compare them with the lowest
    // value that marks a legal Java identifier start.
    int identifierInfo = (getProperties(ch) & $$maskIdentifierInfo);
    return identifierInfo >= $$lowJavaStart;
}

boolean isJavaIdentifierPart(int ch) {
    // Identifier-part status must match the code points assigned in
    // Unicode 6.2. CJK Unified Ideographs Extension E comes from a later
    // Unicode version, so every code point in that block is rejected
    // before the property table is consulted.
    Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
    if (block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E) {
        return false;
    }
    // Any set bit under the Java-part mask means the code point may
    // appear inside a Java identifier.
    int javaPartBits = (getProperties(ch) & $$nonzeroJavaPart);
    return javaPartBits != 0;
}
Expand Down
2 changes: 2 additions & 0 deletions jdk/make/data/unicodedata/UnicodeData.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23550,6 +23550,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;;
2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;;
2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;;
2B820;<CJK Ideograph Extension E, First>;Lo;0;L;;;;;N;;;;;
2CEA1;<CJK Ideograph Extension E, Last>;Lo;0;L;;;;;N;;;;;
2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;;
2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;;
2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;;
Expand Down
23 changes: 19 additions & 4 deletions jdk/src/share/classes/java/lang/Character.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@
* block from version 10.0 of the Unicode Standard. Second, the Java SE 8 Platform
* allows an implementation of class {@code Character} to use the code points
* in the range of {@code U+9FCD} to {@code U+9FEF} from version 11.0 of the
* Unicode Standard, in order for the class to allow the "Implementation
* Level 1" of the Chinese GB18030-2022 standard. Third, the Java SE 8 Platform
* Unicode Standard and in the {@code CJK Unified Ideographs Extension E} block
* from version 8.0 of the Unicode Standard, in order for the class to allow the
* "Implementation Level 2" of the Chinese GB18030-2022 standard.
* Third, the Java SE 8 Platform
* allows an implementation of class {@code Character} to use the Japanese Era
* code point, {@code U+32FF}, from the Unicode Standard version 12.1.
* Consequently, the
Expand Down Expand Up @@ -2575,7 +2577,18 @@ private UnicodeBlock(String idName, String... aliases) {
"ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
"ARABICMATHEMATICALALPHABETICSYMBOLS");

private static final int[] blockStarts = {
/**
* Constant for the "CJK Unified Ideographs Extension E" Unicode
* character block.
* The block spans the code points {@code U+2B820} through {@code U+2CEAF}.
* @apiNote This field is defined in Java SE 8 Maintenance Release 5.
* @since 1.8
*/
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
"CJK UNIFIED IDEOGRAPHS EXTENSION E",
"CJKUNIFIEDIDEOGRAPHSEXTENSIONE");

private static final int blockStarts[] = {
0x0000, // 0000..007F; Basic Latin
0x0080, // 0080..00FF; Latin-1 Supplement
0x0100, // 0100..017F; Latin Extended-A
Expand Down Expand Up @@ -2823,7 +2836,8 @@ private UnicodeBlock(String idName, String... aliases) {
0x2A6E0, // unassigned
0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
0x2B820, // unassigned
0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E
0x2CEB0, // unassigned
0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
0x2FA20, // unassigned
0xE0000, // E0000..E007F; Tags
Expand Down Expand Up @@ -3082,6 +3096,7 @@ private UnicodeBlock(String idName, String... aliases) {
null,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
null,
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
null,
Expand Down
4 changes: 2 additions & 2 deletions jdk/test/java/lang/Character/CheckScript.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

/*
* Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2010, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -24,7 +24,7 @@

/**
* @test
* @bug 6945564 6959267 7033561 7070436 7198195
* @bug 6945564 6959267 7033561 7070436 7198195 8305681
* @summary Check that the j.l.Character.UnicodeScript
*/

Expand Down
3 changes: 2 additions & 1 deletion jdk/test/java/lang/Character/Scripts.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1439,9 +1439,10 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D

# Total code points: 75998
# Total code points: 81760

# ================================================

Expand Down
31 changes: 20 additions & 11 deletions jdk/test/java/lang/Character/TestIsJavaIdentifierMethods.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,26 @@
* @test
* @summary Test behavior of isJavaIdentifierXX, testIsJavaLetter, and
* testIsJavaLetterOrDigit methods for all code points.
* @bug 8218915
* @bug 8218915 8301400 8305681
*/

public class TestIsJavaIdentifierMethods {
// Unassigned code points not present in Unicode 6.2 (which Java SE 8
// is based upon), including: various currency symbol sign code points
// (Nordic Mark ... Bitcoin), Japanese Era Square character code point,
// and 35 CJK Unified Ideograph code points from GB18030-2022
// (Nordic Mark ... Bitcoin), the Japanese Era Square character code point, and
// the code points for GB18030-2022 implementation levels 1 and 2 (35 code
// points from CJK Unified Ideographs and all of CJK Unified Ideographs
// Extension E).
private static final int CS_SIGNS_CODEPOINT_START = 0x20BB;
private static final int CS_SIGNS_CODEPOINT_END = 0x20BF;
private static final int JAPANESE_ERA_CODEPOINT = 0x32FF;
private static final int GB18030_2022_CODEPOINT_START = 0x9FCD;
private static final int GB18030_2022_CODEPOINT_END = 0x9FEF;
// GB18030_2022 Code Points
private static final int CJK_GB18030_LEVEL1_START = 0x9FCD;
private static final int CJK_GB18030_LEVEL1_END = 0x9FEF;
// Extension E code points are greater than U+FFFF,
// and thus only the int methods need to be tested
private static final int CJK_EXTENSION_E_START = 0x2B820;
private static final int CJK_EXTENSION_E_END = 0x2CEAF;

public static void main(String[] args) {
testIsJavaIdentifierPart_int();
Expand Down Expand Up @@ -77,7 +84,8 @@ public static void testIsJavaIdentifierPart_int() {
// value of variable "expected" is considered false.
if (cp != JAPANESE_ERA_CODEPOINT &&
!(cp >= CS_SIGNS_CODEPOINT_START && cp <= CS_SIGNS_CODEPOINT_END) &&
!(cp >= GB18030_2022_CODEPOINT_START && cp <= GB18030_2022_CODEPOINT_END)) {
!(cp >= CJK_GB18030_LEVEL1_START && cp <= CJK_GB18030_LEVEL1_END) &&
!(cp >= CJK_EXTENSION_E_START && cp <= CJK_EXTENSION_E_END)) {
byte type = (byte) Character.getType(cp);
expected = Character.isLetter(cp)
|| type == Character.CURRENCY_SYMBOL
Expand Down Expand Up @@ -125,7 +133,7 @@ public static void testIsJavaIdentifierPart_char() {
// value of variable "expected" is considered false.
if (i != JAPANESE_ERA_CODEPOINT &&
!(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) &&
!(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) {
!(i >= CJK_GB18030_LEVEL1_START && i <= CJK_GB18030_LEVEL1_END)) {
byte type = (byte) Character.getType(ch);
expected = Character.isLetter(ch)
|| type == Character.CURRENCY_SYMBOL
Expand Down Expand Up @@ -168,7 +176,8 @@ public static void testIsJavaIdentifierStart_int() {
// value of variable "expected" is considered false.
if (cp != JAPANESE_ERA_CODEPOINT &&
!(cp >= CS_SIGNS_CODEPOINT_START && cp <= CS_SIGNS_CODEPOINT_END) &&
!(cp >= GB18030_2022_CODEPOINT_START && cp <= GB18030_2022_CODEPOINT_END)) {
!(cp >= CJK_GB18030_LEVEL1_START && cp <= CJK_GB18030_LEVEL1_END) &&
!(cp >= CJK_EXTENSION_E_START && cp <= CJK_EXTENSION_E_END)) {
byte type = (byte) Character.getType(cp);
expected = Character.isLetter(cp)
|| type == Character.LETTER_NUMBER
Expand Down Expand Up @@ -208,7 +217,7 @@ public static void testIsJavaIdentifierStart_char() {
// value of variable "expected" is considered false.
if (i != JAPANESE_ERA_CODEPOINT &&
!(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) &&
!(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) {
!(i >= CJK_GB18030_LEVEL1_START && i <= CJK_GB18030_LEVEL1_END)) {
byte type = (byte) Character.getType(ch);
expected = Character.isLetter(ch)
|| type == Character.LETTER_NUMBER
Expand Down Expand Up @@ -248,7 +257,7 @@ public static void testIsJavaLetter() {
// value of variable "expected" is considered false.
if (i != JAPANESE_ERA_CODEPOINT &&
!(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) &&
!(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) {
!(i >= CJK_GB18030_LEVEL1_START && i <= CJK_GB18030_LEVEL1_END)) {
byte type = (byte) Character.getType(ch);
expected = Character.isLetter(ch)
|| type == Character.LETTER_NUMBER
Expand Down Expand Up @@ -292,7 +301,7 @@ public static void testIsJavaLetterOrDigit() {
// value of variable "expected" is considered false.
if (i != JAPANESE_ERA_CODEPOINT &&
!(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) &&
!(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) {
!(i >= CJK_GB18030_LEVEL1_START && i <= CJK_GB18030_LEVEL1_END)) {
byte type = (byte) Character.getType(ch);
expected = Character.isLetter(ch)
|| type == Character.CURRENCY_SYMBOL
Expand Down