|
1 | 1 | /* |
2 | | - * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. |
| 2 | + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. |
3 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | 4 | * |
5 | 5 | * This code is free software; you can redistribute it and/or modify it |
|
28 | 28 | * @bug 8218915 |
29 | 29 | */ |
30 | 30 |
|
31 | | -import java.util.List; |
32 | | -import java.util.ArrayList; |
33 | | - |
34 | 31 | public class TestIsJavaIdentifierMethods { |
35 | | - |
36 | | - // List of new code points are not present in Unicode 6.2. |
37 | | - private static final List<Integer> UNASSIGNED_CODEPOINTS_IN_6_2 |
38 | | - = new ArrayList<Integer>() |
39 | | - {{ |
40 | | - add(0x20BB); // NORDIC MARK SIGN |
41 | | - add(0x20BC); // MANAT SIGN |
42 | | - add(0x20BD); // RUBLE SIGN |
43 | | - add(0x20BE); // LARI SIGN |
44 | | - add(0x20BF); // BITCOIN SIGN |
45 | | - add(0x32FF); // SQUARE ERA NAME NEWERA |
46 | | - }}; |
| 32 | + // Code points that are unassigned in Unicode 6.2 (the version that |
| 33 | + // Java SE 8 is based upon): the currency sign code points (Nordic |
| 34 | + // Mark ... Bitcoin), the Japanese era square character code point, |
| 35 | + // and 35 CJK Unified Ideograph code points from GB18030-2022. |
| 36 | + private static final int CS_SIGNS_CODEPOINT_START = 0x20BB; |
| 37 | + private static final int CS_SIGNS_CODEPOINT_END = 0x20BF; |
| 38 | + private static final int JAPANESE_ERA_CODEPOINT = 0x32FF; |
| 39 | + private static final int GB18030_2022_CODEPOINT_START = 0x9FCD; |
| 40 | + private static final int GB18030_2022_CODEPOINT_END = 0x9FEF; |
47 | 41 |
|
48 | 42 | public static void main(String[] args) { |
49 | 43 | testIsJavaIdentifierPart_int(); |
@@ -75,14 +69,15 @@ public static void main(String[] args) { |
75 | 69 | public static void testIsJavaIdentifierPart_int() { |
76 | 70 | for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) { |
77 | 71 | boolean expected = false; |
78 | | - |
79 | 72 | // Since Character.isJavaIdentifierPart(int) strictly conforms to |
80 | 73 | // character information from version 6.2 of the Unicode Standard, |
81 | | - // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2" |
82 | | - // list. If the code point is found in list |
83 | | - // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable |
84 | | - // "expected" is considered false. |
85 | | - if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(cp)) { |
| 74 | + // check if code point is one of the extra unassigned |
| 75 | + // code points (defined at the beginning of the file). If the code |
| 76 | + // point is found to be one of the unassigned code points, |
| 77 | + // value of variable "expected" is considered false. |
| 78 | + if (cp != JAPANESE_ERA_CODEPOINT && |
| 79 | + !(cp >= CS_SIGNS_CODEPOINT_START && cp <= CS_SIGNS_CODEPOINT_END) && |
| 80 | + !(cp >= GB18030_2022_CODEPOINT_START && cp <= GB18030_2022_CODEPOINT_END)) { |
86 | 81 | byte type = (byte) Character.getType(cp); |
87 | 82 | expected = Character.isLetter(cp) |
88 | 83 | || type == Character.CURRENCY_SYMBOL |
@@ -124,11 +119,13 @@ public static void testIsJavaIdentifierPart_char() { |
124 | 119 | boolean expected = false; |
125 | 120 | // Since Character.isJavaIdentifierPart(char) strictly conforms to |
126 | 121 | // character information from version 6.2 of the Unicode Standard, |
127 | | - // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2" |
128 | | - // list. If the code point is found in list |
129 | | - // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable |
130 | | - // "expected" is considered false. |
131 | | - if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) { |
| 122 | + // check if code point is one of the extra unassigned |
| 123 | + // code points (defined at the beginning of the file). If the code |
| 124 | + // point is found to be one of the unassigned code points, |
| 125 | + // value of variable "expected" is considered false. |
| 126 | + if (i != JAPANESE_ERA_CODEPOINT && |
| 127 | + !(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) && |
| 128 | + !(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) { |
132 | 129 | byte type = (byte) Character.getType(ch); |
133 | 130 | expected = Character.isLetter(ch) |
134 | 131 | || type == Character.CURRENCY_SYMBOL |
@@ -165,11 +162,13 @@ public static void testIsJavaIdentifierStart_int() { |
165 | 162 | boolean expected = false; |
166 | 163 | // Since Character.isJavaIdentifierStart(int) strictly conforms to |
167 | 164 | // character information from version 6.2 of the Unicode Standard, |
168 | | - // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2" |
169 | | - // list. If the code point is found in list |
170 | | - // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable |
171 | | - // "expected" is considered false. |
172 | | - if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(cp)) { |
| 165 | + // check if code point is one of the extra unassigned |
| 166 | + // code points (defined at the beginning of the file). If the code |
| 167 | + // point is found to be one of the unassigned code points, |
| 168 | + // value of variable "expected" is considered false. |
| 169 | + if (cp != JAPANESE_ERA_CODEPOINT && |
| 170 | + !(cp >= CS_SIGNS_CODEPOINT_START && cp <= CS_SIGNS_CODEPOINT_END) && |
| 171 | + !(cp >= GB18030_2022_CODEPOINT_START && cp <= GB18030_2022_CODEPOINT_END)) { |
173 | 172 | byte type = (byte) Character.getType(cp); |
174 | 173 | expected = Character.isLetter(cp) |
175 | 174 | || type == Character.LETTER_NUMBER |
@@ -203,11 +202,13 @@ public static void testIsJavaIdentifierStart_char() { |
203 | 202 | boolean expected = false; |
204 | 203 | // Since Character.isJavaIdentifierStart(char) strictly conforms to |
205 | 204 | // character information from version 6.2 of the Unicode Standard, |
206 | | - // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2" |
207 | | - // list. If the code point is found in list |
208 | | - // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable |
209 | | - // "expected" is considered false. |
210 | | - if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) { |
| 205 | + // check if code point is one of the extra unassigned |
| 206 | + // code points (defined at the beginning of the file). If the code |
| 207 | + // point is found to be one of the unassigned code points, |
| 208 | + // value of variable "expected" is considered false. |
| 209 | + if (i != JAPANESE_ERA_CODEPOINT && |
| 210 | + !(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) && |
| 211 | + !(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) { |
211 | 212 | byte type = (byte) Character.getType(ch); |
212 | 213 | expected = Character.isLetter(ch) |
213 | 214 | || type == Character.LETTER_NUMBER |
@@ -241,11 +242,13 @@ public static void testIsJavaLetter() { |
241 | 242 | boolean expected = false; |
242 | 243 | // Since Character.isJavaLetter(char) strictly conforms to |
243 | 244 | // character information from version 6.2 of the Unicode Standard, |
244 | | - // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2" |
245 | | - // list. If the code point is found in list |
246 | | - // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable |
247 | | - // "expected" is considered false. |
248 | | - if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) { |
| 245 | + // check if code point is one of the extra unassigned |
| 246 | + // code points (defined at the beginning of the file). If the code |
| 247 | + // point is found to be one of the unassigned code points, |
| 248 | + // value of variable "expected" is considered false. |
| 249 | + if (i != JAPANESE_ERA_CODEPOINT && |
| 250 | + !(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) && |
| 251 | + !(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) { |
249 | 252 | byte type = (byte) Character.getType(ch); |
250 | 253 | expected = Character.isLetter(ch) |
251 | 254 | || type == Character.LETTER_NUMBER |
@@ -283,11 +286,13 @@ public static void testIsJavaLetterOrDigit() { |
283 | 286 | boolean expected = false; |
284 | 287 | // Since Character.isJavaLetterOrDigit(char) strictly conforms to |
285 | 288 | // character information from version 6.2 of the Unicode Standard, |
286 | | - // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2" |
287 | | - // list. If the code point is found in list |
288 | | - // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable |
289 | | - // "expected" is considered false. |
290 | | - if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) { |
| 289 | + // check if code point is one of the extra unassigned |
| 290 | + // code points (defined at the beginning of the file). If the code |
| 291 | + // point is found to be one of the unassigned code points, |
| 292 | + // value of variable "expected" is considered false. |
| 293 | + if (i != JAPANESE_ERA_CODEPOINT && |
| 294 | + !(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) && |
| 295 | + !(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) { |
291 | 296 | byte type = (byte) Character.getType(ch); |
292 | 297 | expected = Character.isLetter(ch) |
293 | 298 | || type == Character.CURRENCY_SYMBOL |
|
0 commit comments