Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 66 additions & 36 deletions src/java.base/share/classes/java/util/regex/CharPredicates.java
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,15 @@ static final CharPredicate WORD() {

/////////////////////////////////////////////////////////////////////////////

private static CharPredicate getPosixPredicate(String name) {
private static CharPredicate getPosixPredicate(String name, boolean caseIns) {
switch (name) {
case "ALPHA": return ALPHABETIC();
case "LOWER": return LOWERCASE();
case "UPPER": return UPPERCASE();
case "LOWER": return caseIns
? LOWERCASE().union(UPPERCASE(), TITLECASE())
: LOWERCASE();
case "UPPER": return caseIns
? UPPERCASE().union(LOWERCASE(), TITLECASE())
: UPPERCASE();
case "SPACE": return WHITE_SPACE();
case "PUNCT": return PUNCTUATION();
case "XDIGIT": return HEX_DIGIT();
Expand All @@ -187,7 +191,7 @@ private static CharPredicate getPosixPredicate(String name) {
}
}

private static CharPredicate getUnicodePredicate(String name) {
private static CharPredicate getUnicodePredicate(String name, boolean caseIns) {
switch (name) {
case "ALPHABETIC": return ALPHABETIC();
case "ASSIGNED": return ASSIGNED();
Expand All @@ -196,11 +200,17 @@ private static CharPredicate getUnicodePredicate(String name) {
case "IDEOGRAPHIC": return IDEOGRAPHIC();
case "JOINCONTROL": return JOIN_CONTROL();
case "LETTER": return LETTER();
case "LOWERCASE": return LOWERCASE();
case "LOWERCASE": return caseIns
? LOWERCASE().union(UPPERCASE(), TITLECASE())
: LOWERCASE();
case "NONCHARACTERCODEPOINT": return NONCHARACTER_CODE_POINT();
case "TITLECASE": return TITLECASE();
case "TITLECASE": return caseIns
? TITLECASE().union(LOWERCASE(), UPPERCASE())
: TITLECASE();
case "PUNCTUATION": return PUNCTUATION();
case "UPPERCASE": return UPPERCASE();
case "UPPERCASE": return caseIns
? UPPERCASE().union(LOWERCASE(), TITLECASE())
: UPPERCASE();
case "WHITESPACE": return WHITE_SPACE();
case "WORD": return WORD();
case "WHITE_SPACE": return WHITE_SPACE();
Expand All @@ -211,16 +221,16 @@ private static CharPredicate getUnicodePredicate(String name) {
}
}

public static CharPredicate forUnicodeProperty(String propName) {
public static CharPredicate forUnicodeProperty(String propName, boolean caseIns) {
propName = propName.toUpperCase(Locale.ROOT);
CharPredicate p = getUnicodePredicate(propName);
CharPredicate p = getUnicodePredicate(propName, caseIns);
if (p != null)
return p;
return getPosixPredicate(propName);
return getPosixPredicate(propName, caseIns);
}

public static CharPredicate forPOSIXName(String propName) {
return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH));
public static CharPredicate forPOSIXName(String propName, boolean caseIns) {
return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH), caseIns);
}

/////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -254,14 +264,23 @@ static CharPredicate forUnicodeBlock(String name) {

// unicode categories, aliases, properties, java methods ...

static CharPredicate forProperty(String name) {
static CharPredicate forProperty(String name, boolean caseIns) {
// Unicode character property aliases, defined in
// http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
switch (name) {
case "Cn": return category(1<<Character.UNASSIGNED);
case "Lu": return category(1<<Character.UPPERCASE_LETTER);
case "Ll": return category(1<<Character.LOWERCASE_LETTER);
case "Lt": return category(1<<Character.TITLECASE_LETTER);
case "Lu": return category(caseIns ? (1 << Character.LOWERCASE_LETTER) |
(1 << Character.UPPERCASE_LETTER) |
(1 << Character.TITLECASE_LETTER)
: (1 << Character.UPPERCASE_LETTER));
case "Ll": return category(caseIns ? (1 << Character.LOWERCASE_LETTER) |
(1 << Character.UPPERCASE_LETTER) |
(1 << Character.TITLECASE_LETTER)
: (1 << Character.LOWERCASE_LETTER));
case "Lt": return category(caseIns ? (1 << Character.LOWERCASE_LETTER) |
(1 << Character.UPPERCASE_LETTER) |
(1 << Character.TITLECASE_LETTER)
: (1 << Character.TITLECASE_LETTER));
case "Lm": return category(1<<Character.MODIFIER_LETTER);
case "Lo": return category(1<<Character.OTHER_LETTER);
case "Mn": return category(1<<Character.NON_SPACING_MARK);
Expand Down Expand Up @@ -338,32 +357,43 @@ static CharPredicate forProperty(String name) {
case "Cntrl": return ctype(ASCII.CNTRL); // Control characters
case "Digit": return range('0', '9'); // Numeric characters
case "Graph": return ctype(ASCII.GRAPH); // printable and visible
case "Lower": return range('a', 'z'); // Lower-case alphabetic
case "Lower": return caseIns ? ctype(ASCII.ALPHA)
: range('a', 'z'); // Lower-case alphabetic
case "Print": return range(0x20, 0x7E); // Printable characters
case "Punct": return ctype(ASCII.PUNCT); // Punctuation characters
case "Space": return ctype(ASCII.SPACE); // Space characters
case "Upper": return range('A', 'Z'); // Upper-case alphabetic
case "Upper": return caseIns ? ctype(ASCII.ALPHA)
: range('A', 'Z'); // Upper-case alphabetic
case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits

// Java character properties, defined by methods in Character.java
case "javaLowerCase": return java.lang.Character::isLowerCase;
case "javaUpperCase": return Character::isUpperCase;
case "javaAlphabetic": return java.lang.Character::isAlphabetic;
case "javaIdeographic": return java.lang.Character::isIdeographic;
case "javaTitleCase": return java.lang.Character::isTitleCase;
case "javaDigit": return java.lang.Character::isDigit;
case "javaDefined": return java.lang.Character::isDefined;
case "javaLetter": return java.lang.Character::isLetter;
case "javaLetterOrDigit": return java.lang.Character::isLetterOrDigit;
case "javaJavaIdentifierStart": return java.lang.Character::isJavaIdentifierStart;
case "javaJavaIdentifierPart": return java.lang.Character::isJavaIdentifierPart;
case "javaUnicodeIdentifierStart": return java.lang.Character::isUnicodeIdentifierStart;
case "javaUnicodeIdentifierPart": return java.lang.Character::isUnicodeIdentifierPart;
case "javaIdentifierIgnorable": return java.lang.Character::isIdentifierIgnorable;
case "javaSpaceChar": return java.lang.Character::isSpaceChar;
case "javaWhitespace": return java.lang.Character::isWhitespace;
case "javaISOControl": return java.lang.Character::isISOControl;
case "javaMirrored": return java.lang.Character::isMirrored;
case "javaLowerCase": return caseIns ? c -> Character.isLowerCase(c) ||
Character.isUpperCase(c) ||
Character.isTitleCase(c)
: Character::isLowerCase;
case "javaUpperCase": return caseIns ? c -> Character.isUpperCase(c) ||
Character.isLowerCase(c) ||
Character.isTitleCase(c)
: Character::isUpperCase;
case "javaAlphabetic": return Character::isAlphabetic;
case "javaIdeographic": return Character::isIdeographic;
case "javaTitleCase": return caseIns ? c -> Character.isTitleCase(c) ||
Character.isLowerCase(c) ||
Character.isUpperCase(c)
: Character::isTitleCase;
case "javaDigit": return Character::isDigit;
case "javaDefined": return Character::isDefined;
case "javaLetter": return Character::isLetter;
case "javaLetterOrDigit": return Character::isLetterOrDigit;
case "javaJavaIdentifierStart": return Character::isJavaIdentifierStart;
case "javaJavaIdentifierPart": return Character::isJavaIdentifierPart;
case "javaUnicodeIdentifierStart": return Character::isUnicodeIdentifierStart;
case "javaUnicodeIdentifierPart": return Character::isUnicodeIdentifierPart;
case "javaIdentifierIgnorable": return Character::isIdentifierIgnorable;
case "javaSpaceChar": return Character::isSpaceChar;
case "javaWhitespace": return Character::isWhitespace;
case "javaISOControl": return Character::isISOControl;
case "javaMirrored": return Character::isMirrored;
default: return null;
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/java.base/share/classes/java/util/regex/Pattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -2885,7 +2885,7 @@ private CharPredicate family(boolean singleLetter, boolean isComplement) {
break;
case "gc":
case "general_category":
p = CharPredicates.forProperty(value);
p = CharPredicates.forProperty(value, has(CASE_INSENSITIVE));
break;
default:
break;
Expand All @@ -2901,17 +2901,17 @@ private CharPredicate family(boolean singleLetter, boolean isComplement) {
} else if (name.startsWith("Is")) {
// \p{IsGeneralCategory} and \p{IsScriptName}
name = name.substring(2);
p = CharPredicates.forUnicodeProperty(name);
p = CharPredicates.forUnicodeProperty(name, has(CASE_INSENSITIVE));
if (p == null)
p = CharPredicates.forProperty(name);
p = CharPredicates.forProperty(name, has(CASE_INSENSITIVE));
if (p == null)
p = CharPredicates.forUnicodeScript(name);
} else {
if (has(UNICODE_CHARACTER_CLASS)) {
p = CharPredicates.forPOSIXName(name);
p = CharPredicates.forPOSIXName(name, has(CASE_INSENSITIVE));
}
if (p == null)
p = CharPredicates.forProperty(name);
p = CharPredicates.forProperty(name, has(CASE_INSENSITIVE));
}
if (p == null)
throw error("Unknown character property name {In/Is" + name + "}");
Expand Down
62 changes: 61 additions & 1 deletion test/jdk/java/util/regex/RegExTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
* 8194667 8197462 8184692 8247546
* 8194667 8197462 8184692 8247546 8305733
*
* @library /test/lib
* @build jdk.test.lib.RandomFactory
Expand Down Expand Up @@ -170,6 +170,7 @@ public static void main(String[] args) throws Exception {
grapheme();
expoBacktracking();
invalidGroupName();
caseInsensitivePMatch();

if (failure) {
throw new
Expand Down Expand Up @@ -4932,4 +4933,63 @@ private static void invalidGroupName() {
}
report("Invalid capturing group names");
}

// This test is for 8305733
// Checks that, when CASE_INSENSITIVE is set, the case-specific property
// classes (\p{Lower}, \p{Upper}, \p{Ll}, \p{Lu}, \p{Lt}, \p{IsLowercase},
// \p{javaLowerCase}, ...) accept a letter regardless of its case, both as a
// bare \p{...} construct and inside a bracketed character class.
//
// Every (input, pattern) pair that does not match bumps failCount; the result
// is then published through report(), as the other tests in this file do.
private static void caseInsensitivePMatch() {
    // ASCII letters in lower, mixed and upper case. The literal and bracket
    // patterns at the head of the list are the baseline the \p{...} forms are
    // expected to agree with. asPredicate() uses find(), hence the {4}.
    for (String input : List.of("abcd", "AbCd", "ABCD")) {
        for (String pattern : List.of("abcd", "aBcD", "[a-d]{4}",
                "(?:a|b|c|d){4}", "\\p{Lower}{4}", "\\p{Ll}{4}",
                "\\p{IsLl}{4}", "\\p{gc=Ll}{4}",
                "\\p{general_category=Ll}{4}", "\\p{IsLowercase}{4}",
                "\\p{javaLowerCase}{4}", "\\p{Upper}{4}", "\\p{Lu}{4}",
                "\\p{IsLu}{4}", "\\p{gc=Lu}{4}", "\\p{general_category=Lu}{4}",
                "\\p{IsUppercase}{4}", "\\p{javaUpperCase}{4}",
                "\\p{Lt}{4}", "\\p{IsLt}{4}", "\\p{gc=Lt}{4}",
                "\\p{general_category=Lt}{4}", "\\p{IsTitlecase}{4}",
                "\\p{javaTitleCase}{4}", "[\\p{Lower}]{4}", "[\\p{Ll}]{4}",
                "[\\p{IsLl}]{4}", "[\\p{gc=Ll}]{4}",
                "[\\p{general_category=Ll}]{4}", "[\\p{IsLowercase}]{4}",
                "[\\p{javaLowerCase}]{4}", "[\\p{Upper}]{4}", "[\\p{Lu}]{4}",
                "[\\p{IsLu}]{4}", "[\\p{gc=Lu}]{4}",
                "[\\p{general_category=Lu}]{4}", "[\\p{IsUppercase}]{4}",
                "[\\p{javaUpperCase}]{4}", "[\\p{Lt}]{4}", "[\\p{IsLt}]{4}",
                "[\\p{gc=Lt}]{4}", "[\\p{general_category=Lt}]{4}",
                "[\\p{IsTitlecase}]{4}", "[\\p{javaTitleCase}]{4}"))
        {
            Predicate<String> p = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE).asPredicate();
            if (!p.test(input)) {
                failCount++;
            }
        }
    }

    // U+01C7 / U+01C8 / U+01C9: three case forms of one letter
    // (presumably the upper/title/lower forms of the LJ digraph - confirm).
    // UNICODE_CHARACTER_CLASS is added so the POSIX names are resolved
    // through the Unicode-aware predicates as well.
    for (String input : List.of("\u01c7", "\u01c8", "\u01c9")) {
        for (String pattern : List.of("\u01c7", "\u01c8", "\u01c9",
                "[\u01c7\u01c8]", "[\u01c7\u01c9]", "[\u01c8\u01c9]",
                "[\u01c7-\u01c8]", "[\u01c8-\u01c9]", "[\u01c7-\u01c9]",
                "\\p{Lower}", "\\p{Ll}", "\\p{IsLl}", "\\p{gc=Ll}",
                "\\p{general_category=Ll}", "\\p{IsLowercase}",
                "\\p{javaLowerCase}", "\\p{Upper}", "\\p{Lu}",
                "\\p{IsLu}", "\\p{gc=Lu}", "\\p{general_category=Lu}",
                "\\p{IsUppercase}", "\\p{javaUpperCase}",
                "\\p{Lt}", "\\p{IsLt}", "\\p{gc=Lt}",
                "\\p{general_category=Lt}", "\\p{IsTitlecase}",
                "\\p{javaTitleCase}", "[\\p{Lower}]", "[\\p{Ll}]",
                "[\\p{IsLl}]", "[\\p{gc=Ll}]",
                "[\\p{general_category=Ll}]", "[\\p{IsLowercase}]",
                "[\\p{javaLowerCase}]", "[\\p{Upper}]", "[\\p{Lu}]",
                "[\\p{IsLu}]", "[\\p{gc=Lu}]",
                "[\\p{general_category=Lu}]", "[\\p{IsUppercase}]",
                "[\\p{javaUpperCase}]", "[\\p{Lt}]", "[\\p{IsLt}]",
                "[\\p{gc=Lt}]", "[\\p{general_category=Lt}]",
                "[\\p{IsTitlecase}]", "[\\p{javaTitleCase}]"))
        {
            Predicate<String> p = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE
                    | Pattern.UNICODE_CHARACTER_CLASS).asPredicate();
            if (!p.test(input)) {
                failCount++;
            }
        }
    }

    // Publish the outcome the same way the sibling tests do. Without this
    // call the mismatches counted above are never surfaced, so the test
    // could not fail (NOTE(review): report() is assumed to turn a non-zero
    // failCount into the `failure` flag that main() checks - confirm).
    report("Case insensitive property match");
}
}