diff --git a/src/java.base/share/classes/java/util/regex/Pattern.java b/src/java.base/share/classes/java/util/regex/Pattern.java index ffffbb9ed80..850c52871df 100644 --- a/src/java.base/share/classes/java/util/regex/Pattern.java +++ b/src/java.base/share/classes/java/util/regex/Pattern.java @@ -1048,9 +1048,10 @@ public final class Pattern private transient int patternLength; /** - * If the Start node might possibly match supplementary characters. + * If the Start node might possibly match supplementary or surrogate + * code points. * It is set to true during compiling if - * (1) There is supplementary char in pattern, or + * (1) There is supplementary or surrogate code point in pattern, or * (2) There is complement node of a "family" CharProperty */ private transient boolean hasSupplementary; @@ -2930,8 +2931,10 @@ private CharProperty newCharProperty(CharPredicate p) { return null; if (p instanceof BmpCharPredicate) return new BmpCharProperty((BmpCharPredicate)p); - else + else { + hasSupplementary = true; return new CharProperty(p); + } } /** @@ -5793,18 +5796,18 @@ private static boolean inRange(int lower, int ch, int upper) { } /** - * Charactrs within a explicit value range + * Characters within an explicit value range */ static CharPredicate Range(int lower, int upper) { if (upper < Character.MIN_HIGH_SURROGATE || - lower > Character.MAX_HIGH_SURROGATE && + lower > Character.MAX_LOW_SURROGATE && upper < Character.MIN_SUPPLEMENTARY_CODE_POINT) return (BmpCharPredicate)(ch -> inRange(lower, ch, upper)); return ch -> inRange(lower, ch, upper); } /** - * Charactrs within a explicit value range in a case insensitive manner. + * Characters within an explicit value range in a case insensitive manner. 
*/ static CharPredicate CIRange(int lower, int upper) { return ch -> inRange(lower, ch, upper) || diff --git a/test/jdk/java/util/regex/RegExTest.java b/test/jdk/java/util/regex/RegExTest.java index 23f71c63287..cb7e56f4195 100644 --- a/test/jdk/java/util/regex/RegExTest.java +++ b/test/jdk/java/util/regex/RegExTest.java @@ -35,7 +35,7 @@ * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 - * 8194667 8197462 8184692 + * 8194667 8197462 8184692 8247546 * * @library /test/lib * @build jdk.test.lib.RandomFactory diff --git a/test/jdk/java/util/regex/SupplementaryTestCases.txt b/test/jdk/java/util/regex/SupplementaryTestCases.txt index 644a91b6be7..8cd1b91b3fc 100644 --- a/test/jdk/java/util/regex/SupplementaryTestCases.txt +++ b/test/jdk/java/util/regex/SupplementaryTestCases.txt @@ -1,5 +1,5 @@ // -// Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
// // This code is free software; you can redistribute it and/or modify it @@ -129,6 +129,31 @@ true \ud800\udc00pqr 0 ///\ud800\udc00 ///false 0 +// unpaired surrogate should match +[\x{d800}-\x{dbff}\x{dc00}-\x{dfff}] +xxx\udca9\ud83dyyy +true \udca9 0 + +// surrogates in a supplementary character should not match +[\x{d800}-\x{dbff}\x{dc00}-\x{dfff}] +\ud83d\udca9 +false 0 + +// unpaired surrogate should match +[\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}] +xxx\udca9\ud83dyyy +true \udca9 0 + +// surrogates part of a supplementary character should not match +[\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}] +\ud83d\udca9 +false 0 + +// low surrogate part of a supplementary character should not match +[\x{dc00}-\x{dfff}] +\ud83d\udca9 +false 0 + // use of x modifier \ud800\udc61bc(?x)bl\ud800\udc61h \ud800\udc61bcbl\ud800\udc61h