3636 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
3737 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
3838 * 8216332 8214245 8237599 8241055 8247546 8258259 8037397 8269753 8276694
39- *
39+ * 8280403 8264160 8281315
4040 * @library /test/lib
4141 * @library /lib/testlibrary/java/lang
4242 * @build jdk.test.lib.RandomFactory
5151import java .nio .file .Files ;
5252import java .nio .file .Path ;
5353import java .nio .file .Paths ;
54- import java .util .ArrayList ;
55- import java .util .Arrays ;
56- import java .util .HashMap ;
57- import java .util .List ;
58- import java .util .Map ;
59- import java .util .Random ;
60- import java .util .Scanner ;
54+ import java .util .*;
6155import java .util .function .Function ;
56+ import java .util .function .IntFunction ;
6257import java .util .function .Predicate ;
6358import java .util .regex .Matcher ;
6459import java .util .regex .MatchResult ;
@@ -3854,11 +3849,11 @@ public static void unicodeClassesTest() {
38543849 }
38553850
38563851 // bounds/word align
3857- twoFindIndexes (" \u0180 sherman\u0400 " , bound , 1 , 10 );
3852+ twoFindIndexes (" \u0180 sherman\u0400 " , boundU , 1 , 10 );
38583853 assertTrue (bwbU .reset ("\u0180 sherman\u0400 " ).matches ());
3859- twoFindIndexes (" \u0180 sh\u0345 erman\u0400 " , bound , 1 , 11 );
3854+ twoFindIndexes (" \u0180 sh\u0345 erman\u0400 " , boundU , 1 , 11 );
38603855 assertTrue (bwbU .reset ("\u0180 sh\u0345 erman\u0400 " ).matches ());
3861- twoFindIndexes (" \u0724 \u0739 \u0724 " , bound , 1 , 4 );
3856+ twoFindIndexes (" \u0724 \u0739 \u0724 " , boundU , 1 , 4 );
38623857 assertTrue (bwbU .reset ("\u0724 \u0739 \u0724 " ).matches ());
38633858 assertTrue (bwbEU .reset ("\u0724 \u0739 \u0724 " ).matches ());
38643859 }
@@ -4503,6 +4498,8 @@ public static void surrogatePairOverlapRegion() {
45034498 }
45044499
45054500 //This test is for 8037397
4501+ //Ensure we don't drop nested interior character classes to the right of an
4502+ //intersection operator.
45064503 @ Test
45074504 public static void droppedClassesWithIntersection () {
45084505 String rx = "[A-Z&&[A-Z]0-9]" ;
@@ -4530,6 +4527,9 @@ public static void droppedClassesWithIntersection() {
45304527 }
45314528
45324529 //This test is for 8269753
4530+ //This is for ensuring that the caret doesn't point at the wrong character
4531+ //in a syntax exception message because we previously didn't compensate for
4532+ //tabs when rendering the offending string that contained tab characters.
45334533 @ Test
45344534 public static void errorMessageCaretIndentation () {
45354535 String pattern = "\t **" ;
@@ -4540,6 +4540,8 @@ public static void errorMessageCaretIndentation() {
45404540 }
45414541
45424542 //This test is for 8276694
4543+ //Ensure our error message indicates we have an unescaped backslash when we
4544+ //encounter one.
45434545 @ Test
45444546 public static void unescapedBackslash () {
45454547 String pattern = "\\ " ;
@@ -4549,6 +4551,7 @@ public static void unescapedBackslash() {
45494551 }
45504552
45514553 //This test is for 8280403
4554+ //Given bad intersection syntax, we should throw a PatternSyntaxException.
45524555 @ Test
45534556 public static void badIntersectionSyntax () {
45544557 String pattern = "[˜\\ H +F&&]" ;
@@ -4557,7 +4560,70 @@ public static void badIntersectionSyntax() {
45574560 assertTrue (e .getMessage ().contains ("Bad intersection syntax" ));
45584561 }
45594562
4563+ //This test is for 8264160
4564+ //Here we check for inconsistencies between the behavior of \w and the
4565+ //behavior of \b. Prior to this fix, the two flags did not behave in a
4566+ //consistent way ie \b would recognize non-\w characters as part of a word
4567+ //in some cases. This test verifies that the two behave consistently
4568+ //for all codepoints we support.
4569+ @ Test
4570+ public static void wordBoundaryInconsistencies () {
4571+ Pattern basicWordCharPattern = Pattern .compile ("\\ w" );
4572+ Pattern basicWordCharBoundaryPattern =
4573+ Pattern .compile (";\\ b." , Pattern .DOTALL );
4574+
4575+ Pattern unicodeWordCharPattern =
4576+ Pattern .compile ("\\ w" , Pattern .UNICODE_CHARACTER_CLASS );
4577+
4578+ Pattern unicodeWordCharBoundaryPattern =
4579+ Pattern .compile (";\\ b." ,
4580+ Pattern .DOTALL | Pattern .UNICODE_CHARACTER_CLASS );
4581+
4582+ IntFunction <Boolean > basicWordCharCheck =
4583+ (cp ) -> cpMatches (basicWordCharPattern , cp , false );
4584+
4585+ IntFunction <Boolean > basicBoundaryCharCheck =
4586+ (cp ) -> cpMatches (basicWordCharBoundaryPattern ,
4587+ cp , true );
4588+
4589+ IntFunction <Boolean > unicodeWordCharCheck =
4590+ (cp ) -> cpMatches (unicodeWordCharPattern , cp , false );
4591+
4592+ IntFunction <Boolean > unicodeBoundaryCharCheck =
4593+ (cp ) -> cpMatches (unicodeWordCharBoundaryPattern ,
4594+ cp ,true );
4595+
4596+ //basic pattern comparison
4597+ for (int cp = 0 ; cp <= Character .MAX_CODE_POINT ; cp ++){
4598+ assertEquals (basicWordCharCheck .apply (cp ),
4599+ basicBoundaryCharCheck .apply (cp ),
4600+ "Codepoint: " + cp );
4601+ assertEquals (unicodeWordCharCheck .apply (cp ),
4602+ unicodeBoundaryCharCheck .apply (cp ),
4603+ "Codepoint: " + cp );
4604+ }
4605+ }
4606+
4607+ private static boolean cpMatches (Pattern p , int cp , boolean boundary ) {
4608+ String cpString ;
4609+ if (Character .isBmpCodePoint (cp )) {
4610+ cpString = "" + ((char ) cp );
4611+ } else {
4612+ cpString = "" + Character .highSurrogate (cp ) +
4613+ Character .lowSurrogate (cp );
4614+ }
4615+
4616+ if (boundary ) {
4617+ return p .matcher (";" + cpString ).matches ();
4618+ } else {
4619+ return p .matcher (cpString ).matches ();
4620+ }
4621+ }
4622+
45604623 //This test is for 8281560
4624+ //Checks that when the Canonical Equivalence flag is set, the behavior of
4625+ //Matcher::hitEnd is the same for these similar patterns, which previously
4626+ //behaved inconsistently.
45614627 @ Test
45624628 public static void prematureHitEndInNFCCharProperty () {
45634629 var testInput = "a1a1" ;
@@ -4582,6 +4648,8 @@ public static void prematureHitEndInNFCCharProperty() {
45824648 }
45834649
45844650 //This test is for 8281315
4651+ //Checks that we are able to correctly match this case with a backref
4652+ //without encountering an IndexOutOfBoundsException.
45854653 @ Test
45864654 public static void iOOBForCIBackrefs (){
45874655 String line = "\ud83d \udc95 \ud83d \udc95 \ud83d \udc95 " ;
0 commit comments