Skip to content

Commit

Permalink
8311939: Excessive allocation of Matcher.groups array
Browse files Browse the repository at this point in the history
Reviewed-by: rriggs, igraves
  • Loading branch information
deathy authored and rgiulietti committed Aug 17, 2023
1 parent ed585d1 commit 32efd23
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 4 deletions.
6 changes: 2 additions & 4 deletions src/java.base/share/classes/java/util/regex/Matcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,7 @@ public final class Matcher implements MatchResult {
this.text = text;

// Allocate state storage
int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
groups = new int[parentGroupCount * 2];
groups = new int[parent.capturingGroupCount * 2];
locals = new int[parent.localCount];
localsPos = new IntHashSet[parent.localTCNCount];

Expand Down Expand Up @@ -422,8 +421,7 @@ public Matcher usePattern(Pattern newPattern) {
namedGroups = null;

// Reallocate state storage
int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
groups = new int[parentGroupCount * 2];
groups = new int[newPattern.capturingGroupCount * 2];
locals = new int[newPattern.localCount];
for (int i = 0; i < groups.length; i++)
groups[i] = -1;
Expand Down
12 changes: 12 additions & 0 deletions src/java.base/share/classes/java/util/regex/Pattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -5187,6 +5187,12 @@ static class BackRef extends Node {
groupIndex = groupCount + groupCount;
}
boolean match(Matcher matcher, int i, CharSequence seq) {
// reference to not existing group must never match
// group does not exist if matcher didn't allocate space for it
if (groupIndex >= matcher.groups.length) {
return false;
}

int j = matcher.groups[groupIndex];
int k = matcher.groups[groupIndex+1];

Expand Down Expand Up @@ -5223,6 +5229,12 @@ static class CIBackRef extends Node {
this.doUnicodeCase = doUnicodeCase;
}
boolean match(Matcher matcher, int i, CharSequence seq) {
// reference to not existing group must never match
// group does not exist if matcher didn't allocate space for it
if (groupIndex >= matcher.groups.length) {
return false;
}

int j = matcher.groups[groupIndex];
int k = matcher.groups[groupIndex+1];

Expand Down
52 changes: 52 additions & 0 deletions test/jdk/java/util/regex/RegExTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2041,6 +2041,58 @@ public static void backRefTest() {
check(pattern, toSupplementaries("abcdefghijkk"), true);
}

/**
 * Exercises back-references in patterns compiled with the inline
 * case-insensitive flag {@code (?i)}, first with plain input and then
 * with the same patterns and inputs passed through
 * {@code toSupplementaries}.
 */
@Test
public static void ciBackRefTest() {
    // Back-reference to the first (possibly empty) captured group
    check(Pattern.compile("(?i)(a*)bc\\1"), "zzzaabcazzz", true);
    check(Pattern.compile("(?i)(a*)bc\\1"), "zzzaabcaazzz", true);

    // \1 refers to an existing group; \3 refers to a group that is absent
    check(Pattern.compile("(?i)(abc)(def)\\1"), "abcdefabc", true);
    check(Pattern.compile("(?i)(abc)(def)\\3"), "abcdefabc", false);

    // Back-references 1 through 9 must always compile, yet they must
    // never match when the referenced group does not exist
    for (int groupNumber = 1; groupNumber <= 9; groupNumber++) {
        check(Pattern.compile("(?i)abcdef\\" + groupNumber), "abcdef", false);
    }

    // Ten groups: the expectations below require a literal '1' after
    // the text captured by group 1
    Pattern tenGroups =
            Pattern.compile("(?i)(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
    check(tenGroups, "abcdefghija", false);
    check(tenGroups, "abcdefghija1", true);

    // Eleven groups: \11 is expected to match the text of the last group
    check(Pattern.compile("(?i)(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"),
            "abcdefghijkk", true);

    // Single group followed by \11
    check(Pattern.compile("(?i)(a)bcdefghij\\11"), "abcdefghija1", true);

    // Supplementary character tests
    check(Pattern.compile("(?i)" + toSupplementaries("(a*)bc\\1")),
            toSupplementaries("zzzaabcazzz"), true);
    check(Pattern.compile("(?i)" + toSupplementaries("(a*)bc\\1")),
            toSupplementaries("zzzaabcaazzz"), true);

    check(Pattern.compile("(?i)" + toSupplementaries("(abc)(def)\\1")),
            toSupplementaries("abcdefabc"), true);
    check(Pattern.compile("(?i)" + toSupplementaries("(abc)(def)\\3")),
            toSupplementaries("abcdefabc"), false);

    Pattern tenGroupsSupplementary = Pattern.compile(
            "(?i)" + toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
    check(tenGroupsSupplementary, toSupplementaries("abcdefghija"), false);
    check(tenGroupsSupplementary, toSupplementaries("abcdefghija1"), true);

    check(Pattern.compile(
                    "(?i)" + toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")),
            toSupplementaries("abcdefghijkk"), true);
}

/**
* Unicode Technical Report #18, section 2.6 End of Line
* There is no empty line to be matched in the sequence \u000D\u000A
Expand Down

1 comment on commit 32efd23

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.