Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
Browse files
8258259: Unicode linebreak matching behavior is incorrect; backout JDK-8235812

Reviewed-by: naoto
  • Loading branch information
Stuart Marks committed Dec 18, 2020
1 parent 7320e05 commit cbc3feeb8902da9d32d93963da6a0309725340df
Showing with 29 additions and 77 deletions.
  1. +11 −67 src/java.base/share/classes/java/util/regex/Pattern.java
  2. +18 −10 test/jdk/java/util/regex/RegExTest.java
@@ -2063,7 +2063,7 @@ private Node expr(Node end) {
Node prev = null;
Node firstTail = null;
Branch branch = null;
BranchConn branchConn = null;
Node branchConn = null;

for (;;) {
Node node = sequence(end);
@@ -2211,24 +2211,7 @@ private Node sequence(Node end) {
break;
}

if (node instanceof LineEnding) {
LineEnding le = (LineEnding)node;
node = closureOfLineEnding(le);

if (node != le) {
// LineEnding was replaced with an anonymous group
if (head == null)
head = node;
else
tail.next = node;
// Double return: Tail was returned in root
tail = root;
continue;
}
} else {
node = closure(node);
}

node = closure(node);
/* save the top dot-greedy nodes (.*, .+) as well
if (node instanceof GreedyCharProperty &&
((GreedyCharProperty)node).cp instanceof Dot) {
@@ -3096,31 +3079,18 @@ private Node group0() {
if (saveTCNCount < topClosureNodes.size())
topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear();

return groupWithClosure(node, head, tail, capturingGroup);
}

/**
* Transforms a Group with quantifiers into some special constructs
* (such as Branch or Loop/GroupCurly), if necessary.
*
* This method is applied either to actual groups or to the Unicode
* linebreak (aka \\R) represented as an anonymous group.
*/
private Node groupWithClosure(Node node, Node head, Node tail,
boolean capturingGroup)
{
if (node instanceof Ques) {
Ques ques = (Ques) node;
if (ques.type == Qtype.POSSESSIVE) {
root = node;
return node;
}
BranchConn branchConn = new BranchConn();
tail = tail.next = branchConn;
tail.next = new BranchConn();
tail = tail.next;
if (ques.type == Qtype.GREEDY) {
head = new Branch(head, null, branchConn);
head = new Branch(head, null, tail);
} else { // Reluctant quantifier
head = new Branch(null, head, branchConn);
head = new Branch(null, head, tail);
}
root = tail;
return head;
@@ -3297,31 +3267,6 @@ private Node curly(Node prev, int cmin) {
return new Curly(prev, cmin, MAX_REPS, qtype);
}

/**
* Processing repetition of a Unicode linebreak \\R.
*
* If the character returned by peek() is not one of '?', '*', '+' or '{',
* the given node is returned unchanged. Otherwise the LineEnding node is
* replaced by an anonymous group with two alternatives, and that group is
* handed to closure and groupWithClosure so that the repetition is applied
* to the group as a whole.
*
* @param le the LineEnding node that has just been parsed
* @return le itself when none of the four characters above follows;
*         otherwise the node returned by groupWithClosure for the
*         replacement group
*/
private Node closureOfLineEnding(LineEnding le) {
int ch = peek();
// Nothing to do unless one of the quantifier-introducing characters follows.
if (ch != '?' && ch != '*' && ch != '+' && ch != '{') {
return le;
}

// Replace the LineEnding with an anonymous group
// (?:\\u000D\\u000A|[\\u000A\\u000B\\u000C\\u000D\\u0085\\u2028\\u2029])
// NOTE(review): the 'true' argument presumably requests an anonymous
// (non-capturing) group -- confirm against createGroup.
Node grHead = createGroup(true);
// The group's tail is read from the 'root' field; this assumes
// createGroup stored it there -- TODO confirm.
Node grTail = root;
// Both alternatives below rejoin at branchConn, which leads on to the
// group tail.
BranchConn branchConn = new BranchConn();
branchConn.next = grTail;
// First alternative: the two-character sequence 0x0D 0x0A.
Node slice = new Slice(new int[] {0x0D, 0x0A});
slice.next = branchConn;
// Second alternative: any single one of the listed code points.
Node chClass = newCharProperty(x -> x == 0x0A || x == 0x0B ||
x == 0x0C || x == 0x0D || x == 0x85 || x == 0x2028 ||
x == 0x2029);
chClass.next = branchConn;
// The slice is passed as the first atom of the Branch, the character
// class as the second.
grHead.next = new Branch(slice, chClass, branchConn);
// closure(grHead) is evaluated first and its result is passed on as the
// 'node' argument; the trailing 'false' is the capturingGroup argument.
return groupWithClosure(closure(grHead), grHead, grTail, false);
}

/**
* Processes repetition. If the next character peeked is a quantifier
* then new nodes must be appended to handle the repetition.
@@ -4777,19 +4722,18 @@ boolean study(TreeInfo info) {
static final class Branch extends Node {
Node[] atoms = new Node[2];
int size = 2;
BranchConn conn;
Branch(Node first, Node second, BranchConn branchConn) {
Node conn;
Branch(Node first, Node second, Node branchConn) {
conn = branchConn;
atoms[0] = first;
atoms[1] = second;
}

void add(Node node) {
if (size >= atoms.length) {
int len = ArraysSupport.newLength(size,
1, /* minimum growth */
size /* preferred growth */);
atoms = Arrays.copyOf(atoms, len);
Node[] tmp = new Node[atoms.length*2];
System.arraycopy(atoms, 0, tmp, 0, atoms.length);
atoms = tmp;
}
atoms[size++] = node;
}
@@ -36,7 +36,7 @@
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
* 8216332 8214245 8237599 8241055 8247546
* 8216332 8214245 8237599 8241055 8247546 8258259
*
* @library /test/lib
* @library /lib/testlibrary/java/lang
@@ -5063,7 +5063,15 @@ private static void surrogatePairWithCanonEq() {
report("surrogatePairWithCanonEq");
}

// This test is for 8235812
/**
 * Renders each UTF-16 code unit of the given string as a backslash,
 * the letter 'u' and four lowercase hexadecimal digits, so that control
 * and line-break characters stay visible in diagnostic output.
 *
 * @param s the string to render; must not be null
 * @return the concatenated escapes, or an empty string for empty input
 */
private static String s2x(String s) {
    final int length = s.length();
    StringBuilder escaped = new StringBuilder();
    for (int i = 0; i < length; i++) {
        // Widen to int explicitly: the %x conversion rejects a char argument.
        int codeUnit = s.charAt(i);
        escaped.append(String.format("\\u%04x", codeUnit));
    }
    return escaped.toString();
}

// This test is for 8235812, with cases excluded by 8258259
private static void lineBreakWithQuantifier() {
// key: pattern
// value: lengths of input that must match the pattern
@@ -5073,22 +5081,22 @@ private static void lineBreakWithQuantifier() {
Map.entry("\\R+", List.of(1, 2, 3)),
Map.entry("\\R{0}", List.of(0)),
Map.entry("\\R{1}", List.of(1)),
Map.entry("\\R{2}", List.of(2)),
Map.entry("\\R{3}", List.of(3)),
// Map.entry("\\R{2}", List.of(2)), // 8258259
// Map.entry("\\R{3}", List.of(3)), // 8258259
Map.entry("\\R{0,}", List.of(0, 1, 2, 3)),
Map.entry("\\R{1,}", List.of(1, 2, 3)),
Map.entry("\\R{2,}", List.of(2, 3)),
Map.entry("\\R{3,}", List.of(3)),
// Map.entry("\\R{2,}", List.of(2, 3)), // 8258259
// Map.entry("\\R{3,}", List.of(3)), // 8258259
Map.entry("\\R{0,0}", List.of(0)),
Map.entry("\\R{0,1}", List.of(0, 1)),
Map.entry("\\R{0,2}", List.of(0, 1, 2)),
Map.entry("\\R{0,3}", List.of(0, 1, 2, 3)),
Map.entry("\\R{1,1}", List.of(1)),
Map.entry("\\R{1,2}", List.of(1, 2)),
Map.entry("\\R{1,3}", List.of(1, 2, 3)),
Map.entry("\\R{2,2}", List.of(2)),
Map.entry("\\R{2,3}", List.of(2, 3)),
Map.entry("\\R{3,3}", List.of(3)),
// Map.entry("\\R{2,2}", List.of(2)), // 8258259
// Map.entry("\\R{2,3}", List.of(2, 3)), // 8258259
// Map.entry("\\R{3,3}", List.of(3)), // 8258259
Map.entry("\\R", List.of(1)),
Map.entry("\\R\\R", List.of(2)),
Map.entry("\\R\\R\\R", List.of(3))
@@ -5131,7 +5139,7 @@ private static void lineBreakWithQuantifier() {
if (!m.reset(in).matches()) {
failCount++;
System.err.println("Expected to match '" +
in + "' =~ /" + p + "/");
s2x(in) + "' =~ /" + p + "/");
}
}
}

1 comment on commit cbc3fee

@openjdk-notifier

This comment has been minimized.

Copy link

@openjdk-notifier openjdk-notifier bot commented on cbc3fee Dec 18, 2020

Please sign in to comment.