Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
8237599: Greedy matching against supplementary chars fails to respect the region
Reviewed-by: rriggs
- Loading branch information
|
@@ -4340,14 +4340,22 @@ boolean study(TreeInfo info) { |
|
|
this.cmin = cmin; |
|
|
} |
|
|
boolean match(Matcher matcher, int i, CharSequence seq) { |
|
|
int starti = i; |
|
|
int n = 0; |
|
|
int to = matcher.to; |
|
|
// greedy, all the way down |
|
|
while (i < to) { |
|
|
int ch = Character.codePointAt(seq, i); |
|
|
int len = Character.charCount(ch); |
|
|
if (i + len > to) { |
|
|
// the region cut off the high half of a surrogate pair |
|
|
matcher.hitEnd = true; |
|
|
ch = seq.charAt(i); |
|
|
len = 1; |
|
|
} |
|
|
if (!predicate.is(ch)) |
|
|
break; |
|
|
i += Character.charCount(ch); |
|
|
break; |
|
|
i += len; |
|
|
n++; |
|
|
} |
|
|
if (i >= to) { |
|
@@ -4358,9 +4366,10 @@ boolean match(Matcher matcher, int i, CharSequence seq) { |
|
|
return true; |
|
|
if (n == cmin) |
|
|
return false; |
|
|
// backing off if match fails |
|
|
// backing off if match fails |
|
|
int ch = Character.codePointBefore(seq, i); |
|
|
i -= Character.charCount(ch); |
|
|
// check if the region cut off the low half of a surrogate pair |
|
|
i = Math.max(starti, i - Character.charCount(ch)); |
|
|
n--; |
|
|
} |
|
|
return false; |
|
|
|
@@ -36,7 +36,7 @@ |
|
|
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 |
|
|
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 |
|
|
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812 |
|
|
* 8216332 8214245 |
|
|
* 8216332 8214245 8237599 |
|
|
* |
|
|
* @library /test/lib |
|
|
* @library /lib/testlibrary/java/lang |
|
@@ -195,6 +195,7 @@ public static void main(String[] args) throws Exception { |
|
|
surrogatePairWithCanonEq(); |
|
|
lineBreakWithQuantifier(); |
|
|
caseInsensitivePMatch(); |
|
|
surrogatePairOverlapRegion(); |
|
|
|
|
|
if (failure) { |
|
|
throw new |
|
@@ -5155,4 +5156,45 @@ private static void caseInsensitivePMatch() { |
|
|
} |
|
|
report("caseInsensitivePMatch"); |
|
|
} |
|
|
|
|
|
// This test is for 8237599 |
|
|
private static void surrogatePairOverlapRegion() { |
|
|
String input = "\ud801\udc37"; |
|
|
|
|
|
Pattern p = Pattern.compile(".+"); |
|
|
Matcher m = p.matcher(input); |
|
|
m.region(0, 1); |
|
|
|
|
|
boolean ok = m.find(); |
|
|
if (!ok || !m.group(0).equals(input.substring(0, 1))) |
|
|
{ |
|
|
failCount++; |
|
|
System.out.println("Input \"" + input + "\".substr(0, 1)" + |
|
|
" expected to match pattern \"" + p + "\""); |
|
|
if (ok) { |
|
|
System.out.println("group(0): \"" + m.group(0) + "\""); |
|
|
} |
|
|
} else if (!m.hitEnd()) { |
|
|
failCount++; |
|
|
System.out.println("Expected m.hitEnd() == true"); |
|
|
} |
|
|
|
|
|
p = Pattern.compile(".*(.)"); |
|
|
m = p.matcher(input); |
|
|
m.region(1, 2); |
|
|
|
|
|
ok = m.find(); |
|
|
if (!ok || !m.group(0).equals(input.substring(1, 2)) |
|
|
|| !m.group(1).equals(input.substring(1, 2))) |
|
|
{ |
|
|
failCount++; |
|
|
System.out.println("Input \"" + input + "\".substr(1, 2)" + |
|
|
" expected to match pattern \"" + p + "\""); |
|
|
if (ok) { |
|
|
System.out.println("group(0): \"" + m.group(0) + "\""); |
|
|
System.out.println("group(1): \"" + m.group(1) + "\""); |
|
|
} |
|
|
} |
|
|
report("surrogatePairOverlapRegion"); |
|
|
} |
|
|
} |