Skip to content

Commit

Permalink
8264765: BreakIterator sees bogus sentence boundary in parenthesized …
Browse files Browse the repository at this point in the history
…“i.e.” phrase

Reviewed-by: joehw
  • Loading branch information
naotoj committed Apr 9, 2021
1 parent ec31b3a commit 9ebc497
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -310,7 +310,7 @@ protected final Object[][] getContents() {
// punctuation" and quotation marks
+ "<start-punctuation>=[:Ps::Pi:\\\"\\\'];"

// punctuation with may occur at the end of a sentence: "ending punctuation"
// punctuation which may occur at the end of a sentence: "ending punctuation"
// and quotation marks
+ "<end>=[:Pe::Pf:\\\"\\\'];"

Expand All @@ -323,9 +323,12 @@ protected final Object[][] getContents() {
// periods, which MAY signal the end of a sentence
+ "<period>=[\\.\uff0e];"

// comma, which may not occur at the start of a sentence
+ "<comma>=[\\,];"

// characters that may occur at the beginning of a sentence: basically anything
// not mentioned above (letters and digits are specifically excluded)
+ "<sent-start>=[^[:L:<space><start-punctuation><end><digit><term><period>\u2029<ignore>]];"
+ "<sent-start>=[^[:L:<space><start-punctuation><end><digit><term><period><comma>\u2029<ignore>]];"

// Hindi phrase separator
+ "<danda>=[\u0964\u0965];"
Expand Down
15 changes: 13 additions & 2 deletions test/jdk/java/text/BreakIterator/BreakIteratorTest.java
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1996, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -25,7 +25,7 @@
* @test
* @bug 4035266 4052418 4068133 4068137 4068139 4086052 4095322 4097779
* 4097920 4098467 4111338 4113835 4117554 4143071 4146175 4152117
* 4152416 4153072 4158381 4214367 4217703 4638433
* 4152416 4153072 4158381 4214367 4217703 4638433 8264765
* @library /java/text/testlib
* @run main/timeout=2000 BreakIteratorTest
* @summary test BreakIterator
Expand Down Expand Up @@ -746,6 +746,17 @@ public void TestBug4152117() {
generalIteratorTest(sentenceBreak, sentenceSelectionData);
}

/**
 * Regression test for bug 8264765.
 *
 * Feeds a sentence containing a parenthesized abbreviation followed by a
 * comma ("e.g.,") to the sentence break iterator check.
 */
public void TestBug8264765() {
    // A comma must not be treated as the start of a sentence; if it were,
    // the backwards rule would break the sentence below.
    final String sentenceWithAbbreviation =
            "Due to a problem (e.g., software bug), the server is down. ";

    // The whole text is supplied as a single entry of the selection data.
    Vector<String> expectedSelections = new Vector<>();
    expectedSelections.add(sentenceWithAbbreviation);

    generalIteratorTest(sentenceBreak, expectedSelections);
}

public void TestLineBreak() {
Vector<String> lineSelectionData = new Vector<String>();

Expand Down

1 comment on commit 9ebc497

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.