Skip to content

Commit

Permalink
Merge pull request #15760 from calixteman/15759
Browse files Browse the repository at this point in the history
Don't add an extra space after a Katakana or a Hiragana at the eol when searching
  • Loading branch information
calixteman committed Nov 29, 2022
2 parents 44bc315 + ea19959 commit 5d79cc5
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 1 deletion.
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Expand Up @@ -557,3 +557,4 @@
!freetext_no_appearance.pdf
!issue15690.pdf
!bug1802888.pdf
!issue15759.pdf
Binary file added test/pdfs/issue15759.pdf
Binary file not shown.
21 changes: 21 additions & 0 deletions test/unit/pdf_find_controller_spec.js
Expand Up @@ -668,4 +668,25 @@ describe("pdf_find_controller", function () {
pageMatchesLength: [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
});
});

// Searches "issue15759.pdf" for a five-character Katakana word and expects
// exactly one match on the first page, starting at offset 6 with length 5.
it("performs a search in a text with some Katakana at the end of a line", async function () {
  const fixture = await initPdfFindController("issue15759.pdf");

  // Everything `testSearch` needs: the controller under test, the search
  // state to dispatch, and the expected results.
  const searchExpectations = {
    eventBus: fixture.eventBus,
    pdfFindController: fixture.pdfFindController,
    state: { query: "ソレノイド" },
    matchesPerPage: [1],
    selectedMatch: { pageIndex: 0, matchIndex: 0 },
    pageMatches: [[6]],
    pageMatchesLength: [[5]],
  };

  await testSearch(searchExpectations);
});
});
6 changes: 5 additions & 1 deletion web/pdf_find_controller.js
Expand Up @@ -132,7 +132,11 @@ function normalize(text) {
"\u3244-\u32bf" + // Circled ideograms/numbers.
"\u32d0-\u32fe" + // Circled ideograms.
"\uff00-\uffef"; // Halfwidth, fullwidth forms.
const regexp = `([${replace}])|([${toNormalizeWithNFKC}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(\\p{Ideographic}\\n)|(\\n)`;

// 3040-309F: Hiragana
// 30A0-30FF: Katakana
const CJK = "(?:\\p{Ideographic}|[\u3040-\u30FF])";
const regexp = `([${replace}])|([${toNormalizeWithNFKC}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(${CJK}\\n)|(\\n)`;

if (syllablePositions.length === 0) {
// Most of the syllables belong to Hangul so there is no need
Expand Down

0 comments on commit 5d79cc5

Please sign in to comment.