From 2a88ae722a3840a912e91032defa013e432bda68 Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Wed, 30 Oct 2019 06:40:27 +0000 Subject: [PATCH] Convert HTML and PDF anchoring over to the new quote anchoring module This resolves issues of: - Poor performance when a page has many annotations that do not anchor - The suffix not being taken into account when there is a match for the prefix Fixes #189 Fixes #1022 --- src/annotator/anchoring/pdf.js | 56 ++++++++++++------------ src/annotator/anchoring/test/pdf-test.js | 6 ++- src/annotator/anchoring/types.coffee | 20 ++++----- 3 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src/annotator/anchoring/pdf.js b/src/annotator/anchoring/pdf.js index 39d8239a111..070d18e26af 100644 --- a/src/annotator/anchoring/pdf.js +++ b/src/annotator/anchoring/pdf.js @@ -3,7 +3,7 @@ /* global PDFViewerApplication */ const RenderingStates = require('../pdfjs-rendering-states'); -const { TextQuoteAnchor } = require('./types'); +const { toTextPosition, fromTextPosition } = require('./text-quote'); const { fromRange: textPositionFromRange, toRange: textPositionToRange, @@ -243,7 +243,7 @@ async function anchorByPosition(pageIndex, anchor) { * * @param {number[]} pageIndexes - Pages to search in priority order * @param {TextQuoteSelector} quoteSelector - * @param {Object} positionHint - Options to pass to `TextQuoteAnchor#toPositionAnchor` + * @param {TextRange} positionHint - Expected location of quote within document * @return {Promise} Location of quote */ function findInPages(pageIndexes, quoteSelector, positionHint) { @@ -258,15 +258,15 @@ function findInPages(pageIndexes, quoteSelector, positionHint) { const offset = getPageOffset(pageIndex); const attempt = ([content, offset]) => { - const root = { textContent: content }; - const anchor = TextQuoteAnchor.fromSelector(root, quoteSelector); - if (positionHint) { - let hint = positionHint.start - offset; - hint = Math.max(0, hint); - hint = Math.min(hint, content.length); - return anchor.toPositionAnchor({ hint }); - } - return anchor.toPositionAnchor(); + const { exact, prefix, suffix } = quoteSelector; + + // Try to locate the quote in the page's text. + // + // This does not currently use `positionHint` to disambiguate matches, + // but it could do. + const [start, end] = toTextPosition(content, exact, prefix, suffix); + + return { start, end }; }; const next = () => findInPages(rest, quoteSelector, positionHint); @@ -437,11 +437,9 @@ function getTextLayers(range) { * * @param {HTMLElement} root - The root element * @param {Range} range - * @param {Object} options - - * Options passed to `TextQuoteAnchor`'s `toSelector` method. * @return {Promise<[TextPositionSelector, TextQuoteSelector]>} */ -async function describe(root, range, options = {}) { +async function describe(root, range) { const textLayers = getTextLayers(range); if (textLayers.length === 0) { @@ -458,28 +456,30 @@ async function describe(root, range, options = {}) { const pageOffset = await getPageOffset(startPageIndex); // Get the offset within the text layer where the selection occurs. - let [startPos, endPos] = textPositionFromRange(startTextLayer, range); - - // Get a range which is cropped to the text layer, i.e. it excludes any text - // outside of it that the user may have selected. - const quoteRange = textPositionToRange(startTextLayer, startPos, endPos); + let [startPosInPage, endPosInPage] = textPositionFromRange( + startTextLayer, + range + ); // Adjust the start and end offsets to refer to the whole document, // not just the current page. - startPos += pageOffset; - endPos += pageOffset; + const startPosInDocument = startPosInPage + pageOffset; + const endPosInDocument = endPosInPage + pageOffset; const position = { type: 'TextPositionSelector', - start: startPos, - end: endPos, + start: startPosInDocument, + end: endPosInDocument, }; - const quote = TextQuoteAnchor.fromRange( - startTextLayer, - quoteRange, - options - ).toSelector(options); + const quote = { + type: 'TextQuoteSelector', + ...fromTextPosition( + startTextLayer.textContent, + startPosInPage, + endPosInPage + ), + }; return [position, quote]; } diff --git a/src/annotator/anchoring/test/pdf-test.js b/src/annotator/anchoring/test/pdf-test.js index 07ce7856581..6fcea688f9e 100644 --- a/src/annotator/anchoring/test/pdf-test.js +++ b/src/annotator/anchoring/test/pdf-test.js @@ -1,6 +1,7 @@ 'use strict'; -const domAnchorTextQuote = require('dom-anchor-text-quote'); +const { toRange } = require('../text-position'); +const { toTextPosition } = require('../text-quote'); const FakePDFViewerApplication = require('./fake-pdf-viewer-application'); const pdfAnchoring = require('../pdf'); @@ -13,7 +14,8 @@ const pdfAnchoring = require('../pdf'); * @return {Range} */ function findText(container, text) { - return domAnchorTextQuote.toRange(container, { exact: text }); + const [start, end] = toTextPosition(container.textContent, text); + return toRange(container, start, end); } function delay(ms) { diff --git a/src/annotator/anchoring/types.coffee b/src/annotator/anchoring/types.coffee index 5e9e0b8f99e..3031dfe71a5 100644 --- a/src/annotator/anchoring/types.coffee +++ b/src/annotator/anchoring/types.coffee @@ -6,8 +6,7 @@ # 2. Insulating the rest of the code from API changes in the underyling anchoring # libraries. -domAnchorTextQuote = require('dom-anchor-text-quote') - +textQuote = require('./text-quote') textPosition = require('./text-position') xpathRange = require('./range') @@ -94,7 +93,9 @@ class TextQuoteAnchor @context = context @fromRange: (root, range, options) -> - selector = domAnchorTextQuote.fromRange(root, range, options) + [start, end] = textPosition.fromRange(root, range) + { prefix, exact, suffix } = textQuote.fromTextPosition(root.textContent, start, end) + selector = { type: 'TextQuoteSelector', exact, prefix, suffix } TextQuoteAnchor.fromSelector(root, selector) @fromSelector: (root, selector) -> @@ -110,16 +111,13 @@ class TextQuoteAnchor } toRange: (options = {}) -> - range = domAnchorTextQuote.toRange(@root, this.toSelector(), options) - if range == null - throw new Error('Quote not found') - range + anchor = @toPositionAnchor() + textPosition.toRange(@root, anchor.start, anchor.end) toPositionAnchor: (options = {}) -> - anchor = domAnchorTextQuote.toTextPosition(@root, this.toSelector(), options) - if anchor == null - throw new Error('Quote not found') - new TextPositionAnchor(@root, anchor.start, anchor.end) + { exact, prefix, suffix } = this.toSelector() + [start, end] = textQuote.toTextPosition(@root.textContent, exact, prefix, suffix) + new TextPositionAnchor(@root, start, end) exports.RangeAnchor = RangeAnchor