Skip to content

Commit

Permalink
Convert HTML and PDF anchoring over to the new quote anchoring module
Browse files Browse the repository at this point in the history
This resolves issues of:

 - Poor performance when a page has many annotations that do not anchor
 - The suffix not being taken into account when there is a match for the
   prefix

Fixes hypothesis#189
Fixes hypothesis#1022
  • Loading branch information
robertknight committed Oct 30, 2019
1 parent 5883246 commit 2a88ae7
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 41 deletions.
56 changes: 28 additions & 28 deletions src/annotator/anchoring/pdf.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
/* global PDFViewerApplication */

const RenderingStates = require('../pdfjs-rendering-states');
const { TextQuoteAnchor } = require('./types');
const { toTextPosition, fromTextPosition } = require('./text-quote');
const {
fromRange: textPositionFromRange,
toRange: textPositionToRange,
Expand Down Expand Up @@ -243,7 +243,7 @@ async function anchorByPosition(pageIndex, anchor) {
*
* @param {number[]} pageIndexes - Pages to search in priority order
* @param {TextQuoteSelector} quoteSelector
* @param {Object} positionHint - Options to pass to `TextQuoteAnchor#toPositionAnchor`
* @param {TextRange} positionHint - Expected location of quote within document
* @return {Promise<Range>} Location of quote
*/
function findInPages(pageIndexes, quoteSelector, positionHint) {
Expand All @@ -258,15 +258,15 @@ function findInPages(pageIndexes, quoteSelector, positionHint) {
const offset = getPageOffset(pageIndex);

const attempt = ([content, offset]) => {
const root = { textContent: content };
const anchor = TextQuoteAnchor.fromSelector(root, quoteSelector);
if (positionHint) {
let hint = positionHint.start - offset;
hint = Math.max(0, hint);
hint = Math.min(hint, content.length);
return anchor.toPositionAnchor({ hint });
}
return anchor.toPositionAnchor();
const { exact, prefix, suffix } = quoteSelector;

// Try to locate the quote in the page's text.
//
// This does not currently use `positionHint` to disambiguate matches,
// but it could do.
const [start, end] = toTextPosition(content, exact, prefix, suffix);

return { start, end };
};

const next = () => findInPages(rest, quoteSelector, positionHint);
Expand Down Expand Up @@ -437,11 +437,9 @@ function getTextLayers(range) {
*
* @param {HTMLElement} root - The root element
* @param {Range} range
* @param {Object} options -
* Options passed to `TextQuoteAnchor`'s `toSelector` method.
* @return {Promise<[TextPositionSelector, TextQuoteSelector]>}
*/
async function describe(root, range, options = {}) {
async function describe(root, range) {
const textLayers = getTextLayers(range);

if (textLayers.length === 0) {
Expand All @@ -458,28 +456,30 @@ async function describe(root, range, options = {}) {
const pageOffset = await getPageOffset(startPageIndex);

// Get the offset within the text layer where the selection occurs.
let [startPos, endPos] = textPositionFromRange(startTextLayer, range);

// Get a range which is cropped to the text layer, i.e. it excludes any text
// outside of it that the user may have selected.
const quoteRange = textPositionToRange(startTextLayer, startPos, endPos);
let [startPosInPage, endPosInPage] = textPositionFromRange(
startTextLayer,
range
);

// Adjust the start and end offsets to refer to the whole document,
// not just the current page.
startPos += pageOffset;
endPos += pageOffset;
const startPosInDocument = startPosInPage + pageOffset;
const endPosInDocument = endPosInPage + pageOffset;

const position = {
type: 'TextPositionSelector',
start: startPos,
end: endPos,
start: startPosInDocument,
end: endPosInDocument,
};

const quote = TextQuoteAnchor.fromRange(
startTextLayer,
quoteRange,
options
).toSelector(options);
const quote = {
type: 'TextQuoteSelector',
...fromTextPosition(
startTextLayer.textContent,
startPosInPage,
endPosInPage
),
};

return [position, quote];
}
Expand Down
6 changes: 4 additions & 2 deletions src/annotator/anchoring/test/pdf-test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'use strict';

const domAnchorTextQuote = require('dom-anchor-text-quote');
const { toRange } = require('../text-position');
const { toTextPosition } = require('../text-quote');

const FakePDFViewerApplication = require('./fake-pdf-viewer-application');
const pdfAnchoring = require('../pdf');
Expand All @@ -13,7 +14,8 @@ const pdfAnchoring = require('../pdf');
* @return {Range}
*/
function findText(container, text) {
return domAnchorTextQuote.toRange(container, { exact: text });
const [start, end] = toTextPosition(container.textContent, text);
return toRange(container, start, end);
}

function delay(ms) {
Expand Down
20 changes: 9 additions & 11 deletions src/annotator/anchoring/types.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
# 2. Insulating the rest of the code from API changes in the underyling anchoring
# libraries.

domAnchorTextQuote = require('dom-anchor-text-quote')

textQuote = require('./text-quote')
textPosition = require('./text-position')
xpathRange = require('./range')

Expand Down Expand Up @@ -94,7 +93,9 @@ class TextQuoteAnchor
@context = context

@fromRange: (root, range, options) ->
selector = domAnchorTextQuote.fromRange(root, range, options)
[start, end] = textPosition.fromRange(root, range)
{ prefix, exact, suffix } = textQuote.fromTextPosition(root.textContent, start, end)
selector = { type: 'TextQuoteSelector', exact, prefix, suffix }
TextQuoteAnchor.fromSelector(root, selector)

@fromSelector: (root, selector) ->
Expand All @@ -110,16 +111,13 @@ class TextQuoteAnchor
}

toRange: (options = {}) ->
range = domAnchorTextQuote.toRange(@root, this.toSelector(), options)
if range == null
throw new Error('Quote not found')
range
anchor = @toPositionAnchor()
textPosition.toRange(@root, anchor.start, anchor.end)

toPositionAnchor: (options = {}) ->
anchor = domAnchorTextQuote.toTextPosition(@root, this.toSelector(), options)
if anchor == null
throw new Error('Quote not found')
new TextPositionAnchor(@root, anchor.start, anchor.end)
{ exact, prefix, suffix } = this.toSelector()
[start, end] = textQuote.toTextPosition(@root.textContent, exact, prefix, suffix)
new TextPositionAnchor(@root, start, end)


exports.RangeAnchor = RangeAnchor
Expand Down

0 comments on commit 2a88ae7

Please sign in to comment.