Skip to content

Commit

Permalink
Preserve the whitespaces when getting text from FreeText annotations …
Browse files Browse the repository at this point in the history
…(bug 1871353)

When the text of an annotation is extracted using getTextContent, consecutive white spaces
are collapsed into a single space. This patch adds an option to make sure that white
spaces are preserved when the appearance is parsed.
For the case where there's no appearance, we can take a fast path and get the correct string
directly from the Contents entry.
When an existing FreeText is edited, spaces (0x20) are replaced by non-breaking ones (0xa0)
so that all of them are visible on screen.
  • Loading branch information
calixteman committed Jan 2, 2024
1 parent 231c798 commit 808a830
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 32 deletions.
69 changes: 45 additions & 24 deletions src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -1175,6 +1175,10 @@ class Annotation {
return;
}

if (this.data.textContent) {
return;
}

const resources = await this.loadResources(
["ExtGState", "Font", "Properties", "XObject"],
this.appearance
Expand Down Expand Up @@ -1207,6 +1211,7 @@ class Annotation {
task,
resources,
includeMarkedContent: true,
keepWhiteSpace: true,
sink,
viewBox,
});
Expand All @@ -1218,20 +1223,26 @@ class Annotation {

if (text.length > 1 || text[0]) {
const appearanceDict = this.appearance.dict;
const bbox = appearanceDict.getArray("BBox") || [0, 0, 1, 1];
const matrix = appearanceDict.getArray("Matrix") || [1, 0, 0, 1, 0, 0];
const rect = this.data.rect;
const transform = getTransformMatrix(rect, bbox, matrix);
transform[4] -= rect[0];
transform[5] -= rect[1];
firstPosition = Util.applyTransform(firstPosition, transform);
firstPosition = Util.applyTransform(firstPosition, matrix);

this.data.textPosition = firstPosition;
this.data.textPosition = this._transformPoint(
firstPosition,
appearanceDict.getArray("BBox"),
appearanceDict.getArray("Matrix")
);
this.data.textContent = text;
}
}

_transformPoint(coords, bbox, matrix) {
const { rect } = this.data;
bbox ||= [0, 0, 1, 1];
matrix ||= [1, 0, 0, 1, 0, 0];
const transform = getTransformMatrix(rect, bbox, matrix);
transform[4] -= rect[0];
transform[5] -= rect[1];
coords = Util.applyTransform(coords, transform);
return Util.applyTransform(coords, matrix);
}

/**
* Get field data for usage in JS sandbox.
*
Expand Down Expand Up @@ -3768,31 +3779,41 @@ class FreeTextAnnotation extends MarkupAnnotation {
this.data.annotationType = AnnotationType.FREETEXT;
this.setDefaultAppearance(params);
if (this.appearance) {
this.data.textContent = "";
const { fontColor, fontSize } = parseAppearanceStream(
this.appearance,
evaluatorOptions,
xref
);
this.data.defaultAppearanceData.fontColor = fontColor;
this.data.defaultAppearanceData.fontSize = fontSize || 10;
} else if (this._isOffscreenCanvasSupported) {
const strokeAlpha = params.dict.get("CA");
const fakeUnicodeFont = new FakeUnicodeFont(xref, "sans-serif");
this.data.defaultAppearanceData.fontSize ||= 10;
} else {
const { fontColor, fontSize } = this.data.defaultAppearanceData;
this.appearance = fakeUnicodeFont.createAppearance(
this._contents.str,
this.data.textContent = this._contents.str.split(/\r\n?|\n/);
const { coords, bbox, matrix } = FakeUnicodeFont.getFirstPositionInfo(
this.rectangle,
this.rotation,
fontSize,
fontColor,
strokeAlpha
);
this._streams.push(this.appearance, FakeUnicodeFont.toUnicodeStream);
} else {
warn(
"FreeTextAnnotation: OffscreenCanvas is not supported, annotation may not render correctly."
fontSize
);
this.data.textPosition = this._transformPoint(coords, bbox, matrix);
if (this._isOffscreenCanvasSupported) {
const strokeAlpha = params.dict.get("CA");
const fakeUnicodeFont = new FakeUnicodeFont(xref, "sans-serif");
this.data.defaultAppearanceData.fontSize ||= 10;
this.appearance = fakeUnicodeFont.createAppearance(
this._contents.str,
this.rectangle,
this.rotation,
fontSize,
fontColor,
strokeAlpha
);
this._streams.push(this.appearance, FakeUnicodeFont.toUnicodeStream);
} else {
warn(
"FreeTextAnnotation: OffscreenCanvas is not supported, annotation may not render correctly."
);
}
}
}

Expand Down
20 changes: 20 additions & 0 deletions src/core/default_appearance.js
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,26 @@ endcmap CMapName currentdict /CMap defineresource pop end end`;
return this.resources;
}

static getFirstPositionInfo(rect, rotation, fontSize) {
// Get the position of the first char in the rect.
const [x1, y1, x2, y2] = rect;
let w = x2 - x1;
let h = y2 - y1;

if (rotation % 180 !== 0) {
[w, h] = [h, w];
}
const lineHeight = LINE_FACTOR * fontSize;
const lineDescent = LINE_DESCENT_FACTOR * fontSize;

return {
coords: [0, h + lineDescent - lineHeight],
bbox: [0, 0, w, h],
matrix:
rotation !== 0 ? getRotationMatrix(rotation, h, lineHeight) : undefined,
};
}

createAppearance(text, rect, rotation, fontSize, bgColor, strokeAlpha) {
const ctx = this._createContext();
const lines = [];
Expand Down
6 changes: 4 additions & 2 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -2281,6 +2281,7 @@ class PartialEvaluator {
viewBox,
markedContentData = null,
disableNormalization = false,
keepWhiteSpace = false,
}) {
// Ensure that `resources`/`stateManager` is correctly initialized,
// even if the provided parameter is e.g. `null`.
Expand Down Expand Up @@ -2347,11 +2348,12 @@ class PartialEvaluator {
twoLastChars[twoLastCharsPos] = char;
twoLastCharsPos = nextPos;

return ret;
return !keepWhiteSpace && ret;
}

function shouldAddWhitepsace() {
return (
!keepWhiteSpace &&
twoLastChars[twoLastCharsPos] !== " " &&
twoLastChars[(twoLastCharsPos + 1) % 2] === " "
);
Expand Down Expand Up @@ -2836,7 +2838,7 @@ class PartialEvaluator {
}
let scaledDim = glyphWidth * scale;

if (category.isWhitespace) {
if (!keepWhiteSpace && category.isWhitespace) {
// Don't push a " " in the textContentItem
// (except when it's between two non-spaces chars),
// it will be done (if required) in next call to
Expand Down
13 changes: 10 additions & 3 deletions src/display/editor/freetext.js
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,14 @@ class FreeTextEditor extends AnnotationEditor {
}
}

#serializeContent() {
return this.#content.replaceAll("\u00a0", " ");
}

static #deserializeContent(content) {
return content.replaceAll(" ", "\u00a0");
}

/** @inheritdoc */
get contentDiv() {
return this.editorDiv;
Expand Down Expand Up @@ -690,10 +698,9 @@ class FreeTextEditor extends AnnotationEditor {
};
}
const editor = super.deserialize(data, parent, uiManager);

editor.#fontSize = data.fontSize;
editor.#color = Util.makeHexColor(...data.color);
editor.#content = data.value;
editor.#content = FreeTextEditor.#deserializeContent(data.value);
editor.annotationElementId = data.id || null;
editor.#initialData = initialData;

Expand Down Expand Up @@ -726,7 +733,7 @@ class FreeTextEditor extends AnnotationEditor {
annotationType: AnnotationEditorType.FREETEXT,
color,
fontSize: this.#fontSize,
value: this.#content,
value: this.#serializeContent(),
pageIndex: this.pageIndex,
rect,
rotation: this.rotation,
Expand Down
70 changes: 67 additions & 3 deletions test/integration/freetext_editor_spec.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,11 @@ describe("FreeText Editor", () => {
await waitForStorageEntries(page, 2);

const content = await page.$eval(getEditorSelector(0), el =>
el.innerText.trimEnd()
el.innerText.trimEnd().replaceAll("\xa0", " ")
);

let pastedContent = await page.$eval(getEditorSelector(1), el =>
el.innerText.trimEnd()
el.innerText.trimEnd().replaceAll("\xa0", " ")
);

expect(pastedContent).withContext(`In ${browserName}`).toEqual(content);
Expand All @@ -225,7 +225,7 @@ describe("FreeText Editor", () => {
await waitForStorageEntries(page, 3);

pastedContent = await page.$eval(getEditorSelector(2), el =>
el.innerText.trimEnd()
el.innerText.trimEnd().replaceAll("\xa0", " ")
);
expect(pastedContent).withContext(`In ${browserName}`).toEqual(content);
}
Expand Down Expand Up @@ -3182,4 +3182,68 @@ describe("FreeText Editor", () => {
);
});
});

// Integration test using the bug1871353.pdf fixture: edits an existing
// FreeText editor and checks the value that gets serialized afterwards.
describe("Consecutive white spaces in Freetext without appearance", () => {
// Pairs of [browserName, page] returned by loadAndWait.
let pages;

// Open the fixture and wait for the annotation editor layer.
beforeAll(async () => {
pages = await loadAndWait("bug1871353.pdf", ".annotationEditorLayer");
});

afterAll(async () => {
await closePages(pages);
});

it("must check that consecutive white spaces are preserved when a freetext is edited", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
await switchToFreeText(page);
// Click twice on the first editor, then type one character in it.
await page.click(getEditorSelector(0), { count: 2 });
await page.type(`${getEditorSelector(0)} .internal`, "C");

// Click the FreeText button and wait until the layer no longer has the
// freetextEditing class.
await page.click("#editorFreeText");
await page.waitForSelector(
`.annotationEditorLayer:not(.freetextEditing)`
);

// NOTE(review): this test is about consecutive white spaces, but the
// expected string as it appears here has a single space between "CA" and
// "B" — confirm against the content of the fixture.
const [value] = await getSerialized(page, x => x.value);
expect(value)
.withContext(`In ${browserName}`)
.toEqual("CA B");
})
);
});
});

// Integration test using the bug1871353.1.pdf fixture: edits an existing
// FreeText editor and checks the value that gets serialized afterwards.
describe("Consecutive white spaces in Freetext with appearance", () => {
// Pairs of [browserName, page] returned by loadAndWait.
let pages;

// Open the fixture and wait for the annotation editor layer.
beforeAll(async () => {
pages = await loadAndWait("bug1871353.1.pdf", ".annotationEditorLayer");
});

afterAll(async () => {
await closePages(pages);
});

it("must check that consecutive white spaces are preserved when a freetext is edited", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
await switchToFreeText(page);
// Click twice on the first editor, then type one character in it.
await page.click(getEditorSelector(0), { count: 2 });
await page.type(`${getEditorSelector(0)} .internal`, "Z");

// Click the FreeText button and wait until the layer no longer has the
// freetextEditing class.
await page.click("#editorFreeText");
await page.waitForSelector(
`.annotationEditorLayer:not(.freetextEditing)`
);

// NOTE(review): this test is about consecutive white spaces, but the
// expected string as it appears here has a single space between "ZX" and
// "Y" — confirm against the content of the fixture.
const [value] = await getSerialized(page, x => x.value);
expect(value)
.withContext(`In ${browserName}`)
.toEqual("ZX Y");
})
);
});
});
});
2 changes: 2 additions & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -620,3 +620,5 @@
!issue17215.pdf
!bug1863910.pdf
!bug1865341.pdf
!bug1871353.pdf
!bug1871353.1.pdf
Binary file added test/pdfs/bug1871353.1.pdf
Binary file not shown.
Binary file added test/pdfs/bug1871353.pdf
Binary file not shown.

0 comments on commit 808a830

Please sign in to comment.