Skip to content

Commit

Permalink
Merge pull request #18125 from Snuffleupagus/issue-18122
Browse files Browse the repository at this point in the history
Support corrupt PDF documents that contain "endsteam" commands (issue 18122)
  • Loading branch information
Snuffleupagus committed May 21, 2024
2 parents 661a62b + 57014d0 commit dce9c6d
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 43 deletions.
97 changes: 54 additions & 43 deletions src/core/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -609,12 +609,27 @@ class Parser {
return imageStream;
}

_findStreamLength(startPos, signature) {
#findStreamLength(startPos) {
const { stream } = this.lexer;
stream.pos = startPos;

const SCAN_BLOCK_LENGTH = 2048;
const signatureLength = signature.length;
const signatureLength = "endstream".length;

const END_SIGNATURE = new Uint8Array([0x65, 0x6e, 0x64]);
const endLength = END_SIGNATURE.length;

// Ideally we'd directly search for "endstream", however there are corrupt
// PDF documents where the command is incomplete; hence we search for:
// 1. The normal case.
// 2. The misspelled case (fixes issue18122.pdf).
// 3. The truncated case (fixes issue10004.pdf).
const PARTIAL_SIGNATURE = [
new Uint8Array([0x73, 0x74, 0x72, 0x65, 0x61, 0x6d]), // "stream"
new Uint8Array([0x73, 0x74, 0x65, 0x61, 0x6d]), // "steam",
new Uint8Array([0x73, 0x74, 0x72, 0x65, 0x61]), // "strea"
];
const normalLength = signatureLength - endLength;

while (stream.pos < stream.end) {
const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
Expand All @@ -626,13 +641,43 @@ class Parser {
let pos = 0;
while (pos < scanLength) {
let j = 0;
while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
while (j < endLength && scanBytes[pos + j] === END_SIGNATURE[j]) {
j++;
}
if (j >= signatureLength) {
// `signature` found.
stream.pos += pos;
return stream.pos - startPos;
if (j >= endLength) {
// "end" found, find the complete command.
let found = false;
for (const part of PARTIAL_SIGNATURE) {
const partLen = part.length;
let k = 0;
while (k < partLen && scanBytes[pos + j + k] === part[k]) {
k++;
}
if (k >= normalLength) {
// Found "endstream" command.
found = true;
break;
}
if (k >= partLen) {
// Found "endsteam" or "endstrea" command.
// Ensure that the byte immediately following the corrupt
// endstream command is whitespace, to prevent false positives.
const lastByte = scanBytes[pos + j + k];
if (isWhiteSpace(lastByte)) {
info(
`Found "${bytesToString([...END_SIGNATURE, ...part])}" when ` +
"searching for endstream command."
);
found = true;
}
break;
}
}

if (found) {
stream.pos += pos;
return stream.pos - startPos;
}
}
pos++;
}
Expand Down Expand Up @@ -665,43 +710,9 @@ class Parser {
this.shift(); // 'stream'
} else {
// Bad stream length, scanning for endstream command.
const ENDSTREAM_SIGNATURE = new Uint8Array([
0x65, 0x6e, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d,
]);
let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);
const actualLength = this.#findStreamLength(startPos);
if (actualLength < 0) {
// Only allow limited truncation of the endstream signature,
// to prevent false positives.
const MAX_TRUNCATION = 1;
// Check if the PDF generator included truncated endstream commands,
// such as e.g. "endstrea" (fixes issue10004.pdf).
for (let i = 1; i <= MAX_TRUNCATION; i++) {
const end = ENDSTREAM_SIGNATURE.length - i;
const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);

const maybeLength = this._findStreamLength(
startPos,
TRUNCATED_SIGNATURE
);
if (maybeLength >= 0) {
// Ensure that the byte immediately following the truncated
// endstream command is a space, to prevent false positives.
const lastByte = stream.peekBytes(end + 1)[end];
if (!isWhiteSpace(lastByte)) {
break;
}
info(
`Found "${bytesToString(TRUNCATED_SIGNATURE)}" when ` +
"searching for endstream command."
);
actualLength = maybeLength;
break;
}
}

if (actualLength < 0) {
throw new FormatError("Missing endstream command.");
}
throw new FormatError("Missing endstream command.");
}
length = actualLength;

Expand Down
1 change: 1 addition & 0 deletions test/pdfs/issue18122.pdf.link
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/15385405/Factuur_1105490.pdf
9 changes: 9 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -5420,6 +5420,15 @@
"rounds": 1,
"type": "text"
},
{
"id": "issue18122",
"file": "pdfs/issue18122.pdf",
"md5": "b9be483e6bc0f35994f536d8f6a685c1",
"rounds": 1,
"link": true,
"lastPage": 1,
"type": "eq"
},
{
"id": "issue4890",
"file": "pdfs/issue4890.pdf",
Expand Down

0 comments on commit dce9c6d

Please sign in to comment.