Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[api-minor] Add a new transferPdfData option to allow transferring more data to the worker-thread (bug 1809164) #15908

Merged
merged 1 commit into from Jan 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 15 additions & 10 deletions src/display/api.js
Expand Up @@ -146,9 +146,6 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) {
* cross-site Access-Control requests should be made using credentials such
* as cookies or authorization headers. The default is `false`.
* @property {string} [password] - For decrypting password-protected PDFs.
* @property {TypedArray} [initialData] - A typed array with the first portion
* or all of the pdf data. Used by the extension since some data is already
* loaded before the switch to range requests.
* @property {number} [length] - The PDF file length. It's used for progress
* reports and range requests operations.
* @property {PDFDataRangeTransport} [range] - Allows for using a custom range
Expand Down Expand Up @@ -192,6 +189,14 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) {
* @property {number} [maxImageSize] - The maximum allowed image size in total
* pixels, i.e. width * height. Images above this value will not be rendered.
* Use -1 for no limit, which is also the default value.
* @property {boolean} [transferPdfData] - Determines if we can transfer
* TypedArrays used for loading the PDF file, utilized together with:
* - The `data`-option, for the `getDocument` function.
* - The `initialData`-option, for the `PDFDataRangeTransport` constructor.
* - The `chunk`-option, for the `PDFDataTransportStream._onReceiveData`
* method.
* This will help reduce main-thread memory usage, however it will take
* ownership of the TypedArrays, i.e. once transferred they are emptied
* (zero length) and can no longer be used by the caller. The default
* value is `false`.
* @property {boolean} [isEvalSupported] - Determines if we can evaluate strings
* as JavaScript. Primarily used to improve performance of font rendering, and
* when parsing PDF functions. The default value is `true`.
Expand Down Expand Up @@ -342,6 +347,7 @@ function getDocument(src) {
params.StandardFontDataFactory =
params.StandardFontDataFactory || DefaultStandardFontDataFactory;
params.ignoreErrors = params.stopAtErrors !== true;
params.transferPdfData = params.transferPdfData === true;
params.fontExtraProperties = params.fontExtraProperties === true;
params.pdfBug = params.pdfBug === true;
params.enableXfa = params.enableXfa === true;
Expand Down Expand Up @@ -439,6 +445,7 @@ function getDocument(src) {
{
length: params.length,
initialData: params.initialData,
transferPdfData: params.transferPdfData,
progressiveDone: params.progressiveDone,
contentDispositionFilename: params.contentDispositionFilename,
disableRange: params.disableRange,
Expand Down Expand Up @@ -513,6 +520,9 @@ async function _fetchDocument(worker, source, pdfDataRangeTransport, docId) {
source.contentDispositionFilename =
pdfDataRangeTransport.contentDispositionFilename;
}
const transfers =
source.transferPdfData && source.data ? [source.data.buffer] : null;

const workerId = await worker.messageHandler.sendWithPromise(
"GetDocRequest",
// Only send the required properties, and *not* the entire `source` object.
Expand Down Expand Up @@ -542,15 +552,10 @@ async function _fetchDocument(worker, source, pdfDataRangeTransport, docId) {
? source.standardFontDataUrl
: null,
},
}
},
transfers
);

// Release the TypedArray data, when it exists, since it's no longer needed
// on the main-thread *after* it's been sent to the worker-thread.
if (source.data) {
source.data = null;
}

if (worker.destroyed) {
throw new Error("Worker was destroyed");
}
Expand Down
43 changes: 30 additions & 13 deletions src/display/transport_stream.js
Expand Up @@ -18,27 +18,41 @@ import { isPdfFile } from "./display_utils.js";

/** @implements {IPDFStream} */
class PDFDataTransportStream {
constructor(params, pdfDataRangeTransport) {
#transferPdfData = false;

constructor(
{
length,
initialData,
transferPdfData = false,
progressiveDone = false,
contentDispositionFilename = null,
disableRange = false,
disableStream = false,
},
pdfDataRangeTransport
) {
assert(
pdfDataRangeTransport,
'PDFDataTransportStream - missing required "pdfDataRangeTransport" argument.'
);

this._queuedChunks = [];
this._progressiveDone = params.progressiveDone || false;
this._contentDispositionFilename =
params.contentDispositionFilename || null;
this.#transferPdfData = transferPdfData;
this._progressiveDone = progressiveDone;
this._contentDispositionFilename = contentDispositionFilename;

const initialData = params.initialData;
if (initialData?.length > 0) {
const buffer = new Uint8Array(initialData).buffer;
const buffer = this.#transferPdfData
? initialData.buffer
: new Uint8Array(initialData).buffer;
this._queuedChunks.push(buffer);
}

this._pdfDataRangeTransport = pdfDataRangeTransport;
this._isStreamingSupported = !params.disableStream;
this._isRangeSupported = !params.disableRange;
this._contentLength = params.length;
this._isStreamingSupported = !disableStream;
this._isRangeSupported = !disableRange;
this._contentLength = length;

this._fullRequestReader = null;
this._rangeReaders = [];
Expand All @@ -62,17 +76,20 @@ class PDFDataTransportStream {
this._pdfDataRangeTransport.transportReady();
}

_onReceiveData(args) {
const buffer = new Uint8Array(args.chunk).buffer;
if (args.begin === undefined) {
_onReceiveData({ begin, chunk }) {
Snuffleupagus marked this conversation as resolved.
Show resolved Hide resolved
const buffer = this.#transferPdfData
? chunk.buffer
: new Uint8Array(chunk).buffer;

if (begin === undefined) {
if (this._fullRequestReader) {
this._fullRequestReader._enqueue(buffer);
} else {
this._queuedChunks.push(buffer);
}
} else {
const found = this._rangeReaders.some(function (rangeReader) {
if (rangeReader._begin !== args.begin) {
if (rangeReader._begin !== begin) {
return false;
}
rangeReader._enqueue(buffer);
Expand Down
80 changes: 80 additions & 0 deletions test/unit/api_spec.js
Expand Up @@ -193,6 +193,45 @@ describe("api", function () {
expect(data[0] instanceof PDFDocumentProxy).toEqual(true);
expect(data[1].loaded / data[1].total).toEqual(1);

// Check that the TypedArray wasn't transferred.
expect(typedArrayPdf.length).toEqual(basicApiFileLength);

await loadingTask.destroy();
});

it("creates pdf doc from TypedArray, with `transferPdfData` set", async function () {
  // Skipped in Node.js, where the worker is reported as unsupported.
  if (isNodeJS) {
    pending("Worker is not supported in Node.js.");
  }
  const pdfBytes = await DefaultFileReaderFactory.fetch({
    path: TEST_PDFS_PATH + basicApiFileName,
  });

  // Guard against a partial fetch before exercising the API: the result
  // must be a `Uint8Array` holding the complete file.
  expect(pdfBytes instanceof Uint8Array).toEqual(true);
  expect(pdfBytes.length).toEqual(basicApiFileLength);

  const loadingTask = getDocument({ data: pdfBytes, transferPdfData: true });
  expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);

  // Settle a capability with the progress data handed to `onProgress`, so
  // that it can be awaited together with the document itself.
  const progressCapability = createPromiseCapability();
  loadingTask.onProgress = (progress) => {
    progressCapability.resolve(progress);
  };

  const [pdfDocument, reportedProgress] = await Promise.all([
    loadingTask.promise,
    progressCapability.promise,
  ]);
  expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true);
  expect(reportedProgress.loaded / reportedProgress.total).toEqual(1);

  // With `transferPdfData` set the caller's TypedArray is expected to have
  // been transferred, which leaves it with a length of zero.
  expect(pdfBytes.length).toEqual(0);

  await loadingTask.destroy();
});

Expand Down Expand Up @@ -3257,6 +3296,47 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
expect(pdfPage.rotate).toEqual(0);
expect(fetches).toBeGreaterThan(2);

// Check that the TypedArray wasn't transferred.
expect(initialData.length).toEqual(initialDataLength);

await loadingTask.destroy();
});

it("should fetch document info and page using ranges, with `transferPdfData` set", async function () {
  // Skipped in Node.js, where the worker is reported as unsupported.
  if (isNodeJS) {
    pending("Worker is not supported in Node.js.");
  }
  const initialDataLength = 4000;
  let rangeRequestCount = 0;

  const pdfBytes = await dataPromise;
  // `new Uint8Array(typedArray)` copies the selected bytes into a buffer of
  // its own, so handing it over does not affect the shared `pdfBytes`.
  const initialData = new Uint8Array(pdfBytes.subarray(0, initialDataLength));
  const transport = new PDFDataRangeTransport(pdfBytes.length, initialData);

  // Serve every requested range from the in-memory copy of the file, while
  // counting how many range requests were made.
  transport.requestDataRange = (begin, end) => {
    rangeRequestCount++;
    waitSome(() => {
      transport.onDataProgress(4000);
      const chunk = new Uint8Array(pdfBytes.subarray(begin, end));
      transport.onDataRange(begin, chunk);
    });
  };

  const loadingTask = getDocument({ range: transport, transferPdfData: true });
  const pdfDocument = await loadingTask.promise;
  expect(pdfDocument.numPages).toEqual(14);

  const pdfPage = await pdfDocument.getPage(10);
  expect(pdfPage.rotate).toEqual(0);
  expect(rangeRequestCount).toBeGreaterThan(2);

  // With `transferPdfData` set the initial TypedArray is expected to have
  // been transferred, which leaves it with a length of zero.
  expect(initialData.length).toEqual(0);

  await loadingTask.destroy();
});

Expand Down
5 changes: 5 additions & 0 deletions web/app_options.js
Expand Up @@ -270,6 +270,11 @@ const defaultOptions = {
: "../web/standard_fonts/",
kind: OptionKind.API,
},
transferPdfData: {
/** @type {boolean} */
value: typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL"),
kind: OptionKind.API,
},
verbosity: {
/** @type {number} */
value: 1,
Expand Down