[api-minor] Enabling transferring of data fetched with the `PDFFetchS…

…tream` implementation Note how in the API we're transferring the PDF data that's fetched over the network[1]: - https://github.com/mozilla/pdf.js/blob/f28bf23a314e36d9e255037f5716ae1eb8e16fbf/src/display/api.js#L2467-L2480 - https://github.com/mozilla/pdf.js/blob/f28bf23a314e36d9e255037f5716ae1eb8e16fbf/src/display/api.js#L2553-L2564 To support that functionality we have the `PDFDataTransportStream`, `PDFFetchStream`, `PDFNetworkStream`, and `PDFNodeStream` implementations. Here these stream-implementations vary slightly in how they handle `ArrayBuffer`s internally, w.r.t. transferring or copying the data: - In `PDFDataTransportStream` we optionally, after PR 15908, allow transferring of the PDF data as provided externally (used e.g. in the Firefox PDF Viewer). - In `PDFFetchStream` we're currenly always copying the PDF data returned by the Fetch API, which seems unnecessary. As discussed in PR 15908, it'd seem very weird if this sort of browser API didn't allow transferring of the returned data. - In `PDFNetworkStream` we're already, since many years, transferring the PDF data returned by the `XMLHttpRequest` functionality. Note how the `getArrayBuffer` helper function simply returns an `ArrayBuffer` response as-is. - In `PDFNodeStream` we're currently copying the PDF data, however this is unfortunately necessary since Node.js returns data as a `Buffer` object[2]. Given that the `PDFNetworkStream` has been, indirectly, supporting transferring of PDF data for years it would seem really strange if this didn't also apply to the `PDFFetchStream`-implementation. Hence this patch simply enables transferring of PDF data, when accessed using the Fetch API, unconditionally to help reduced main-thread memory usage since the `PDFFetchStream`-implementation is used *by default* in browsers (for the GENERIC build). --- [1] As opposed to PDF data being provided as e.g. a TypedArray when calling `getDocument` in the API. [2] This is a "special" Node.js object, see https://nodejs.org/api/buffer.html#buffer, which doesn't exist in browsers.
mozilla · Jan 12, 2023 · cee97fc · cee97fc
1 parent f28bf23
commit cee97fc
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 11 deletions.
diff --git a/src/display/api.js b/src/display/api.js
@@ -192,9 +192,7 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) {
  * @property {boolean} [transferPdfData] - Determines if we can transfer
  *   TypedArrays used for loading the PDF file, utilized together with:
  *    - The `data`-option, for the `getDocument` function.
- *    - The `initialData`-option, for the `PDFDataRangeTransport` constructor.
- *    - The `chunk`-option, for the `PDFDataTransportStream._onReceiveData`
- *      method.
+ *    - The `PDFDataTransportStream` implementation.
  *   This will help reduce main-thread memory usage, however it will take
  *   ownership of the TypedArrays. The default value is `false`.
  * @property {boolean} [isEvalSupported] - Determines if we can evaluate strings
@@ -2472,7 +2470,7 @@ class WorkerTransport {
               return;
             }
             assert(
-              isArrayBuffer(value),
+              value instanceof ArrayBuffer,
               "GetReader - expected an ArrayBuffer."
             );
             // Enqueue data chunk into sink, and transfer it
@@ -2558,7 +2556,7 @@ class WorkerTransport {
               return;
             }
             assert(
-              isArrayBuffer(value),
+              value instanceof ArrayBuffer,
               "GetRangeReader - expected an ArrayBuffer."
             );
             sink.enqueue(new Uint8Array(value), 1, [value]);

diff --git a/src/display/fetch_stream.js b/src/display/fetch_stream.js
@@ -17,6 +17,7 @@ import {
   AbortException,
   assert,
   createPromiseCapability,
+  warn,
 } from "../shared/util.js";
 import {
   createResponseStatusError,
@@ -54,6 +55,17 @@ function createHeaders(httpHeaders) {
   return headers;
 }
 
+function getArrayBuffer(val) {
+  if (val instanceof Uint8Array) {
+    return val.buffer;
+  }
+  if (val instanceof ArrayBuffer) {
+    return val;
+  }
+  warn(`getArrayBuffer - unexpected data format: ${val}`);
+  return new Uint8Array(val).buffer;
+}
+
 /** @implements {IPDFStream} */
 class PDFFetchStream {
   constructor(source) {
@@ -195,8 +207,7 @@ class PDFFetchStreamReader {
       total: this._contentLength,
     });
 
-    const buffer = new Uint8Array(value).buffer;
-    return { value: buffer, done: false };
+    return { value: getArrayBuffer(value), done: false };
   }
 
   cancel(reason) {
@@ -254,8 +265,7 @@ class PDFFetchStreamRangeReader {
     this._loaded += value.byteLength;
     this.onProgress?.({ loaded: this._loaded });
 
-    const buffer = new Uint8Array(value).buffer;
-    return { value: buffer, done: false };
+    return { value: getArrayBuffer(value), done: false };
   }
 
   cancel(reason) {

diff --git a/src/display/network.js b/src/display/network.js
@@ -38,8 +38,7 @@ function getArrayBuffer(xhr) {
   if (typeof data !== "string") {
     return data;
   }
-  const array = stringToBytes(data);
-  return array.buffer;
+  return stringToBytes(data).buffer;
 }
 
 class NetworkManager {