Skip to content
Permalink
Browse files
fs: improve promise based readFile performance for big files
This significantly reduces the peak memory for the promise
based readFile operation by reusing a single memory chunk after
each read and strinigifying that chunk immediately.

Signed-off-by: Ruben Bridgewater <ruben@bridgewater.de>
PR-URL: #44295
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
BridgeAR authored and danielleadams committed Oct 11, 2022
1 parent 9c7e664 commit 67fb76519a054c0b5266e0bfb1a7600660893f2e
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 37 deletions.
@@ -16,7 +16,14 @@ const filename = path.resolve(tmpdir.path,
const bench = common.createBenchmark(main, {
duration: [5],
encoding: ['', 'utf-8'],
len: [1024, 16 * 1024 * 1024],
len: [
1024,
512 * 1024,
4 * 1024 ** 2,
8 * 1024 ** 2,
16 * 1024 ** 2,
32 * 1024 ** 2,
],
concurrent: [1, 10]
});

@@ -339,6 +339,9 @@ function readFileAfterStat(err, stats) {
if (err)
return context.close(err);

// TODO(BridgeAR): Check if allocating a smaller chunk is better performance
// wise, similar to the promise based version (less peak memory and chunked
// stringify operations vs multiple C++/JS boundary crossings).
const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;

if (size > kIoMaxLength) {
@@ -348,6 +351,8 @@ function readFileAfterStat(err, stats) {

try {
if (size === 0) {
// TODO(BridgeAR): If an encoding is set, use the StringDecoder to concat
// the result and reuse the buffer instead of allocating a new one.
context.buffers = [];
} else {
context.buffer = Buffer.allocUnsafeSlow(size);
@@ -86,6 +86,7 @@ const {
promisify,
} = require('internal/util');
const { EventEmitterMixin } = require('internal/event_target');
const { StringDecoder } = require('string_decoder');
const { watch } = require('internal/fs/watchers');
const { isIterable } = require('internal/streams/utils');
const assert = require('internal/assert');
@@ -416,63 +417,83 @@ async function writeFileHandle(filehandle, data, signal, encoding) {

async function readFileHandle(filehandle, options) {
const signal = options?.signal;
const encoding = options?.encoding;
const decoder = encoding && new StringDecoder(encoding);

checkAborted(signal);

const statFields = await binding.fstat(filehandle.fd, false, kUsePromises);

checkAborted(signal);

let size;
let size = 0;
let length = 0;
if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
size = statFields[8/* size */];
} else {
size = 0;
length = encoding ? MathMin(size, kReadFileBufferLength) : size;
}
if (length === 0) {
length = kReadFileUnknownBufferLength;
}

if (size > kIoMaxLength)
throw new ERR_FS_FILE_TOO_LARGE(size);

let endOfFile = false;
let totalRead = 0;
const noSize = size === 0;
const buffers = [];
const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
do {
let buffer = Buffer.allocUnsafeSlow(length);
let result = '';
let offset = 0;
let buffers;
const chunkedRead = length > kReadFileBufferLength;

while (true) {
checkAborted(signal);
let buffer;
let offset;
let length;
if (noSize) {
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
offset = 0;
length = kReadFileUnknownBufferLength;
} else {
buffer = fullBuffer;
offset = totalRead;

if (chunkedRead) {
length = MathMin(size - totalRead, kReadFileBufferLength);
}

This comment was marked as spam.

Copy link
@Waazer

Waazer Nov 21, 2022

This comment has been hidden.

Copy link
@Trott

Trott Nov 21, 2022

Member

User blocked.

const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
length, -1, kUsePromises)) || 0;
length, -1, kUsePromises)) ?? 0;
totalRead += bytesRead;
endOfFile = bytesRead === 0 || totalRead === size;
if (noSize && bytesRead > 0) {
const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
ArrayPrototypePush(buffers, chunkBuffer);

if (bytesRead === 0 ||
totalRead === size ||
(bytesRead !== buffer.length && !chunkedRead)) {
const singleRead = bytesRead === totalRead;

const bytesToCheck = chunkedRead ? totalRead : bytesRead;

if (bytesToCheck !== buffer.length) {
buffer = buffer.subarray(0, bytesToCheck);
}

if (!encoding) {
if (size === 0 && !singleRead) {
ArrayPrototypePush(buffers, buffer);
return Buffer.concat(buffers, totalRead);
}
return buffer;
}

if (singleRead) {
return buffer.toString(encoding);
}
result += decoder.end(buffer);
return result;
}
} while (!endOfFile);

let result;
if (size > 0) {
result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
} else {
result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
totalRead);
if (encoding) {
result += decoder.write(buffer);
} else if (size !== 0) {
offset = totalRead;
} else {
buffers ??= [];
// Unknown file size requires chunks.
ArrayPrototypePush(buffers, buffer);
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
}
}

return options.encoding ? result.toString(options.encoding) : result;
}

// All of the functions are defined as async in order to ensure that errors
@@ -1,3 +1,4 @@
// Flags: --expose-internals
'use strict';

const common = require('../common');
@@ -6,15 +7,15 @@ const assert = require('assert');
const path = require('path');
const { writeFile, readFile } = require('fs').promises;
const tmpdir = require('../common/tmpdir');
const { internalBinding } = require('internal/test/binding');
const fsBinding = internalBinding('fs');
tmpdir.refresh();

const fn = path.join(tmpdir.path, 'large-file');

// Creating large buffer with random content
const largeBuffer = Buffer.from(
Array.apply(null, { length: 16834 * 2 })
.map(Math.random)
.map((number) => (number * (1 << 8)))
Array.from({ length: 1024 ** 2 + 19 }, (_, index) => index)
);

async function createLargeFile() {
@@ -69,11 +70,22 @@ async function validateWrongSignalParam() {

}

async function validateZeroByteLiar() {
const originalFStat = fsBinding.fstat;
fsBinding.fstat = common.mustCall(
() => (/* stat fields */ [0, 1, 2, 3, 4, 5, 6, 7, 0 /* size */])
);
const readBuffer = await readFile(fn);
assert.strictEqual(readBuffer.toString(), largeBuffer.toString());
fsBinding.fstat = originalFStat;
}

(async () => {
await createLargeFile();
await validateReadFile();
await validateReadFileProc();
await validateReadFileAbortLogicBefore();
await validateReadFileAbortLogicDuring();
await validateWrongSignalParam();
await validateZeroByteLiar();
})().then(common.mustCall());

0 comments on commit 67fb765

Please sign in to comment.