Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Remove async line reader from cspell-io
- Loading branch information
Showing
9 changed files
with
41 additions
and
248 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,90 +1,15 @@ | ||
import * as fReader from './fileReader'; | ||
import * as fs from 'fs-extra'; | ||
import * as path from 'path'; | ||
import { Readable } from 'stream'; | ||
import * as asyncIterable from '../async/asyncIterable'; | ||
|
||
describe('Validate the fileReader', () => { | ||
const samplePath = path.join(__dirname, '..', '..', 'samples'); | ||
const fileCities = path.join(samplePath, 'cities.txt'); | ||
const sampleFiles = ['cities.txt', 'cities.CRLF.txt', 'cities.noEOL.txt'].map((f) => path.join(samplePath, f)); | ||
|
||
test('tests reading a file', async () => { | ||
const expected = await fs.readFile(__filename, 'utf8'); | ||
const result = await fReader.readFile(__filename, 'utf8'); | ||
expect(result).toBe(expected); | ||
}); | ||
|
||
test('tests stringsToLines', async () => { | ||
const strings = stringToStream('a1\n2\n3\n4', '5\n6'); | ||
const a = await asyncIterable.toArray(fReader.streamLineByLineAsync(strings)); | ||
expect(a).toEqual(['a1', '2', '3', '45', '6']); | ||
}); | ||
|
||
test('tests stringsToLines trailing new line', async () => { | ||
const strings = stringToStream('a1\n2\n3\n4', '5\n6\n'); | ||
const a = await asyncIterable.toArray(fReader.streamLineByLineAsync(strings)); | ||
expect(a).toEqual(['a1', '2', '3', '45', '6', '']); | ||
}); | ||
|
||
test('the file reader', async () => { | ||
const lines = await asyncIterable.toArray(fReader.streamFileLineByLineAsync(__filename)); | ||
const actual = lines.join('\n'); | ||
const expected = fs.readFileSync(__filename, 'utf8'); | ||
expect(actual).toBe(expected); | ||
}); | ||
|
||
test('the lineReaderAsync', async () => { | ||
const lines = await asyncIterable.toArray(fReader.lineReaderAsync(__filename)); | ||
const expected = fs.readFileSync(__filename, 'utf8').split('\n'); | ||
expect(lines).toEqual(expected); | ||
}); | ||
|
||
test('tests reading the cities sample', async () => { | ||
const lines = await asyncIterable.toArray(fReader.lineReaderAsync(fileCities)); | ||
const file = await fs.readFile(fileCities, 'utf8'); | ||
expect(lines).toEqual(file.split('\n')); | ||
}); | ||
|
||
test('tests streamFileLineByLineAsync', async () => { | ||
await Promise.all( | ||
sampleFiles.map(async (filename) => { | ||
const lines = await asyncIterable.toArray(fReader.streamFileLineByLineAsync(filename)); | ||
const file = await fs.readFile(filename, 'utf8'); | ||
// compare to file: ${filename} | ||
expect(lines).toEqual(file.split(/\r?\n/)); | ||
}) | ||
); | ||
}); | ||
|
||
test('tests streamFileLineByLineAsync 2', async () => { | ||
const lines = await asyncIterable.toArray(fReader.streamFileLineByLineAsync(__filename)); | ||
const file = await fs.readFile(__filename, 'utf8'); | ||
expect(lines).toEqual(file.split('\n')); | ||
}); | ||
|
||
test('missing file', async () => { | ||
const result = asyncIterable.toArray(fReader.lineReaderAsync(__filename + 'not.found')); | ||
return result.then( | ||
() => { | ||
expect('not to be here').toBe(true); | ||
return; | ||
}, | ||
(e) => { | ||
// expect(e).to.be.instanceof(Error); // Since jest currently mocks Error, this test fails. | ||
expect(e.code).toBe('ENOENT'); | ||
} | ||
); | ||
const result = fReader.readFile(__filename + '.missing.txt', 'utf8'); | ||
await expect(result).rejects.toEqual(expect.objectContaining({ code: 'ENOENT' })); | ||
}); | ||
}); | ||
|
||
// Test helper: builds a Readable that pushes each of the given strings, in order, | ||
// and then ends the stream. | ||
// @param strings the pieces of text to push into the stream. | ||
// @returns the Readable, typed as NodeJS.ReadableStream. | ||
function stringToStream(...strings: string[]): NodeJS.ReadableStream { | ||
return new Readable({ | ||
read: function () { | ||
for (const s of strings) { | ||
this.push(s); | ||
} | ||
// Pushing `null` signals end-of-stream to the consumer. | ||
this.push(null); | ||
}, | ||
}); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,156 +1,27 @@ | ||
// cSpell:ignore curr | ||
// cSpell:words zlib iconv | ||
import * as fs from 'fs'; | ||
import * as zlib from 'zlib'; | ||
import * as readline from 'readline'; | ||
import { PassThrough, pipeline as pipelineCB } from 'stream'; | ||
import { promisify } from 'util'; | ||
|
||
const defaultEncoding: BufferEncoding = 'utf8'; | ||
|
||
export function readFile(filename: string, encoding: BufferEncoding = defaultEncoding): Promise<string> { | ||
return new Promise((resolve, reject) => { | ||
const data: string[] = []; | ||
const stream = prepareFileStream(filename, encoding, reject); | ||
let resolved = false; | ||
function complete() { | ||
resolve(data.join('')); | ||
resolved = resolved || (resolve(data.join('')), true); | ||
} | ||
stream.on('error', reject); | ||
stream.on('data', (d: string) => data.push(d)); | ||
stream.on('close', complete); | ||
stream.on('end', complete); | ||
}); | ||
} | ||
const pipeline = promisify(pipelineCB); | ||
|
||
/** | ||
* Reads a file line by line. This is a thin wrapper that delegates to `streamFileLineByLineAsync`. | ||
* A final empty string is emitted only when the data ends with a line break. | ||
* @param filename the file to read. | ||
* @param encoding defaults to 'utf8' | ||
* @returns an async iterable of the file's lines. | ||
*/ | ||
export function lineReaderAsync(filename: string, encoding: BufferEncoding = defaultEncoding): AsyncIterable<string> { | ||
return streamFileLineByLineAsync(filename, encoding); | ||
} | ||
const defaultEncoding: BufferEncoding = 'utf8'; | ||
|
||
function prepareFileStream(filename: string, encoding: BufferEncoding, fnError: (e: Error) => void) { | ||
const pipes: NodeJS.ReadWriteStream[] = []; | ||
if (filename.match(/\.gz$/i)) { | ||
pipes.push(zlib.createGunzip()); | ||
} | ||
export async function readFile(filename: string, encoding: BufferEncoding = defaultEncoding): Promise<string> { | ||
const isGzip = filename.match(/\.gz$/i); | ||
const fileStream = fs.createReadStream(filename); | ||
fileStream.on('error', fnError); | ||
const stream = pipes.reduce<NodeJS.ReadableStream>((s, p) => s.pipe(p).on('error', fnError), fileStream); | ||
stream.setEncoding(encoding); | ||
return stream; | ||
} | ||
|
||
/** | ||
* Emit a file line by line. | ||
* Opens the file with `prepareFileStream` and wraps the resulting stream with `streamLineByLineAsync`. | ||
* @param filename full path to the file to read. | ||
* @param encoding defaults to 'utf8' | ||
* @returns an async iterator over the lines of the file. | ||
*/ | ||
export function streamFileLineByLineAsync( | ||
filename: string, | ||
encoding: BufferEncoding = defaultEncoding | ||
): AsyncIterableIterator<string> { | ||
// Forwards stream errors to the iterator. `iter` is declared below; this closure | ||
// is presumably only invoked after `iter` has been assigned - TODO confirm. | ||
const fnError = (e: Error) => { | ||
iter.throw && iter.throw(e); | ||
}; | ||
const stream = prepareFileStream(filename, encoding, fnError); | ||
// NOTE(review): `encoding` is not forwarded here, so streamLineByLineAsync uses its own default. | ||
const iter = streamLineByLineAsync(stream); | ||
return iter; | ||
} | ||
|
||
type Resolve<T> = (value: T | Promise<T>) => void; | ||
// eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
type Reject = (reason?: any) => void; | ||
|
||
interface Resolvers<T = IteratorResult<string>> { | ||
resolve: Resolve<T>; | ||
reject: Reject; | ||
const zip = isGzip ? zlib.createGunzip() : new PassThrough(); | ||
const t = pipeline(fileStream, zip, streamToText(encoding)); | ||
return await t; | ||
} | ||
|
||
/** | ||
* Emit a file line by line | ||
* @param filename full path to the file to read. | ||
* @param encoding defaults to 'utf8' | ||
*/ | ||
export function streamLineByLineAsync( | ||
stream: NodeJS.ReadableStream, | ||
encoding: BufferEncoding = defaultEncoding | ||
): AsyncIterableIterator<string> { | ||
let data = '.'; | ||
let done = false; | ||
let error: Error | undefined; | ||
const buffer: string[] = []; | ||
const pending: Resolvers[] = []; | ||
const fnError = (e: Error | undefined) => { | ||
error = e; | ||
}; | ||
const fnComplete = () => { | ||
// readline will consume the last newline without emitting an empty last line. | ||
// If the last data read contains a new line, then emit an empty string. | ||
if (data.match(/(?:(?:\r?\n)|(?:\r))$/)) { | ||
buffer.push(''); | ||
} | ||
processBuffer(); | ||
done = true; | ||
}; | ||
// We want to capture the last line. | ||
stream.on('data', (d) => (data = dataToString(d, encoding))); | ||
stream.on('error', fnError); | ||
const rl = readline.createInterface({ | ||
input: stream, | ||
terminal: false, | ||
}); | ||
rl.on('close', fnComplete); | ||
rl.on('line', (text: string) => { | ||
buffer.push(text); | ||
processBuffer(); | ||
}); | ||
|
||
function registerPromise(resolve: Resolve<IteratorResult<string>>, reject: Reject) { | ||
pending.push({ resolve, reject }); | ||
processBuffer(); | ||
} | ||
|
||
function processBuffer() { | ||
if (error && pending.length && !buffer.length) { | ||
const p = pending.shift(); | ||
p?.reject(error); | ||
return; | ||
} | ||
while (pending.length && buffer.length) { | ||
const p = pending.shift(); | ||
const b = buffer.shift(); | ||
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion | ||
p?.resolve({ done: false, value: b! }); | ||
function streamToText(encoding: BufferEncoding): (source: fs.ReadStream) => Promise<string> { | ||
return async function (source: fs.ReadStream): Promise<string> { | ||
const chunks: string[] = []; | ||
source.setEncoding(encoding); // Work with strings rather than `Buffer`s. | ||
for await (const chunk of source) { | ||
chunks.push(chunk); | ||
} | ||
if (!done) { | ||
pending.length ? rl.resume() : rl.pause(); | ||
} | ||
if (done && pending.length && !buffer.length) { | ||
const p = pending.shift(); | ||
p?.resolve({ done, value: undefined }); | ||
} | ||
} | ||
|
||
const iter: AsyncIterableIterator<string> = { | ||
[Symbol.asyncIterator]: () => iter, | ||
next() { | ||
return new Promise(registerPromise); | ||
}, | ||
throw(e) { | ||
fnError(e); | ||
return new Promise(registerPromise); | ||
}, | ||
return chunks.join(''); | ||
}; | ||
|
||
return iter; | ||
} | ||
|
||
// Normalizes a stream data chunk to a string. | ||
// @param data the chunk, either already a string or a Buffer. | ||
// @param encoding used to decode a Buffer; defaults to 'utf8'. | ||
// @returns `data` unchanged when it is a string, otherwise the decoded Buffer contents. | ||
function dataToString(data: string | Buffer, encoding: BufferEncoding = 'utf8'): string { | ||
if (typeof data === 'string') { | ||
return data; | ||
} | ||
return data.toString(encoding); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1 @@ | ||
export * from './file'; | ||
export { toArray as asyncIterableToArray } from './async/asyncIterable'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters