diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.en.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.en.test.ts index 610abaf5fd9..61e5b2fbb3f 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.en.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.en.test.ts @@ -1,8 +1,8 @@ +import { opSkip, opTake, pipe } from '@cspell/cspell-pipe/sync'; import { describe, expect, test } from 'vitest'; import { readTrie } from '../../test/dictionaries.test.helper.js'; import { FastTrieBlob } from './FastTrieBlob.js'; -import { measure } from './test/perf.js'; function getTrie() { return readTrie('@cspell/dict-en_us/cspell-ext.json'); @@ -11,26 +11,21 @@ function getTrie() { describe('Validate English FastTrieBlob', async () => { const pTrie = getTrie(); const sampleTrie = await pTrie; - const sampleWordsLarge = [...sampleTrie.words()]; - const fastTrieBlob = FastTrieBlob.fromWordList(sampleWordsLarge); + const sampleWordsLarge = [...pipe(sampleTrie.words(), opSkip(1000), opTake(6000))]; + const fastTrieBlob = FastTrieBlob.fromTrieRoot(sampleTrie.root); test('insert', () => { - const words = sampleWordsLarge.slice(1000, 6000); + const words = sampleWordsLarge; const ft = new FastTrieBlob(); - measure('FastTrieBlob', () => ft.insert(words)); + ft.insert(words); const result = [...ft.words()]; expect(result).toEqual(words); }); test('has', () => { - const words = sampleWordsLarge.slice(1000, 6000); + const words = sampleWordsLarge; for (const word of words) { expect(fastTrieBlob.has(word)).toBe(true); } }); - - test('fromTrieRoot', () => { - const ft = FastTrieBlob.fromTrieRoot(sampleTrie.root); - expect(ft.has('hello')).toBe(true); - }); }); diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.test.ts index 57ba0926fd4..d55505b3339 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.test.ts @@ -17,7 +17,7 @@ describe('FastTrieBlob', () => { expect(words.findIndex((word) => !ft.has(word))).toBe(-1); }); - test('', () => { + test('createTriFromList', () => { const root = createTriFromList(words); const ft = FastTrieBlob.fromTrieRoot(root); expect(ft.has('walk')).toBe(true); diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts index ddb206e13b3..754e77ac8a0 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts @@ -1,4 +1,5 @@ import type { TrieNode, TrieRoot } from '../TrieNode/TrieNode.js'; +import { resolveMap } from './resolveMap.js'; import { TrieBlob } from './TrieBlob.js'; type FastTrieBlobNode = number[]; @@ -165,7 +166,7 @@ export class FastTrieBlob { for (let i = 0; i < nodes.length; ++i) { const node = nodes[i]; // assert(offset === nodeToIndex[i]); - binNodes[offset++] = (node.length << lenShift) | node[0]; + binNodes[offset++] = ((node.length - 1) << lenShift) | node[0]; for (let j = 1; j < node.length; ++j) { const v = node[j]; const nodeRef = v >>> NodeChildRefShift; @@ -225,11 +226,3 @@ export class FastTrieBlob { return tf.freeze(); } } - -function resolveMap(map: Map, key: K, resolve: (key: K) => V): V { - const r = map.get(key); - if (r !== undefined) return r; - const v = resolve(key); - map.set(key, v); - return v; -} diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts index 26de25e8ca0..2118603d0e4 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts @@ -1,10 +1,11 @@ import { describe, expect, test } from 'vitest'; -import { createTrieBlob } from './createTrieBlob.js'; +import { createTriFromList } from '../TrieNode/trie-util.js'; +import { createTrieBlob, createTrieBlobFromTrieRoot } from './createTrieBlob.js'; import { TrieBlob } from './TrieBlob.js'; describe('TrieBlob', () => { - const sampleWords = ['one', 'two', 'three', 'four', 'walk', 'walking', 'walks', 'wall', 'walls', 'walled']; + const sampleWords = ['one', 'two', 'three', 'four', 'walk', 'walking', 'walks', 'wall', 'walls', 'walled'].sort(); test('Constructor', () => { const tb = createTrieBlob(['one', 'two']); @@ -30,4 +31,10 @@ describe('TrieBlob', () => { expect(r).toEqual(tb); expect([...r.words()]).toEqual(sampleWords); }); + + test('createTrieBlobFromTrieRoot', () => { + const root = createTriFromList(sampleWords); + const trieBlob = createTrieBlobFromTrieRoot(root); + expect([...trieBlob.words()]).toEqual(sampleWords); + }); }); diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts index 41f8ca63ea5..587b8b7e483 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts @@ -51,7 +51,7 @@ export class TrieBlob { for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) { const letterIdx = charToIndexMap[word[p]]; const count = node & NodeMaskNumChildren; - let i = count - 1; + let i = count; for (; i > 0; --i) { if ((nodes[i + nodeIdx] & NodeMaskChildCharIndex) === letterIdx) { break; @@ -81,12 +81,12 @@ export class TrieBlob { while (depth >= 0) { const { nodeIdx, pos, word } = stack[depth]; const node = nodes[nodeIdx]; - + // pos is 0 when first entering a node if (!pos && node & NodeMaskEOW) { yield word; } const len = node & NodeMaskNumChildren; - if (pos >= len - 1) { + if (pos >= len) { --depth; continue; } @@ -103,10 +103,6 @@ export class TrieBlob { } } - private lookUpCharIndex(char: string): number { - return this.charToIndexMap[char] ?? -1; - } - toJSON() { return { charIndex: this.charIndex, diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts index 95654653e28..a1346502eb5 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts @@ -1,7 +1,65 @@ +import type { TrieNode, TrieRoot } from '../TrieNode/TrieNode.js'; import { FastTrieBlob } from './FastTrieBlob.js'; -import type { TrieBlob } from './TrieBlob.js'; +import { resolveMap } from './resolveMap.js'; +import { TrieBlob } from './TrieBlob.js'; export function createTrieBlob(words: string[]): TrieBlob { const ft = FastTrieBlob.fromWordList(words); return ft.toTrieBlob(); } + +export function createTrieBlobFromTrieRoot(root: TrieRoot): TrieBlob { + const NodeMaskEOW = TrieBlob.NodeMaskEOW; + const NodeChildRefShift = TrieBlob.NodeChildRefShift; + const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren; + const nodes: number[] = []; + const charIndex: string[] = ['']; + const charMap: Record = Object.create(null); + const known = new Map(); + + known.set(root, appendNode(root)); + const IdxEOW = appendNode({ f: 1 }); + + function getCharIndex(char: string): number { + const idx = charMap[char]; + if (idx) return idx; + const newIdx = charIndex.push(char) - 1; + charMap[char.normalize('NFC')] = newIdx; + charMap[char.normalize('NFD')] = newIdx; + return newIdx; + } + + function appendNode(n: TrieNode): number { + const idx = nodes.push(n.f ? NodeMaskEOW : 0) - 1; + if (n.c) { + const keys = Object.keys(n.c).map((key) => getCharIndex(key)); + nodes[idx] = nodes[idx] | (keys.length & NodeMaskNumChildren); + nodes.push(...keys); + } + return idx; + } + + function resolveNode(n: TrieNode): number { + if (n.f && !n.c) return IdxEOW; + return appendNode(n); + } + + function walk(n: TrieNode): number { + const found = known.get(n); + if (found) return found; + const nodeIdx = resolveMap(known, n, resolveNode); + if (!n.c) return nodeIdx; + const children = Object.values(n.c); + for (let p = 0; p < children.length; ++p) { + const childNode = children[p]; + const childIdx = walk(childNode); + // Nodes already have the letters, just OR in the child index. + nodes[nodeIdx + p + 1] |= childIdx << NodeChildRefShift; + } + return nodeIdx; + } + + walk(root); + + return new TrieBlob(Uint32Array.from(nodes), charIndex); +} diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/resolveMap.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/resolveMap.ts new file mode 100644 index 00000000000..7974339c2ea --- /dev/null +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/resolveMap.ts @@ -0,0 +1,7 @@ +export function resolveMap(map: Map, key: K, resolve: (key: K) => V): V { + const r = map.get(key); + if (r !== undefined) return r; + const v = resolve(key); + map.set(key, v); + return v; +} diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/test/perf.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/test/perf.ts deleted file mode 100644 index 679542ecd14..00000000000 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/test/perf.ts +++ /dev/null @@ -1,8 +0,0 @@ -export function measure(name: string, fn: () => R): R { - const start = performance.now(); - const r = fn(); - const end = performance.now(); - const elapsed = (' '.repeat(16) + `${(end - start).toFixed(3)}ms.`).slice(-16); - console.log(`${name} ${elapsed}`); - return r; -} diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts index f0216da922c..ed0b0ea5f47 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts @@ -4,9 +4,10 @@ import { readFileSync, writeFileSync } from 'fs'; import type { TrieNode } from '../../../index.js'; import { createTrieRoot, insert, Trie } from '../../../index.js'; import { readTrie } from '../../../test/dictionaries.test.helper.js'; +import { getGlobalPerfTimer } from '../../utils/timer.js'; +import { createTrieBlobFromTrieRoot } from '../createTrieBlob.js'; import { FastTrieBlob } from '../FastTrieBlob.js'; import { TrieBlob } from '../TrieBlob.js'; -import { measure } from './perf.js'; function getTrie() { return readTrie('@cspell/dict-en_us/cspell-ext.json'); @@ -23,17 +24,34 @@ function hasWords(words: string[], method: (word: string) => boolean): boolean { } export async function measureFastBlob(which: string | undefined, method: string | undefined) { - const trie = await getTrie(); + const timer = getGlobalPerfTimer(); + timer.start('measureFastBlob'); + const trie = await timer.measureAsyncFn('getTrie', getTrie); + timer.start('words'); const words = [...trie.words()]; + timer.stop('words'); + timer.mark('done with setup'); + + timer.start('blob'); if (filterTest(which, 'blob')) { - const ft = measure('blob.FastTrieBlob.fromTrieRoot \t', () => FastTrieBlob.fromTrieRoot(trie.root)); - const trieBlob = measure('blob.FastTrieBlob.toTrieBlob \t', () => ft.toTrieBlob()); + { + const ft = timer.measureFn('blob.FastTrieBlob.fromTrieRoot \t', () => FastTrieBlob.fromTrieRoot(trie.root)); + timer.measureFn('blob.FastTrieBlob.toTrieBlob \t', () => ft.toTrieBlob()); + } + const trieBlob = timer.measureFn('blob.createTrieBlobFromTrieRoot\t', () => + createTrieBlobFromTrieRoot(trie.root) + ); switch (method) { case 'has': - measure('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => trieBlob.has(word))); - measure('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => trieBlob.has(word))); + timer.measureFn('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => trieBlob.has(word))); + timer.measureFn('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => trieBlob.has(word))); + break; + case 'words': + timer.start('blob.words'); + [...trieBlob.words()]; + timer.stop('blob.words'); break; case 'dump': writeFileSync('./TrieBlob.en.json', JSON.stringify(trieBlob, null, 2), 'utf8'); @@ -41,40 +59,53 @@ export async function measureFastBlob(which: string | undefined, method: string break; case 'decode': { - const tb = measure('blob.TrieBlob.decodeBin \t', () => { + const tb = timer.measureFn('blob.TrieBlob.decodeBin \t', () => { return TrieBlob.decodeBin(readFileSync('./TrieBlob.en.trieb')); }); - measure('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => tb.has(word))); - measure('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => tb.has(word))); + timer.measureFn('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => tb.has(word))); + timer.measureFn('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => tb.has(word))); } break; } } + timer.stop('blob'); + timer.start('fast'); if (filterTest(which, 'fast')) { - const ft = measure('fast.FastTrieBlob.fromWordList \t', () => FastTrieBlob.fromWordList(words)); + const ft = timer.measureFn('fast.FastTrieBlob.fromWordList \t', () => FastTrieBlob.fromWordList(words)); switch (method) { case 'has': - measure('fast.FastTrieBlob.has \t\t', () => hasWords(words, (word) => ft.has(word))); - measure('fast.FastTrieBlob.has \t\t', () => hasWords(words, (word) => ft.has(word))); + timer.measureFn('fast.FastTrieBlob.has \t\t', () => hasWords(words, (word) => ft.has(word))); + timer.measureFn('fast.FastTrieBlob.has \t\t', () => hasWords(words, (word) => ft.has(word))); + break; + case 'words': + timer.start('blob.words'); + [...ft.words()]; + timer.stop('blob.words'); break; } } + timer.stop('fast'); + timer.start('trie'); if (filterTest(which, 'trie')) { const root = createTrieRoot({}); - measure('trie.createTriFromList \t\t', () => insertWords(root, words)); + timer.measureFn('trie.createTriFromList \t\t', () => insertWords(root, words)); const trie = new Trie(root); switch (method) { case 'has': - measure('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true))); - measure('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true))); + timer.measureFn('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true))); + timer.measureFn('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true))); break; } } + timer.stop('trie'); + timer.stop('measureFastBlob'); + timer.stop(); + timer.report(); } function filterTest(value: string | undefined, expected: string): boolean { diff --git a/packages/cspell-trie-lib/src/lib/TrieNode/trie-util.ts b/packages/cspell-trie-lib/src/lib/TrieNode/trie-util.ts index c051aed65df..6fe56dc4fe9 100644 --- a/packages/cspell-trie-lib/src/lib/TrieNode/trie-util.ts +++ b/packages/cspell-trie-lib/src/lib/TrieNode/trie-util.ts @@ -1,7 +1,5 @@ -import { opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync'; - import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js'; -import { walker } from '../walker/walker.js'; +import { walker, walkerWords } from '../walker/walker.js'; import type { YieldResult } from '../walker/walkerTypes.js'; import type { PartialTrieOptions, TrieNode, TrieRoot } from './TrieNode.js'; import { FLAG_WORD } from './TrieNode.js'; @@ -49,11 +47,7 @@ export const iterateTrie = walk; * Generate a Iterator that can walk a Trie and yield the words. */ export function iteratorTrieWords(node: TrieNode): Iterable { - return pipe( - walk(node), - opFilter((r) => isWordTerminationNode(r.node)), - opMap((r) => r.text) - ); + return walkerWords(node); } export function createTrieRoot(options: PartialTrieOptions): TrieRoot { diff --git a/packages/cspell-trie-lib/src/lib/io/importExport.ts b/packages/cspell-trie-lib/src/lib/io/importExport.ts index 9945b71e1d9..29053689103 100644 --- a/packages/cspell-trie-lib/src/lib/io/importExport.ts +++ b/packages/cspell-trie-lib/src/lib/io/importExport.ts @@ -1,5 +1,3 @@ -import { toDistributableIterable } from '@cspell/cspell-pipe'; - import type { TrieRoot } from '../TrieNode/TrieNode.js'; import * as iv1 from './importExportV1.js'; import * as iv2 from './importExportV2.js'; @@ -23,7 +21,15 @@ const serializers: readonly Serializer[] = [ iv4.serializeTrie, ] as const; -const deserializers = [iv1.importTrie, iv1.importTrie, iv2.importTrie, iv3.importTrie, iv4.importTrie] as const; +type Deserializer = (data: string[]) => TrieRoot; + +const deserializers: readonly Deserializer[] = [ + iv1.importTrie, + iv1.importTrie, + iv2.importTrie, + iv3.importTrie, + iv4.importTrie, +] as const; const DEFAULT_VERSION = 3; @@ -42,17 +48,21 @@ export function serializeTrie(root: TrieRoot, options: ExportOptions | number = return method(root, options); } -export function importTrie(lines: Iterable | IterableIterator | string[]): TrieRoot { - const aLines = Array.isArray(lines) ? lines : [...lines]; +const headerReg = /^\s*TrieXv(\d+)/m; + +export function importTrie(input: Iterable | IterableIterator | string[] | string): TrieRoot { + const lines = Array.isArray(input) ? input : typeof input === 'string' ? input.split('\n') : [...input]; function parseHeaderRows(headerRows: string[]): number { - const header = headerRows.join('\n'); - const headerReg = /^\s*TrieXv(\d+)/m; - const match = header.match(headerReg); - if (!match) throw new Error('Unknown file format'); - return parseInt(match[1], 10); + for (let i = 0; i < headerRows.length; ++i) { + const match = headerRows[i].match(headerReg); + if (match) { + return parseInt(match[1], 10); + } + } + throw new Error('Unknown file format'); } - function readHeader(iter: Iterable) { + function readHeader(iter: string[]) { const headerRows: string[] = []; for (const entry of iter) { const line = entry.trim(); @@ -64,13 +74,11 @@ export function importTrie(lines: Iterable | IterableIterator | return headerRows; } - const input = toDistributableIterable(aLines); - const headerLines = readHeader(input); + const headerLines = readHeader(lines); const version = parseHeaderRows(headerLines); - const stream = [...headerLines, ...input]; const method = deserializers[version]; if (!method) { throw new Error(`Unsupported version: ${version}`); } - return method(stream); + return method(lines); } diff --git a/packages/cspell-trie-lib/src/lib/io/importExportV3.ts b/packages/cspell-trie-lib/src/lib/io/importExportV3.ts index 2c388edf9ff..4b4176aaa0e 100644 --- a/packages/cspell-trie-lib/src/lib/io/importExportV3.ts +++ b/packages/cspell-trie-lib/src/lib/io/importExportV3.ts @@ -1,9 +1,10 @@ -import { opAppend, opConcatMap, pipe, reduce } from '@cspell/cspell-pipe/sync'; +import { opAppend, pipe } from '@cspell/cspell-pipe/sync'; import { trieNodeToRoot } from '../TrieNode/trie-util.js'; import type { TrieNode, TrieRoot } from '../TrieNode/TrieNode.js'; import { FLAG_WORD } from '../TrieNode/TrieNode.js'; import { bufferLines } from '../utils/bufferLines.js'; +import { getGlobalPerfTimer } from '../utils/timer.js'; const EOW = '$'; // End of word const BACK = '<'; // Move up the tree @@ -13,10 +14,8 @@ const REF = '#'; // Start of Reference const EOR = ';'; // End of Reference const ESCAPE = '\\'; -const specialCharacters = new Set( - [EOW, BACK, EOL, REF, EOR, ESCAPE, LF] - .concat('0123456789'.split('')) - .concat('`~!@#$%^&*()_-+=[]{};:\'"<>,./?\\|'.split('')) +const specialCharacters = stringToCharSet( + [EOW, BACK, EOL, REF, EOR, ESCAPE, LF, '0123456789', '`~!@#$%^&*()_-+=[]{};:\'"<>,./?\\|'].join('') ); const specialCharacterMap = new Map([ @@ -73,7 +72,7 @@ export function serializeTrie(root: TrieRoot, options: ExportOptions | number = } function escape(s: string): string { - return specialCharacters.has(s) ? ESCAPE + (specialCharacterMap.get(s) || s) : s; + return s in specialCharacters ? ESCAPE + (specialCharacterMap.get(s) || s) : s; } function* flush() { @@ -169,10 +168,6 @@ export function serializeTrie(root: TrieRoot, options: ExportOptions | number = return pipe(generateHeader(radix, comment), opAppend(bufferLines(serialize(root), 1200, ''))); } -function* toIterableIterator(iter: Iterable): IterableIterator { - yield* iter; -} - interface Stack { node: TrieNode; s: string; @@ -187,13 +182,16 @@ interface ReduceResults { type Reducer = (acc: ReduceResults, s: string) => ReduceResults; -export function importTrie(linesX: Iterable | string): TrieRoot { - linesX = typeof linesX === 'string' ? linesX.split(/^/m) : linesX; +export function importTrie(linesX: string[] | Iterable | string): TrieRoot { + const timer = getGlobalPerfTimer(); + const timerStart = timer.start('importTrieV3'); + const dataLines: string[] = + typeof linesX === 'string' ? linesX.split('\n') : Array.isArray(linesX) ? linesX : [...linesX]; + const root: TrieRoot = trieNodeToRoot({}, {}); let radix = 16; const comment = /^\s*#/; - const iter = toIterableIterator(linesX); function parseHeaderRows(headerRows: string[]) { const header = headerRows.slice(0, 2).join('\n'); @@ -203,15 +201,20 @@ export function importTrie(linesX: Iterable | string): TrieRoot { radix = Number.parseInt(header.replace(headerReg, '$1'), 10); } - function readHeader(iter: Iterator) { - const headerRows: string[] = []; - // eslint-disable-next-line no-constant-condition - while (true) { - const next = iter.next(); - if (next.done) { - break; + function findStartOfData(data: string[]): number { + for (let i = 0; i < data.length; ++i) { + const line = data[i]; + if (line.includes(DATA)) { + return i; } - const line = next.value.trim().replace(/\r|\n/g, ''); + } + return -1; + } + + function readHeader(data: string[]) { + const headerRows: string[] = []; + for (const hLine of data) { + const line = hLine.trim(); if (!line || comment.test(line)) { continue; } @@ -223,22 +226,34 @@ export function importTrie(linesX: Iterable | string): TrieRoot { parseHeaderRows(headerRows); } - readHeader(iter); - - const n = reduce( - pipe( - iter, - opConcatMap((a) => a.split('')) - ), - parseStream(radix), - { - nodes: [root], - root, - stack: [{ node: root, s: '' }], - parser: undefined, + const startOfData = findStartOfData(dataLines); + if (startOfData < 0) { + throw new Error('Unknown file format'); + } + + readHeader(dataLines.slice(0, startOfData)); + + let node: ReduceResults = { + nodes: [root], + root, + stack: [{ node: root, s: '' }], + parser: undefined, + }; + + const parser = parseStream(radix); + + const timerParse = timer.start('importTrieV3.parse'); + + for (let i = startOfData + 1; i < dataLines.length; ++i) { + const line = dataLines[i]; + for (let j = 0; j < line.length; ++j) { + node = parser(node, line[j]); } - ); - return n.root; + } + timerParse(); + timerStart(); + + return node.root; } function parseStream(radix: number): Reducer { @@ -262,7 +277,8 @@ function parseStream(radix: number): Reducer { const { nodes } = acc; nodes.pop(); - return { ...acc, nodes, parser }; + acc.parser = parser; + return acc; } function parseEscapeCharacter(acc: ReduceResults, _: string): ReduceResults { @@ -270,20 +286,23 @@ function parseStream(radix: number): Reducer { const parser = function (acc: ReduceResults, s: string): ReduceResults { if (prev) { s = characterMap.get(prev + s) || s; - return parseCharacter({ ...acc, parser: undefined }, s); + acc.parser = undefined; + return parseCharacter(acc, s); } if (s === ESCAPE) { prev = s; return acc; } - return parseCharacter({ ...acc, parser: undefined }, s); + acc.parser = undefined; + return parseCharacter(acc, s); }; - return { ...acc, parser }; + acc.parser = parser; + return acc; } function parseCharacter(acc: ReduceResults, s: string): ReduceResults { const parser = undefined; - const { root, nodes, stack } = acc; + const { nodes, stack } = acc; const top = stack[stack.length - 1]; const node = top.node; const c = node.c ?? Object.create(null); @@ -292,12 +311,13 @@ function parseStream(radix: number): Reducer { c[s] = n; stack.push({ node: n, s }); nodes.push(n); - return { root, nodes, stack, parser }; + acc.parser = parser; + return acc; } function parseEOW(acc: ReduceResults, _: string): ReduceResults { const parser = parseBack; - const { root, nodes, stack } = acc; + const { nodes, stack } = acc; const top = stack[stack.length - 1]; const node = top.node; node.f = FLAG_WORD; @@ -308,20 +328,23 @@ function parseStream(radix: number): Reducer { nodes.pop(); } stack.pop(); - return { root, nodes, stack, parser }; + acc.parser = parser; + return acc; } - const charactersBack = new Set((BACK + '23456789').split('')); + const charactersBack = stringToCharSet(BACK + '23456789'); function parseBack(acc: ReduceResults, s: string): ReduceResults { - if (!charactersBack.has(s)) { - return parserMain({ ...acc, parser: undefined }, s); + if (!(s in charactersBack)) { + acc.parser = undefined; + return parserMain(acc, s); } let n = s === BACK ? 1 : parseInt(s, 10) - 1; const { stack } = acc; while (n-- > 0) { stack.pop(); } - return { ...acc, parser: parseBack }; + acc.parser = parseBack; + return acc; } function parseIgnore(acc: ReduceResults, _: string): ReduceResults { diff --git a/packages/cspell-trie-lib/src/lib/suggestions/suggest-en-a-star.test.ts b/packages/cspell-trie-lib/src/lib/suggestions/suggest-en-a-star.test.ts index b1e95406ce4..ab1c1f2ddd5 100644 --- a/packages/cspell-trie-lib/src/lib/suggestions/suggest-en-a-star.test.ts +++ b/packages/cspell-trie-lib/src/lib/suggestions/suggest-en-a-star.test.ts @@ -2,7 +2,7 @@ import { describe, expect, test } from 'vitest'; import { readTrie } from '../../test/dictionaries.test.helper.js'; import { clean } from '../utils/clean.js'; -import { createTimer } from '../utils/timer.js'; +import { startTimer } from '../utils/timer.js'; import { CompoundWordsMethod } from '../walker/index.js'; import type { GenSuggestionOptionsStrict } from './genSuggestionsOptions.js'; import { genCompoundableSuggestions, suggest } from './suggestAStar.js'; @@ -201,7 +201,7 @@ describe('Validate English Suggestions', () => { const trie = await getTrie(); // cspell:ignore testscompundsuggestions const collector = suggestionCollector('testscompundsuggestions', opts(1, undefined, 3, false)); - const timer = createTimer(); + const timer = startTimer(); collector.collect( genCompoundableSuggestions( trie.root, @@ -210,7 +210,7 @@ describe('Validate English Suggestions', () => { ), suggestionTimeout ); - const elapsed = timer.elapsed(); + const elapsed = timer(); expect(elapsed).toBeLessThan(suggestionTimeout * 4); }, timeout diff --git a/packages/cspell-trie-lib/src/lib/suggestions/suggest-en.test.ts b/packages/cspell-trie-lib/src/lib/suggestions/suggest-en.test.ts index 70ff9f16251..54ab60f8e4c 100644 --- a/packages/cspell-trie-lib/src/lib/suggestions/suggest-en.test.ts +++ b/packages/cspell-trie-lib/src/lib/suggestions/suggest-en.test.ts @@ -6,7 +6,7 @@ import type { WeightMap } from '../distance/index.js'; import { mapDictionaryInformationToWeightMap } from '../mappers/mapDictionaryInfoToWeightMap.js'; import type { DictionaryInformation } from '../models/DictionaryInformation.js'; import { clean } from '../utils/clean.js'; -import { createTimer } from '../utils/timer.js'; +import { startTimer } from '../utils/timer.js'; import { CompoundWordsMethod } from '../walker/index.js'; import type { SuggestionOptions } from './genSuggestionsOptions.js'; import { genCompoundableSuggestions, suggest } from './suggest.js'; @@ -176,9 +176,9 @@ describe('Validate English Suggestions', () => { const trie = await getTrie(); // cspell:ignore testscompundsuggestions const collector = suggestionCollector('testscompundsuggestions', opts(1, undefined, 3)); - const timer = createTimer(); + const timer = startTimer(); collector.collect(genCompoundableSuggestions(trie.root, collector.word, SEPARATE_WORDS), suggestionTimeout); - const elapsed = timer.elapsed(); + const elapsed = timer(); expect(elapsed).toBeLessThan(suggestionTimeout * 4); }, timeout diff --git a/packages/cspell-trie-lib/src/lib/suggestions/suggestCollector.ts b/packages/cspell-trie-lib/src/lib/suggestions/suggestCollector.ts index 2ea2c25d87e..cc57a0c4987 100644 --- a/packages/cspell-trie-lib/src/lib/suggestions/suggestCollector.ts +++ b/packages/cspell-trie-lib/src/lib/suggestions/suggestCollector.ts @@ -2,7 +2,7 @@ import type { WeightMap } from '../distance/index.js'; import { editDistanceWeighted } from '../distance/index.js'; import { addDefToWeightMap } from '../distance/weightedMaps.js'; import type { RequireOptional } from '../types.js'; -import { createTimer } from '../utils/timer.js'; +import { startTimer } from '../utils/timer.js'; import { clean, regexQuote, replaceAllFactory } from '../utils/util.js'; import { WORD_SEPARATOR } from '../walker/index.js'; import { DEFAULT_COMPOUNDED_WORD_SEPARATOR } from './constants.js'; @@ -256,11 +256,11 @@ export function suggestionCollector(wordToMatch: string, options: SuggestionColl timeout = Math.min(timeout, timeRemaining); if (timeout < 0) return; - const timer = createTimer(); + const timer = startTimer(); let ir: IteratorResult; while (!(ir = src.next(stop || maxCost)).done) { - if (timer.elapsed() > timeout) { + if (timer() > timeout) { stop = symStopProcessing; } const { value } = ir; @@ -273,7 +273,7 @@ export function suggestionCollector(wordToMatch: string, options: SuggestionColl } } - timeRemaining -= timer.elapsed(); + timeRemaining -= timer(); } function cleanCompoundResult(sr: SuggestionResultBase): SuggestionResult { diff --git a/packages/cspell-trie-lib/src/lib/utils/timer.test.ts b/packages/cspell-trie-lib/src/lib/utils/timer.test.ts index 14b0533375a..12acbc94aaf 100644 --- a/packages/cspell-trie-lib/src/lib/utils/timer.test.ts +++ b/packages/cspell-trie-lib/src/lib/utils/timer.test.ts @@ -1,26 +1,83 @@ import { promisify } from 'util'; import { describe, expect, test } from 'vitest'; -import { createTimer, polyHrTime } from './timer.js'; +import { createPerfTimer, getGlobalPerfTimer, measure, measureAsync, startTimer } from './timer.js'; const delay = promisify(setTimeout); describe('timer', () => { test('createTimer', async () => { - const t = createTimer(); + const t = startTimer(); await delay(12); - expect(t.elapsed()).toBeGreaterThan(10); + expect(t()).toBeGreaterThan(10); }); test('polyHrTime', async () => { - const a = createTimer(); - const b = createTimer(polyHrTime); + const a = startTimer(); + const b = startTimer(); await delay(12); - const a1 = a.elapsed(); - const b1 = b.elapsed(); + const a1 = a(); + const b1 = b(); expect(a1).toBeGreaterThanOrEqual(10); expect(b1).toBeGreaterThanOrEqual(10); expect(Math.abs(b1 - a1)).toBeLessThan(10); }); + + test('measure', () => { + let msg = ''; + const r = measure( + 'test', + () => fib(8), + (log: string) => (msg = log) + ); + expect(r).toBe(34); + expect(msg).toContain('test '); + }); + + test('measureAsync', async () => { + let msg = ''; + const r = await measureAsync( + 'test', + async () => fib(8), + (log: string) => (msg = log) + ); + expect(r).toBe(34); + expect(msg).toContain('test '); + }); +}); + +describe('perfTimer', () => { + test('createPerfTimer', () => { + const t = createPerfTimer(); + t.start(); + const x = t.start('x'); + x(); + t.start('y'); + t.stop('y'); + t.mark('mark'); + t.stop(); + const report = t.formatReport(); + expect(report).toContain('start'); + expect(report).toContain('stop'); + expect(report).toContain('mark'); + expect(report).toContain('x'); + expect(report).toContain('y'); + }); + + test('getGlobalPerfTimer', () => { + const t = getGlobalPerfTimer(); + expect(t).toBeDefined(); + }); }); + +function fib(n: number): number { + let a = 1, + b = 0; + for (let i = 0; i < n; ++i) { + const c = a + b; + b = a; + a = c; + } + return a; +} diff --git a/packages/cspell-trie-lib/src/lib/utils/timer.ts b/packages/cspell-trie-lib/src/lib/utils/timer.ts index 31d99848e16..4588f5bdc5c 100644 --- a/packages/cspell-trie-lib/src/lib/utils/timer.ts +++ b/packages/cspell-trie-lib/src/lib/utils/timer.ts @@ -1,40 +1,169 @@ -const _hrTime: HRTimeFn = process?.hrtime || polyHrTime; - export interface Timer { - /** Start / restart the timer. */ - start(): void; /** * Calculate the amount of time in ms since the * timer was created / started. */ - elapsed(): number; + (): number; } -export function createTimer(hrTimeFn = _hrTime): Timer { - let start: HRTime = hrTimeFn(); +export function startTimer(): Timer { + const start = performance.now(); - return { - start() { - start = hrTimeFn(); - }, - elapsed() { - return toMilliseconds(hrTimeFn(start)); - }, - }; + return () => performance.now() - start; } -export type HRTimeFn = (time?: HRTime) => HRTime; +export function measure(name: string, fn: () => R, log = console.log): R { + const calcElapsed = startTimer(); + const r = fn(); + const elapsed = (' '.repeat(16) + `${calcElapsed().toFixed(3)}ms.`).slice(-16); + log(`${name} ${elapsed}`); + return r; +} + +export async function measureAsync(name: string, fn: () => Promise, log = console.log): Promise { + const calcElapsed = startTimer(); + const r = await fn(); + const elapsed = (' '.repeat(16) + `${calcElapsed().toFixed(3)}ms.`).slice(-16); + log(`${name} ${elapsed}`); + return r; +} -export type HRTime = [number, number]; +type StopTimer = Timer; -export function toMilliseconds(t: HRTime): number { - return (t[0] + t[1] * 1e-9) * 1000; +export interface PerfTimer { + start(name?: string): StopTimer; + stop(name?: string): number; + mark(name: string): number; + elapsed(): number; + report(reporter?: (text: string) => void): void; + formatReport(): string; + measureFn(name: string, fn: () => R): R; + measureAsyncFn(name: string, fn: () => Promise): Promise; } -export function polyHrTime(time?: HRTime): HRTime { - const now = Date.now() - (time ? toMilliseconds(time) : 0); - const inSeconds = now * 1.0e-3; - const s = Math.floor(inSeconds); - const n = (inSeconds - s) * 1.0e9; - return [s, n]; +interface PerfTimerEvent { + name: string | undefined; + at: number; + elapsed?: number | undefined; +} + +export function createPerfTimer(): PerfTimer { + const timer = startTimer(); + const active = new Map(); + + const events: PerfTimerEvent[] = [{ name: 'start', at: 0 }]; + + function updateEvent(event: PerfTimerEvent, atTime = timer()) { + const elapsed = atTime - event.at; + event.elapsed = (event.elapsed || 0) + elapsed; + return elapsed; + } + + function start(name?: string): StopTimer { + const event: PerfTimerEvent = createEvent(name || 'start'); + events.push(event); + name && active.set(name, event); + return () => updateEvent(event); + } + + function stop(name?: string): number { + const knownEvent = name && active.get(name); + if (knownEvent) { + return updateEvent(knownEvent); + } + return mark(name || 'stop'); + } + + function createEvent(name: string): PerfTimerEvent { + return { name, at: timer() }; + } + + function mark(name: string): number { + const event = createEvent(name); + events.push(event); + return event.at; + } + + function formatReport(): string { + const lineElements = [ + { name: 'Event Name', at: 'Time', elapsed: 'Elapsed' }, + { name: '----------', at: '----', elapsed: '-------' }, + ...mapEvents(), + ]; + + function mapEvents(): { name: string; at: string; elapsed: string }[] { + const stack: number[] = []; + + return events.map((e) => { + for (let s = stack.pop(); s; s = stack.pop()) { + if (s >= e.at + (e.elapsed || 0)) { + stack.push(s); + break; + } + } + const d = stack.length; + if (e.elapsed) { + stack.push(e.at + e.elapsed); + } + return { + name: '| '.repeat(d) + (e.name || '').replace(/\t/g, ' '), + at: `${t(e.at)}`, + elapsed: e.elapsed ? `${t(e.elapsed)}` : '--', + }; + }); + } + + function t(ms: number): string { + return ms.toFixed(3) + 'ms'; + } + function m(v: number, s: string): number { + return Math.max(v, s.length); + } + const lengths = lineElements.reduce( + (a, b) => ({ name: m(a.name, b.name), at: m(a.at, b.at), elapsed: m(a.elapsed, b.elapsed) }), + { name: 0, at: 0, elapsed: 0 } + ); + const lines = lineElements.map( + (e) => + `${e.at.padStart(lengths.at)} ${e.name.padEnd(lengths.name)} ${e.elapsed.padStart(lengths.elapsed)}` + ); + return lines.join('\n'); + } + + function measureFn(name: string, fn: () => R): R { + const s = start(name); + const v = fn(); + s(); + return v; + } + + async function measureAsyncFn(name: string, fn: () => Promise): Promise { + const s = start(name); + const v = await fn(); + s(); + return v; + } + + function report(reporter: (text: string) => void = console.log) { + reporter(formatReport()); + } + + return { + start, + stop, + mark, + elapsed: timer, + report, + formatReport, + measureFn, + measureAsyncFn, + }; +} + +let globalPerfTimer: PerfTimer | undefined = undefined; + +export function getGlobalPerfTimer(): PerfTimer { + const timer = globalPerfTimer || createPerfTimer(); + globalPerfTimer = timer; + return timer; } diff --git a/packages/cspell-trie-lib/src/lib/walker/walker.ts b/packages/cspell-trie-lib/src/lib/walker/walker.ts index 3e5c5596591..d34eea8e992 100644 --- a/packages/cspell-trie-lib/src/lib/walker/walker.ts +++ b/packages/cspell-trie-lib/src/lib/walker/walker.ts @@ -6,27 +6,28 @@ import { CompoundWordsMethod, JOIN_SEPARATOR, WORD_SEPARATOR } from './walkerTyp * Walks the Trie and yields a value at each node. * next(goDeeper: boolean): */ -export function* walker( - root: TrieNode, - compoundingMethod: CompoundWordsMethod = CompoundWordsMethod.NONE -): WalkerIterator { - const roots: { [index: number]: [string, TrieNode][] } = { +function* compoundWalker(root: TrieNode, compoundingMethod: CompoundWordsMethod): WalkerIterator { + type Children = Array<[string, TrieNode]>; + const roots: { [index: number]: Children } = { [CompoundWordsMethod.NONE]: [], [CompoundWordsMethod.JOIN_WORDS]: [[JOIN_SEPARATOR, root]], [CompoundWordsMethod.SEPARATE_WORDS]: [[WORD_SEPARATOR, root]], }; - function children(n: TrieNode): Array<[string, TrieNode]> { - if (n.c && n.f) { - return Object.entries(n.c).concat(roots[compoundingMethod]); + const rc = roots[compoundingMethod].length ? roots[compoundingMethod] : undefined; + const empty: Children = []; + + function children(n: TrieNode): Children { + if (n.c && n.f && rc) { + return Object.entries(n.c).concat(rc); } if (n.c) { return Object.entries(n.c); } - if (n.f) { - return roots[compoundingMethod]; + if (n.f && rc) { + return rc; } - return []; + return empty; } let depth = 0; @@ -49,3 +50,97 @@ export function* walker( depth -= 1; } } + +/** + * Walks the Trie and yields a value at each node. + * next(goDeeper: boolean): + */ +function* nodeWalker(root: TrieNode): WalkerIterator { + type Children = Array; + const empty: Children = []; + function children(n: TrieNode): string[] { + if (n.c) { + return Object.keys(n.c); + } + return empty; + } + + let depth = 0; + const stack: { t: string; n: Record | undefined; c: Children; ci: number }[] = []; + stack[depth] = { t: '', n: root.c, c: children(root), ci: 0 }; + while (depth >= 0) { + let s = stack[depth]; + let baseText = s.t; + while (s.ci < s.c.length && s.n) { + const char = s.c[s.ci++]; + const node = s.n[char]; + const text = baseText + char; + const goDeeper = yield { text, node, depth }; + if (goDeeper !== false) { + depth++; + baseText = text; + const s = stack[depth]; + const c = children(node); + if (s) { + s.t = text; + s.n = node.c; + s.c = c; + s.ci = 0; + } else { + stack[depth] = { t: text, n: node.c, c, ci: 0 }; + } + } + s = stack[depth]; + } + depth -= 1; + } +} + +/** + * Walks the Trie and yields each word. + */ +export function* walkerWords(root: TrieNode): Iterable { + type Children = Array; + const empty: Children = []; + function children(n: TrieNode): string[] { + if (n.c) { + return Object.keys(n.c); + } + return empty; + } + + let depth = 0; + const stack: { t: string; n: Record | undefined; c: Children; ci: number }[] = []; + stack[depth] = { t: '', n: root.c, c: children(root), ci: 0 }; + while (depth >= 0) { + let s = stack[depth]; + let baseText = s.t; + while (s.ci < s.c.length && s.n) { + const char = s.c[s.ci++]; + const node = s.n[char]; + const text = baseText + char; + if (node.f) yield text; + depth++; + baseText = text; + const c = children(node); + if (stack[depth]) { + s = stack[depth]; + s.t = text; + s.n = node.c; + s.c = c; + s.ci = 0; + } else { + stack[depth] = { t: text, n: node.c, c, ci: 0 }; + } + s = stack[depth]; + } + depth -= 1; + } +} + +export function walker( + root: TrieNode, + compoundingMethod: CompoundWordsMethod = CompoundWordsMethod.NONE +): WalkerIterator { + return compoundingMethod === CompoundWordsMethod.NONE ? nodeWalker(root) : compoundWalker(root, compoundingMethod); +} diff --git a/packages/cspell-trie-lib/src/test/reader.test.helper.ts b/packages/cspell-trie-lib/src/test/reader.test.helper.ts index 2510bd7c247..699c9f7de06 100644 --- a/packages/cspell-trie-lib/src/test/reader.test.helper.ts +++ b/packages/cspell-trie-lib/src/test/reader.test.helper.ts @@ -19,7 +19,6 @@ export async function readTrieFile(filename: string): Promise { .readFile(filename) .then((buffer) => (filename.match(/\.gz$/) ? zlib.gunzipSync(buffer) : buffer)) .then((buffer) => buffer.toString('utf8')); - const trieLines = trieFileContents.split('\n'); - const trieNode = importTrie(trieLines); + const trieNode = importTrie(trieFileContents); return new Trie(trieNode); }