diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts index 754e77ac8a0..dc373533a44 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts @@ -1,4 +1,5 @@ -import type { TrieNode, TrieRoot } from '../TrieNode/TrieNode.js'; +import type { PartialTrieOptions, TrieNode, TrieOptions, TrieRoot } from '../TrieNode/TrieNode.js'; +import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js'; import { resolveMap } from './resolveMap.js'; import { TrieBlob } from './TrieBlob.js'; @@ -14,6 +15,12 @@ export class FastTrieBlob { private nodes: FastTrieBlobNode[] = [[0], [NodeMaskEOW]]; private _readonly = false; + readonly options: Readonly; + + constructor(options?: PartialTrieOptions) { + this.options = mergeOptionalWithDefaults(options); + } + private lookUpCharIndex(char: string): number { return this.charToIndexMap[char] ?? -1; } @@ -175,7 +182,7 @@ export class FastTrieBlob { } } - return new TrieBlob(binNodes, this.charIndex); + return new TrieBlob(binNodes, this.charIndex, this.options); } isReadonly(): boolean { diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts index 2118603d0e4..5126f31e280 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts @@ -1,6 +1,7 @@ import { describe, expect, test } from 'vitest'; import { createTriFromList } from '../TrieNode/trie-util.js'; +import { walkerWordsITrie } from '../walker/walker.js'; import { createTrieBlob, createTrieBlobFromTrieRoot } from './createTrieBlob.js'; import { TrieBlob } from './TrieBlob.js'; @@ -37,4 +38,11 @@ describe('TrieBlob', () => { const trieBlob = createTrieBlobFromTrieRoot(root); expect([...trieBlob.words()]).toEqual(sampleWords); }); + + test('toITrieNodeRoot', () => { + const root = createTriFromList(sampleWords); + const trieBlob = createTrieBlobFromTrieRoot(root); + const iter = walkerWordsITrie(TrieBlob.toITrieNodeRoot(trieBlob)); + expect([...iter]).toEqual(sampleWords); + }); }); diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts index 587b8b7e483..b47a96e6512 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts @@ -1,3 +1,8 @@ +import { defaultTrieOptions } from '../constants.js'; +import type { ITrieNode, ITrieNodeRoot } from '../TrieNode/ITrieNode.js'; +import type { PartialTrieOptions, TrieOptions } from '../TrieNode/TrieNode.js'; +import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js'; + const NodeHeaderNumChildrenBits = 8; const NodeHeaderNumChildrenShift = 0; @@ -29,8 +34,11 @@ const version = '00.01.00'; const endianSig = 0x04030201; export class TrieBlob { - private charToIndexMap: Record; - constructor(private nodes: Uint32Array, private charIndex: string[]) { + protected charToIndexMap: Record; + readonly options: Readonly; + + constructor(protected nodes: Uint32Array, protected charIndex: string[], options: PartialTrieOptions) { + this.options = mergeOptionalWithDefaults(options); this.charToIndexMap = Object.create(null); for (let i = 0; i < charIndex.length; ++i) { const char = charIndex[i]; @@ -106,7 +114,8 @@ export class TrieBlob { toJSON() { return { charIndex: this.charIndex, - nodes: splitString(Buffer.from(this.nodes.buffer).toString('base64')), + options: this.options, + nodes: splitString(Buffer.from(this.nodes.buffer, 128).toString('base64')), }; } @@ -153,7 +162,16 @@ export class TrieBlob { .toString('utf8') .split('\n'); const nodes = new Uint32Array(blob.buffer).subarray(offsetNodes / 4, offsetNodes / 4 + lenNodes); - return new TrieBlob(nodes, charIndex); + return new TrieBlob(nodes, charIndex, defaultTrieOptions); + } + + static toITrieNodeRoot(trie: TrieBlob): ITrieNodeRoot { + const trieData: TrieBlobInternals = { + nodes: trie.nodes, + charIndex: trie.charIndex, + charToIndexMap: trie.charToIndexMap, + }; + return new TrieBlobIRoot(trieData, 0, trie.options); } static NodeMaskEOW = 0x00000100; @@ -209,3 +227,80 @@ function splitString(s: string, len = 64): string[] { // } // console.log(values.join(' ')); // } + +interface TrieBlobInternals { + readonly nodes: Uint32Array; + readonly charIndex: string[]; + readonly charToIndexMap: Readonly>; +} + +const EmptyKeys: readonly string[] = Object.freeze([]); + +class TrieBlobINode implements ITrieNode { + readonly size: number; + readonly node: number; + readonly eow: boolean; + keys: string[] | undefined; + charToIdx: Record | undefined; + + constructor(readonly trie: TrieBlobInternals, readonly nodeIdx: number) { + const node = trie.nodes[nodeIdx]; + this.node = node; + this.eow = !!(node & TrieBlob.NodeMaskEOW); + this.size = node & TrieBlob.NodeMaskNumChildren; + } + + /** get keys to children */ + getKeys(): readonly string[] { + if (this.keys) return this.keys; + if (!this.size) return EmptyKeys; + const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex; + const charIndex = this.trie.charIndex; + const keys = Array(this.size); + const offset = this.nodeIdx + 1; + const len = this.size; + for (let i = 0; i < len; ++i) { + const entry = this.trie.nodes[i + offset]; + const charIdx = entry & NodeMaskChildCharIndex; + keys[i] = charIndex[charIdx]; + } + this.keys = keys; + return keys; + } + + /** get child ITrieNode */ + get(char: string): ITrieNode | undefined { + const idx = this.getCharToIdxMap()[char]; + if (idx === undefined) return undefined; + return this.child(idx); + } + + has(char: string): boolean { + const idx = this.getCharToIdxMap()[char]; + return idx !== undefined; + } + + child(keyIdx: number): ITrieNode | undefined { + const n = this.trie.nodes[this.nodeIdx + keyIdx + 1]; + const nodeIdx = n >>> TrieBlob.NodeChildRefShift; + return new TrieBlobINode(this.trie, nodeIdx); + } + + getCharToIdxMap(): Record { + const m = this.charToIdx; + if (m) return m; + const map: Record = Object.create(null); + const keys = this.getKeys(); + for (let i = 0; i < keys.length; ++i) { + map[keys[i]] = i; + } + this.charToIdx = map; + return map; + } +} + +class TrieBlobIRoot extends TrieBlobINode implements ITrieNodeRoot { + constructor(trie: TrieBlobInternals, nodeIdx: number, readonly options: Readonly) { + super(trie, nodeIdx); + } +} diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts index a1346502eb5..80d6b4b2d69 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/createTrieBlob.ts @@ -61,5 +61,5 @@ export function createTrieBlobFromTrieRoot(root: TrieRoot): TrieBlob { walk(root); - return new TrieBlob(Uint32Array.from(nodes), charIndex); + return new TrieBlob(Uint32Array.from(nodes), charIndex, root); } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts index ed0b0ea5f47..8c848012b7a 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts @@ -5,6 +5,7 @@ import type { TrieNode } from '../../../index.js'; import { createTrieRoot, insert, Trie } from '../../../index.js'; import { readTrie } from '../../../test/dictionaries.test.helper.js'; import { getGlobalPerfTimer } from '../../utils/timer.js'; +import { walkerWords, walkerWordsITrie } from '../../walker/walker.js'; import { createTrieBlobFromTrieRoot } from '../createTrieBlob.js'; import { FastTrieBlob } from '../FastTrieBlob.js'; import { TrieBlob } from '../TrieBlob.js'; @@ -52,10 +53,19 @@ export async function measureFastBlob(which: string | undefined, method: string timer.start('blob.words'); [...trieBlob.words()]; timer.stop('blob.words'); + + timer.start('blob.walkerWordsITrie'); + [...walkerWordsITrie(TrieBlob.toITrieNodeRoot(trieBlob))]; + timer.stop('blob.walkerWordsITrie'); break; case 'dump': + timer.start('blob.write.TrieBlob.en.json'); writeFileSync('./TrieBlob.en.json', JSON.stringify(trieBlob, null, 2), 'utf8'); + timer.stop('blob.write.TrieBlob.en.json'); + + timer.start('blob.write.TrieBlob.en.trieb'); writeFileSync('./TrieBlob.en.trieb', trieBlob.encodeBin()); + timer.stop('blob.write.TrieBlob.en.trieb'); break; case 'decode': { @@ -80,9 +90,9 @@ export async function measureFastBlob(which: string | undefined, method: string timer.measureFn('fast.FastTrieBlob.has \t\t', () => hasWords(words, (word) => ft.has(word))); break; case 'words': - timer.start('blob.words'); + timer.start('fast.words'); [...ft.words()]; - timer.stop('blob.words'); + timer.stop('fast.words'); break; } } @@ -100,6 +110,11 @@ export async function measureFastBlob(which: string | undefined, method: string timer.measureFn('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true))); timer.measureFn('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true))); break; + case 'words': + timer.start('trie.words'); + [...trie.words()]; + timer.stop('trie.words'); + break; } } timer.stop('trie'); diff --git a/packages/cspell-trie-lib/src/lib/TrieNode/ITrieNode.ts b/packages/cspell-trie-lib/src/lib/TrieNode/ITrieNode.ts new file mode 100644 index 00000000000..d02a28257df --- /dev/null +++ b/packages/cspell-trie-lib/src/lib/TrieNode/ITrieNode.ts @@ -0,0 +1,20 @@ +import type { TrieOptions } from './TrieNode.js'; + +export interface ITrieNode { + /** flag End of Word */ + readonly eow: boolean; + /** number of children */ + readonly size: number; + /** get keys to children */ + getKeys(): readonly string[]; + /** get child ITrieNode */ + get(char: string): ITrieNode | undefined; + /** get a child by the key index */ + child(idx: number): ITrieNode | undefined; + /** has child */ + has(char: string): boolean; +} + +export interface ITrieNodeRoot extends ITrieNode { + options: Readonly; +} diff --git a/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts b/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts new file mode 100644 index 00000000000..405126f0e06 --- /dev/null +++ b/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts @@ -0,0 +1,68 @@ +import type { ITrieNode, ITrieNodeRoot } from './ITrieNode.js'; +import type { TrieNode, TrieOptions, TrieRoot } from './TrieNode.js'; + +export function trieRootToITrieRoot(root: TrieRoot): ITrieNodeRoot { + return new ImplITrieRoot(root); +} + +export function trieNodeToITrieNode(root: TrieNode): ITrieNode { + return new ImplITrieNode(root); +} + +const EmptyKeys: readonly string[] = Object.freeze([]); + +class ImplITrieNode implements ITrieNode { + private keys: readonly string[] | undefined; + constructor(readonly node: TrieNode) {} + + /** flag End of Word */ + get eow(): boolean { + return !!this.node.f; + } + + /** number of children */ + get size(): number { + if (!this.node.c) return 0; + return this.getKeys().length; + } + + /** get keys to children */ + getKeys(): readonly string[] { + if (this.keys) return this.keys; + const keys = this.node.c ? Object.keys(this.node.c) : EmptyKeys; + this.keys = keys; + return keys; + } + + /** get child ITrieNode */ + get(char: string): ITrieNode | undefined { + const n = this.node.c?.[char]; + if (!n) return undefined; + return new ImplITrieNode(n); + } + + has(char: string): boolean { + const c = this.node.c; + return (c && char in c) || false; + } + + child(keyIdx: number): ITrieNode | undefined { + const char = this.getKeys()[keyIdx]; + if (!char) return undefined; + return this.get(char); + } +} + +class ImplITrieRoot extends ImplITrieNode implements ITrieNodeRoot { + readonly options: Readonly; + + constructor(readonly root: TrieRoot) { + super(root); + const { stripCaseAndAccentsPrefix, compoundCharacter, forbiddenWordPrefix } = root; + this.options = { stripCaseAndAccentsPrefix, compoundCharacter, forbiddenWordPrefix }; + } + + get eow(): boolean { + return false; + } +} diff --git a/packages/cspell-trie-lib/src/lib/trie.test.ts b/packages/cspell-trie-lib/src/lib/trie.test.ts index 8f20372862b..d580848d9aa 100644 --- a/packages/cspell-trie-lib/src/lib/trie.test.ts +++ b/packages/cspell-trie-lib/src/lib/trie.test.ts @@ -30,7 +30,7 @@ describe('Validate Trie Class', () => { test('Tests complete', () => { const trie = Trie.create(sampleWords); - expect([...trie.completeWord('lift')]).toEqual(sampleWords.filter((w) => w.slice(0, 4) === 'lift').sort()); + expect([...trie.completeWord('lift')]).toEqual(sampleWords.filter((w) => w.startsWith('lift')).sort()); expect([...trie.completeWord('life')]).toEqual([]); expect([...trie.completeWord('lifting')]).toEqual(['lifting']); }); diff --git a/packages/cspell-trie-lib/src/lib/walker/walker.test.ts b/packages/cspell-trie-lib/src/lib/walker/walker.test.ts index a17e0783f92..d324aebcf96 100644 --- a/packages/cspell-trie-lib/src/lib/walker/walker.test.ts +++ b/packages/cspell-trie-lib/src/lib/walker/walker.test.ts @@ -4,8 +4,8 @@ import { createTriFromList, orderTrie } from '../TrieNode/trie-util.js'; import { walker } from './walker.js'; import type { WalkerIterator, YieldResult } from './walkerTypes.js'; -describe('Validate Util Functions', () => { - test('Tests Walker', () => { +describe('walker', () => { + test('walker', () => { const root = createTriFromList(sampleWords); orderTrie(root); const i = walker(root); diff --git a/packages/cspell-trie-lib/src/lib/walker/walker.ts b/packages/cspell-trie-lib/src/lib/walker/walker.ts index d34eea8e992..53e53868cda 100644 --- a/packages/cspell-trie-lib/src/lib/walker/walker.ts +++ b/packages/cspell-trie-lib/src/lib/walker/walker.ts @@ -1,3 +1,5 @@ +import type { ITrieNode } from '../TrieNode/ITrieNode.js'; +import { trieNodeToITrieNode } from '../TrieNode/trie.js'; import type { TrieNode } from '../TrieNode/TrieNode.js'; import type { WalkerIterator } from './walkerTypes.js'; import { CompoundWordsMethod, JOIN_SEPARATOR, WORD_SEPARATOR } from './walkerTypes.js'; @@ -96,10 +98,13 @@ function* nodeWalker(root: TrieNode): WalkerIterator { } } +const useITrie = false; +export const walkerWords = useITrie ? _walkerWords2 : _walkerWords; + /** * Walks the Trie and yields each word. */ -export function* walkerWords(root: TrieNode): Iterable { +function* _walkerWords(root: TrieNode): Iterable { type Children = Array; const empty: Children = []; function children(n: TrieNode): string[] { @@ -144,3 +149,49 @@ export function walker( ): WalkerIterator { return compoundingMethod === CompoundWordsMethod.NONE ? nodeWalker(root) : compoundWalker(root, compoundingMethod); } + +function _walkerWords2(root: TrieNode): Iterable { + return walkerWordsITrie(trieNodeToITrieNode(root)); +} + +/** + * Walks the Trie and yields each word. + */ +export function* walkerWordsITrie(root: ITrieNode): Iterable { + type Children = readonly string[]; + interface Stack { + t: string; + n: ITrieNode; + c: Children; + ci: number; + } + + let depth = 0; + const stack: Stack[] = []; + stack[depth] = { t: '', n: root, c: root.getKeys(), ci: 0 }; + while (depth >= 0) { + let s = stack[depth]; + let baseText = s.t; + while (s.ci < s.c.length && s.n) { + const char = s.c[s.ci++]; + const node = s.n.get(char); + if (!node) continue; + const text = baseText + char; + if (node.eow) yield text; + depth++; + baseText = text; + const c = node.getKeys(); + if (stack[depth]) { + s = stack[depth]; + s.t = text; + s.n = node; + s.c = c; + s.ci = 0; + } else { + stack[depth] = { t: text, n: node, c, ci: 0 }; + } + s = stack[depth]; + } + depth -= 1; + } +}