Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Work towards a common Trie Interface #4481

Merged
merged 2 commits into from
May 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { TrieNode, TrieRoot } from '../TrieNode/TrieNode.js';
import type { PartialTrieOptions, TrieNode, TrieOptions, TrieRoot } from '../TrieNode/TrieNode.js';
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';
import { resolveMap } from './resolveMap.js';
import { TrieBlob } from './TrieBlob.js';

Expand All @@ -14,6 +15,12 @@ export class FastTrieBlob {
private nodes: FastTrieBlobNode[] = [[0], [NodeMaskEOW]];
private _readonly = false;

readonly options: Readonly<TrieOptions>;

/**
 * Initialize the trie options for this instance.
 * @param options - optional partial options; whatever is given is passed through
 *   `mergeOptionalWithDefaults` and the result is stored in `this.options`.
 */
constructor(options?: PartialTrieOptions) {
    const resolved = mergeOptionalWithDefaults(options);
    this.options = resolved;
}

/**
 * Find the numeric index recorded for a character.
 * @param char - key to look up in `charToIndexMap`.
 * @returns the mapped index, or `-1` when the map holds no (non-nullish) entry for `char`.
 */
private lookUpCharIndex(char: string): number {
    const index = this.charToIndexMap[char];
    return index ?? -1;
}
Expand Down Expand Up @@ -175,7 +182,7 @@ export class FastTrieBlob {
}
}

return new TrieBlob(binNodes, this.charIndex);
return new TrieBlob(binNodes, this.charIndex, this.options);
}

isReadonly(): boolean {
Expand Down
8 changes: 8 additions & 0 deletions packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { describe, expect, test } from 'vitest';

import { createTriFromList } from '../TrieNode/trie-util.js';
import { walkerWordsITrie } from '../walker/walker.js';
import { createTrieBlob, createTrieBlobFromTrieRoot } from './createTrieBlob.js';
import { TrieBlob } from './TrieBlob.js';

Expand Down Expand Up @@ -37,4 +38,11 @@ describe('TrieBlob', () => {
const trieBlob = createTrieBlobFromTrieRoot(root);
expect([...trieBlob.words()]).toEqual(sampleWords);
});

// Round trip: word list -> TrieRoot -> TrieBlob -> ITrieNodeRoot view.
// Walking the view must yield exactly the original sample words.
test('toITrieNodeRoot', () => {
const root = createTriFromList(sampleWords);
const trieBlob = createTrieBlobFromTrieRoot(root);
// Wrap the blob in the ITrieNodeRoot interface and let the ITrie walker enumerate its words.
const iter = walkerWordsITrie(TrieBlob.toITrieNodeRoot(trieBlob));
expect([...iter]).toEqual(sampleWords);
});
});
103 changes: 99 additions & 4 deletions packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import { defaultTrieOptions } from '../constants.js';
import type { ITrieNode, ITrieNodeRoot } from '../TrieNode/ITrieNode.js';
import type { PartialTrieOptions, TrieOptions } from '../TrieNode/TrieNode.js';
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';

const NodeHeaderNumChildrenBits = 8;
const NodeHeaderNumChildrenShift = 0;

Expand Down Expand Up @@ -29,8 +34,11 @@ const version = '00.01.00';
const endianSig = 0x04030201;

export class TrieBlob {
private charToIndexMap: Record<string, number>;
constructor(private nodes: Uint32Array, private charIndex: string[]) {
protected charToIndexMap: Record<string, number>;
readonly options: Readonly<TrieOptions>;

constructor(protected nodes: Uint32Array, protected charIndex: string[], options: PartialTrieOptions) {
this.options = mergeOptionalWithDefaults(options);
this.charToIndexMap = Object.create(null);
for (let i = 0; i < charIndex.length; ++i) {
const char = charIndex[i];
Expand Down Expand Up @@ -106,7 +114,8 @@ export class TrieBlob {
toJSON() {
return {
charIndex: this.charIndex,
nodes: splitString(Buffer.from(this.nodes.buffer).toString('base64')),
options: this.options,
nodes: splitString(Buffer.from(this.nodes.buffer, 128).toString('base64')),
};
}

Expand Down Expand Up @@ -153,7 +162,16 @@ export class TrieBlob {
.toString('utf8')
.split('\n');
const nodes = new Uint32Array(blob.buffer).subarray(offsetNodes / 4, offsetNodes / 4 + lenNodes);
return new TrieBlob(nodes, charIndex);
return new TrieBlob(nodes, charIndex, defaultTrieOptions);
}

/**
 * Expose a TrieBlob through the `ITrieNodeRoot` interface.
 * @param trie - the blob whose node data, character tables and options back the view.
 * @returns a root node view positioned at node index 0 and carrying `trie.options`.
 */
static toITrieNodeRoot(trie: TrieBlob): ITrieNodeRoot {
    const { nodes, charIndex, charToIndexMap, options } = trie;
    const internals: TrieBlobInternals = { nodes, charIndex, charToIndexMap };
    return new TrieBlobIRoot(internals, 0, options);
}

static NodeMaskEOW = 0x00000100;
Expand Down Expand Up @@ -209,3 +227,80 @@ function splitString(s: string, len = 64): string[] {
// }
// console.log(values.join(' '));
// }

/**
 * The parts of a TrieBlob that the ITrieNode views read from.
 */
interface TrieBlobInternals {
/** packed node data: a node's header entry is followed by one entry per child */
readonly nodes: Uint32Array;
/** characters addressed by the char index stored in each child entry */
readonly charIndex: string[];
/** map from string key to number; presumably the inverse of `charIndex` (confirm against TrieBlob) */
readonly charToIndexMap: Readonly<Record<string, number>>;
}

/** Shared, frozen key list returned by every node that has no children. */
const EmptyKeys: readonly string[] = Object.freeze([]);

/**
 * `ITrieNode` view over a single node stored in a TrieBlob `nodes` array.
 *
 * As read by this class, the entry at `nodeIdx` is the node header (end-of-word flag and
 * child count). Each of the `size` entries that follow combines a position in `charIndex`
 * with the index of the child node.
 */
class TrieBlobINode implements ITrieNode {
    /** number of children, taken from the node header */
    readonly size: number;
    /** raw header value of this node */
    readonly node: number;
    /** flag End of Word */
    readonly eow: boolean;
    /** lazily built list of child keys, in storage order */
    keys: string[] | undefined;
    /** lazily built map from child key to its position in `keys` */
    charToIdx: Record<string, number> | undefined;

    /**
     * @param trie - shared blob data that every node view reads from.
     * @param nodeIdx - index of this node's header entry inside `trie.nodes`.
     */
    constructor(readonly trie: TrieBlobInternals, readonly nodeIdx: number) {
        const node = trie.nodes[nodeIdx];
        this.node = node;
        this.eow = !!(node & TrieBlob.NodeMaskEOW);
        this.size = node & TrieBlob.NodeMaskNumChildren;
    }

    /** get keys to children */
    getKeys(): readonly string[] {
        if (this.keys) return this.keys;
        if (!this.size) return EmptyKeys;
        const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex;
        const charIndex = this.trie.charIndex;
        const keys = Array<string>(this.size);
        // Child entries start right after the header entry.
        const offset = this.nodeIdx + 1;
        const len = this.size;
        for (let i = 0; i < len; ++i) {
            const entry = this.trie.nodes[i + offset];
            const charIdx = entry & NodeMaskChildCharIndex;
            keys[i] = charIndex[charIdx];
        }
        this.keys = keys;
        return keys;
    }

    /** get child ITrieNode */
    get(char: string): ITrieNode | undefined {
        const idx = this.getCharToIdxMap()[char];
        if (idx === undefined) return undefined;
        return this.child(idx);
    }

    /** has child */
    has(char: string): boolean {
        const idx = this.getCharToIdxMap()[char];
        return idx !== undefined;
    }

    /**
     * get a child by the key index
     * @param keyIdx - position of the child in `getKeys()`.
     * @returns the child node view, or `undefined` when `keyIdx` is not in `[0, size)`.
     */
    child(keyIdx: number): ITrieNode | undefined {
        // Outside [0, size) the addressed slot belongs to a different node (or to nothing),
        // so decoding it would produce a bogus child. This form also rejects NaN.
        if (!(keyIdx >= 0 && keyIdx < this.size)) return undefined;
        const n = this.trie.nodes[this.nodeIdx + keyIdx + 1];
        const nodeIdx = n >>> TrieBlob.NodeChildRefShift;
        return new TrieBlobINode(this.trie, nodeIdx);
    }

    /** Build (once) and return the map from child key to its index in `getKeys()`. */
    getCharToIdxMap(): Record<string, number> {
        const m = this.charToIdx;
        if (m) return m;
        // Prototype-less object so that keys never collide with Object.prototype members.
        const map: Record<string, number> = Object.create(null);
        const keys = this.getKeys();
        for (let i = 0; i < keys.length; ++i) {
            map[keys[i]] = i;
        }
        this.charToIdx = map;
        return map;
    }
}

/**
 * Root node view over a TrieBlob: a `TrieBlobINode` that additionally carries the trie options.
 */
class TrieBlobIRoot extends TrieBlobINode implements ITrieNodeRoot {
    /** options supplied when the root view was created */
    readonly options: Readonly<TrieOptions>;

    constructor(trie: TrieBlobInternals, nodeIdx: number, options: Readonly<TrieOptions>) {
        super(trie, nodeIdx);
        this.options = options;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,5 @@ export function createTrieBlobFromTrieRoot(root: TrieRoot): TrieBlob {

walk(root);

return new TrieBlob(Uint32Array.from(nodes), charIndex);
return new TrieBlob(Uint32Array.from(nodes), charIndex, root);
}
19 changes: 17 additions & 2 deletions packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type { TrieNode } from '../../../index.js';
import { createTrieRoot, insert, Trie } from '../../../index.js';
import { readTrie } from '../../../test/dictionaries.test.helper.js';
import { getGlobalPerfTimer } from '../../utils/timer.js';
import { walkerWordsITrie } from '../../walker/walker.js';
import { createTrieBlobFromTrieRoot } from '../createTrieBlob.js';
import { FastTrieBlob } from '../FastTrieBlob.js';
import { TrieBlob } from '../TrieBlob.js';
Expand Down Expand Up @@ -52,10 +53,19 @@ export async function measureFastBlob(which: string | undefined, method: string
timer.start('blob.words');
[...trieBlob.words()];
timer.stop('blob.words');

timer.start('blob.walkerWordsITrie');
[...walkerWordsITrie(TrieBlob.toITrieNodeRoot(trieBlob))];
timer.stop('blob.walkerWordsITrie');
break;
case 'dump':
timer.start('blob.write.TrieBlob.en.json');
writeFileSync('./TrieBlob.en.json', JSON.stringify(trieBlob, null, 2), 'utf8');
timer.stop('blob.write.TrieBlob.en.json');

timer.start('blob.write.TrieBlob.en.trieb');
writeFileSync('./TrieBlob.en.trieb', trieBlob.encodeBin());
timer.stop('blob.write.TrieBlob.en.trieb');
break;
case 'decode':
{
Expand All @@ -80,9 +90,9 @@ export async function measureFastBlob(which: string | undefined, method: string
timer.measureFn('fast.FastTrieBlob.has \t\t', () => hasWords(words, (word) => ft.has(word)));
break;
case 'words':
timer.start('blob.words');
timer.start('fast.words');
[...ft.words()];
timer.stop('blob.words');
timer.stop('fast.words');
break;
}
}
Expand All @@ -100,6 +110,11 @@ export async function measureFastBlob(which: string | undefined, method: string
timer.measureFn('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true)));
timer.measureFn('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true)));
break;
case 'words':
timer.start('trie.words');
[...trie.words()];
timer.stop('trie.words');
break;
}
}
timer.stop('trie');
Expand Down
20 changes: 20 additions & 0 deletions packages/cspell-trie-lib/src/lib/TrieNode/ITrieNode.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import type { TrieOptions } from './TrieNode.js';

/**
 * Common read-only view of a trie node, independent of how the trie is stored.
 */
export interface ITrieNode {
/** flag End of Word */
readonly eow: boolean;
/** number of children */
readonly size: number;
/** get keys to children */
getKeys(): readonly string[];
/** get child ITrieNode for the key `char`; `undefined` when there is no such child */
get(char: string): ITrieNode | undefined;
/** get a child by the key index, i.e. its position in `getKeys()` */
child(idx: number): ITrieNode | undefined;
/** has child for the key `char` */
has(char: string): boolean;
}

/**
 * Root node of a trie: an `ITrieNode` that also carries the trie options.
 */
export interface ITrieNodeRoot extends ITrieNode {
/** options associated with the trie this root belongs to */
options: Readonly<TrieOptions>;
}
68 changes: 68 additions & 0 deletions packages/cspell-trie-lib/src/lib/TrieNode/trie.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import type { ITrieNode, ITrieNodeRoot } from './ITrieNode.js';
import type { TrieNode, TrieOptions, TrieRoot } from './TrieNode.js';

/**
 * Wrap a `TrieRoot` so it can be used through the `ITrieNodeRoot` interface.
 * @param root - the trie root to wrap.
 * @returns a new root view backed by `root`.
 */
export function trieRootToITrieRoot(root: TrieRoot): ITrieNodeRoot {
    const wrapped = new ImplITrieRoot(root);
    return wrapped;
}

/**
 * Wrap a `TrieNode` so it can be used through the `ITrieNode` interface.
 * @param root - the node to wrap.
 * @returns a new node view backed by `root`.
 */
export function trieNodeToITrieNode(root: TrieNode): ITrieNode {
    const wrapped = new ImplITrieNode(root);
    return wrapped;
}

/** Shared, frozen key list used for nodes without a child map. */
const EmptyKeys: readonly string[] = Object.freeze([]);

/**
 * `ITrieNode` view over a plain `TrieNode` object (`f` = end-of-word flag, `c` = child map).
 */
class ImplITrieNode implements ITrieNode {
    /** child keys, computed on first request and then reused */
    private keys: readonly string[] | undefined;

    constructor(readonly node: TrieNode) {}

    /** flag End of Word */
    get eow(): boolean {
        return Boolean(this.node.f);
    }

    /** number of children */
    get size(): number {
        return this.node.c ? this.getKeys().length : 0;
    }

    /** get keys to children */
    getKeys(): readonly string[] {
        const cached = this.keys;
        if (cached) return cached;
        const children = this.node.c;
        const fresh = children ? Object.keys(children) : EmptyKeys;
        this.keys = fresh;
        return fresh;
    }

    /** get child ITrieNode */
    get(char: string): ITrieNode | undefined {
        const found = this.node.c?.[char];
        return found ? new ImplITrieNode(found) : undefined;
    }

    /** has child */
    has(char: string): boolean {
        const children = this.node.c;
        if (!children) return false;
        return char in children;
    }

    /** get a child by its position in `getKeys()` */
    child(keyIdx: number): ITrieNode | undefined {
        const key = this.getKeys()[keyIdx];
        return key ? this.get(key) : undefined;
    }
}

/**
 * `ITrieNodeRoot` view over a `TrieRoot`: a node view plus the three option values copied from the root.
 */
class ImplITrieRoot extends ImplITrieNode implements ITrieNodeRoot {
    /** snapshot of the option fields read from `root` at construction time */
    readonly options: Readonly<TrieOptions>;

    constructor(readonly root: TrieRoot) {
        super(root);
        this.options = {
            stripCaseAndAccentsPrefix: root.stripCaseAndAccentsPrefix,
            compoundCharacter: root.compoundCharacter,
            forbiddenWordPrefix: root.forbiddenWordPrefix,
        };
    }

    /** The root always reports `false` for end of word. */
    get eow(): boolean {
        return false;
    }
}
2 changes: 1 addition & 1 deletion packages/cspell-trie-lib/src/lib/trie.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ describe('Validate Trie Class', () => {

test('Tests complete', () => {
const trie = Trie.create(sampleWords);
expect([...trie.completeWord('lift')]).toEqual(sampleWords.filter((w) => w.slice(0, 4) === 'lift').sort());
expect([...trie.completeWord('lift')]).toEqual(sampleWords.filter((w) => w.startsWith('lift')).sort());
expect([...trie.completeWord('life')]).toEqual([]);
expect([...trie.completeWord('lifting')]).toEqual(['lifting']);
});
Expand Down
4 changes: 2 additions & 2 deletions packages/cspell-trie-lib/src/lib/walker/walker.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import { createTriFromList, orderTrie } from '../TrieNode/trie-util.js';
import { walker } from './walker.js';
import type { WalkerIterator, YieldResult } from './walkerTypes.js';

describe('Validate Util Functions', () => {
test('Tests Walker', () => {
describe('walker', () => {
test('walker', () => {
const root = createTriFromList(sampleWords);
orderTrie(root);
const i = walker(root);
Expand Down
Loading