Skip to content

Commit

Permalink
fix: Work towards a common Trie Interface
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed May 12, 2023
1 parent c0c8873 commit f2d92a6
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 13 deletions.
11 changes: 9 additions & 2 deletions packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { TrieNode, TrieRoot } from '../TrieNode/TrieNode.js';
import type { PartialTrieOptions, TrieNode, TrieOptions, TrieRoot } from '../TrieNode/TrieNode.js';
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';
import { resolveMap } from './resolveMap.js';
import { TrieBlob } from './TrieBlob.js';

Expand All @@ -14,6 +15,12 @@ export class FastTrieBlob {
private nodes: FastTrieBlobNode[] = [[0], [NodeMaskEOW]];
private _readonly = false;

readonly options: Readonly<TrieOptions>;

/**
 * Create a FastTrieBlob.
 * @param options - optional trie options; whatever `mergeOptionalWithDefaults`
 *   returns for them is stored in `this.options`.
 */
constructor(options?: PartialTrieOptions) {
    const resolved = mergeOptionalWithDefaults(options);
    this.options = resolved;
}

/**
 * Look up the index registered for `char` in `charToIndexMap`.
 * @param char - the character to look up
 * @returns the stored index, or -1 when the map has no (non-nullish) entry for `char`
 */
private lookUpCharIndex(char: string): number {
    const found = this.charToIndexMap[char];
    return found ?? -1;
}
Expand Down Expand Up @@ -175,7 +182,7 @@ export class FastTrieBlob {
}
}

return new TrieBlob(binNodes, this.charIndex);
return new TrieBlob(binNodes, this.charIndex, this.options);
}

isReadonly(): boolean {
Expand Down
8 changes: 8 additions & 0 deletions packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { describe, expect, test } from 'vitest';

import { createTriFromList } from '../TrieNode/trie-util.js';
import { walkerWordsITrie } from '../walker/walker.js';
import { createTrieBlob, createTrieBlobFromTrieRoot } from './createTrieBlob.js';
import { TrieBlob } from './TrieBlob.js';

Expand Down Expand Up @@ -37,4 +38,11 @@ describe('TrieBlob', () => {
const trieBlob = createTrieBlobFromTrieRoot(root);
expect([...trieBlob.words()]).toEqual(sampleWords);
});

test('toITrieNodeRoot', () => {
const root = createTriFromList(sampleWords);
const trieBlob = createTrieBlobFromTrieRoot(root);
const iter = walkerWordsITrie(TrieBlob.toITrieNodeRoot(trieBlob));
expect([...iter]).toEqual(sampleWords);
});
});
103 changes: 99 additions & 4 deletions packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import { defaultTrieOptions } from '../constants.js';
import type { ITrieNode, ITrieNodeRoot } from '../TrieNode/ITrieNode.js';
import type { PartialTrieOptions, TrieOptions } from '../TrieNode/TrieNode.js';
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';

const NodeHeaderNumChildrenBits = 8;
const NodeHeaderNumChildrenShift = 0;

Expand Down Expand Up @@ -29,8 +34,11 @@ const version = '00.01.00';
const endianSig = 0x04030201;

export class TrieBlob {
private charToIndexMap: Record<string, number>;
constructor(private nodes: Uint32Array, private charIndex: string[]) {
protected charToIndexMap: Record<string, number>;
readonly options: Readonly<TrieOptions>;

constructor(protected nodes: Uint32Array, protected charIndex: string[], options: PartialTrieOptions) {
this.options = mergeOptionalWithDefaults(options);
this.charToIndexMap = Object.create(null);
for (let i = 0; i < charIndex.length; ++i) {
const char = charIndex[i];
Expand Down Expand Up @@ -106,7 +114,8 @@ export class TrieBlob {
toJSON() {
return {
charIndex: this.charIndex,
nodes: splitString(Buffer.from(this.nodes.buffer).toString('base64')),
options: this.options,
nodes: splitString(Buffer.from(this.nodes.buffer, 128).toString('base64')),
};
}

Expand Down Expand Up @@ -153,7 +162,16 @@ export class TrieBlob {
.toString('utf8')
.split('\n');
const nodes = new Uint32Array(blob.buffer).subarray(offsetNodes / 4, offsetNodes / 4 + lenNodes);
return new TrieBlob(nodes, charIndex);
return new TrieBlob(nodes, charIndex, defaultTrieOptions);
}

/**
 * Expose a TrieBlob through the ITrieNodeRoot interface.
 *
 * The returned root references the blob's `nodes`, `charIndex` and
 * `charToIndexMap` directly (nothing is copied) and starts at node index 0.
 * @param trie - the blob to expose
 * @returns a root node view carrying `trie.options`
 */
static toITrieNodeRoot(trie: TrieBlob): ITrieNodeRoot {
    const { nodes, charIndex, charToIndexMap } = trie;
    const internals: TrieBlobInternals = { nodes, charIndex, charToIndexMap };
    return new TrieBlobIRoot(internals, 0, trie.options);
}

static NodeMaskEOW = 0x00000100;
Expand Down Expand Up @@ -209,3 +227,80 @@ function splitString(s: string, len = 64): string[] {
// }
// console.log(values.join(' '));
// }

/**
 * The pieces of TrieBlob state handed to the ITrieNode view classes.
 */
interface TrieBlobInternals {
/**
 * Packed node data. A node's header entry carries the End-of-Word flag and the
 * child count; the entries that follow it carry a child character index and a
 * reference to the child node.
 */
readonly nodes: Uint32Array;
/** Maps a character index (as stored in `nodes`) to its character string. */
readonly charIndex: string[];
/** Character to index lookup — presumably the inverse of `charIndex`; verify against the TrieBlob constructor. */
readonly charToIndexMap: Readonly<Record<string, number>>;
}

/** Shared frozen empty key list; returned by `getKeys()` for nodes that have no children. */
const EmptyKeys: readonly string[] = Object.freeze([]);

/**
 * ITrieNode view over a single node stored in a TrieBlob's `nodes` array.
 *
 * Layout as read by this class: `nodes[nodeIdx]` is the node header (End-of-Word
 * flag and child count); each of the following `size` entries describes one
 * child, holding the child's character index and the index of the child's header.
 */
class TrieBlobINode implements ITrieNode {
    /** number of children */
    readonly size: number;
    /** raw header value of this node */
    readonly node: number;
    /** flag End of Word */
    readonly eow: boolean;
    /** lazily built list of child keys, see `getKeys()` */
    keys: string[] | undefined;
    /** lazily built map from child key to its key index, see `getCharToIdxMap()` */
    charToIdx: Record<string, number> | undefined;

    /**
     * @param trie - the shared blob data
     * @param nodeIdx - index of this node's header within `trie.nodes`
     */
    constructor(readonly trie: TrieBlobInternals, readonly nodeIdx: number) {
        const node = trie.nodes[nodeIdx];
        this.node = node;
        this.eow = !!(node & TrieBlob.NodeMaskEOW);
        this.size = node & TrieBlob.NodeMaskNumChildren;
    }

    /** get keys to children (computed on first use, then cached) */
    getKeys(): readonly string[] {
        if (this.keys) return this.keys;
        if (!this.size) return EmptyKeys;
        const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex;
        const charIndex = this.trie.charIndex;
        const keys = Array<string>(this.size);
        // Child entries start right after the header.
        const offset = this.nodeIdx + 1;
        const len = this.size;
        for (let i = 0; i < len; ++i) {
            const entry = this.trie.nodes[i + offset];
            const charIdx = entry & NodeMaskChildCharIndex;
            keys[i] = charIndex[charIdx];
        }
        this.keys = keys;
        return keys;
    }

    /** get child ITrieNode for `char`, or `undefined` when there is no such child */
    get(char: string): ITrieNode | undefined {
        const idx = this.getCharToIdxMap()[char];
        if (idx === undefined) return undefined;
        return this.child(idx);
    }

    /** has child for `char` */
    has(char: string): boolean {
        const idx = this.getCharToIdxMap()[char];
        return idx !== undefined;
    }

    /**
     * get a child by the key index (its position in `getKeys()`)
     * @returns the child node, or `undefined` when `keyIdx` is not a valid key index
     */
    child(keyIdx: number): ITrieNode | undefined {
        // Without this guard an out-of-range index would decode an entry that
        // belongs to a different node (or `undefined >>> shift === 0`, the root)
        // and hand back an unrelated node.
        if (!Number.isInteger(keyIdx) || keyIdx < 0 || keyIdx >= this.size) return undefined;
        const n = this.trie.nodes[this.nodeIdx + keyIdx + 1];
        const nodeIdx = n >>> TrieBlob.NodeChildRefShift;
        return new TrieBlobINode(this.trie, nodeIdx);
    }

    /** get the map from child key to key index (computed on first use, then cached) */
    getCharToIdxMap(): Record<string, number> {
        const m = this.charToIdx;
        if (m) return m;
        // Prototype-less object so inherited names never look like child keys.
        const map: Record<string, number> = Object.create(null);
        const keys = this.getKeys();
        for (let i = 0; i < keys.length; ++i) {
            map[keys[i]] = i;
        }
        this.charToIdx = map;
        return map;
    }
}

/** Root node view of a TrieBlob: a TrieBlobINode that also carries the trie options. */
class TrieBlobIRoot extends TrieBlobINode implements ITrieNodeRoot {
    /** options of the trie this root belongs to */
    readonly options: Readonly<TrieOptions>;

    /**
     * @param trie - the shared blob data
     * @param nodeIdx - index of the root node's header within `trie.nodes`
     * @param options - the trie options to expose on the root
     */
    constructor(trie: TrieBlobInternals, nodeIdx: number, options: Readonly<TrieOptions>) {
        super(trie, nodeIdx);
        this.options = options;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,5 @@ export function createTrieBlobFromTrieRoot(root: TrieRoot): TrieBlob {

walk(root);

return new TrieBlob(Uint32Array.from(nodes), charIndex);
return new TrieBlob(Uint32Array.from(nodes), charIndex, root);
}
19 changes: 17 additions & 2 deletions packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type { TrieNode } from '../../../index.js';
import { createTrieRoot, insert, Trie } from '../../../index.js';
import { readTrie } from '../../../test/dictionaries.test.helper.js';
import { getGlobalPerfTimer } from '../../utils/timer.js';
import { walkerWords, walkerWordsITrie } from '../../walker/walker.js';

Check failure on line 8 in packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts

View workflow job for this annotation

GitHub Actions / lint

'walkerWords' is defined but never used. Allowed unused vars must match /^_/u
import { createTrieBlobFromTrieRoot } from '../createTrieBlob.js';
import { FastTrieBlob } from '../FastTrieBlob.js';
import { TrieBlob } from '../TrieBlob.js';
Expand Down Expand Up @@ -52,10 +53,19 @@ export async function measureFastBlob(which: string | undefined, method: string
timer.start('blob.words');
[...trieBlob.words()];
timer.stop('blob.words');

timer.start('blob.walkerWordsITrie');
[...walkerWordsITrie(TrieBlob.toITrieNodeRoot(trieBlob))];
timer.stop('blob.walkerWordsITrie');
break;
case 'dump':
timer.start('blob.write.TrieBlob.en.json');
writeFileSync('./TrieBlob.en.json', JSON.stringify(trieBlob, null, 2), 'utf8');
timer.stop('blob.write.TrieBlob.en.json');

timer.start('blob.write.TrieBlob.en.trieb');
writeFileSync('./TrieBlob.en.trieb', trieBlob.encodeBin());
timer.stop('blob.write.TrieBlob.en.trieb');
break;
case 'decode':
{
Expand All @@ -80,9 +90,9 @@ export async function measureFastBlob(which: string | undefined, method: string
timer.measureFn('fast.FastTrieBlob.has \t\t', () => hasWords(words, (word) => ft.has(word)));
break;
case 'words':
timer.start('blob.words');
timer.start('fast.words');
[...ft.words()];
timer.stop('blob.words');
timer.stop('fast.words');
break;
}
}
Expand All @@ -100,6 +110,11 @@ export async function measureFastBlob(which: string | undefined, method: string
timer.measureFn('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true)));
timer.measureFn('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true)));
break;
case 'words':
timer.start('trie.words');
[...trie.words()];
timer.stop('trie.words');
break;
}
}
timer.stop('trie');
Expand Down
20 changes: 20 additions & 0 deletions packages/cspell-trie-lib/src/lib/TrieNode/ITrieNode.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import type { TrieOptions } from './TrieNode.js';

/**
 * Common read-only view of a trie node, independent of how the trie is stored.
 */
export interface ITrieNode {
/** flag End of Word */
readonly eow: boolean;
/** number of children */
readonly size: number;
/** get keys to children */
getKeys(): readonly string[];
/** get child ITrieNode for `char`; `undefined` when there is no such child */
get(char: string): ITrieNode | undefined;
/** get a child by the key index, i.e. its position in `getKeys()` */
child(idx: number): ITrieNode | undefined;
/** has child for `char` */
has(char: string): boolean;
}

/** The root ITrieNode of a trie; additionally exposes the trie's options. */
export interface ITrieNodeRoot extends ITrieNode {
/** options of the trie this root belongs to */
options: Readonly<TrieOptions>;
}
68 changes: 68 additions & 0 deletions packages/cspell-trie-lib/src/lib/TrieNode/trie.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import type { ITrieNode, ITrieNodeRoot } from './ITrieNode.js';
import type { TrieNode, TrieOptions, TrieRoot } from './TrieNode.js';

/**
 * Wrap a TrieRoot in an ITrieNodeRoot view.
 * @param root - the trie root to expose
 * @returns an ITrieNodeRoot backed by `root`
 */
export function trieRootToITrieRoot(root: TrieRoot): ITrieNodeRoot {
return new ImplITrieRoot(root);
}

/**
 * Wrap a TrieNode in an ITrieNode view.
 * @param root - the node to expose
 * @returns an ITrieNode backed by `root`
 */
export function trieNodeToITrieNode(root: TrieNode): ITrieNode {
return new ImplITrieNode(root);
}

/** Shared frozen empty key list; used by `getKeys()` for nodes without a child map. */
const EmptyKeys: readonly string[] = Object.freeze([]);

/**
 * ITrieNode view over a TrieNode.
 *
 * Children are read from the node's `c` map and the End-of-Word flag from `f`.
 * Only the map's own properties count as children, so names inherited from
 * `Object.prototype` (e.g. `toString`) are never reported as child keys.
 */
class ImplITrieNode implements ITrieNode {
    /** lazily computed child keys, see `getKeys()` */
    private keys: readonly string[] | undefined;
    constructor(readonly node: TrieNode) {}

    /** flag End of Word */
    get eow(): boolean {
        return !!this.node.f;
    }

    /** number of children */
    get size(): number {
        if (!this.node.c) return 0;
        return this.getKeys().length;
    }

    /** get keys to children (computed on first use, then cached) */
    getKeys(): readonly string[] {
        if (this.keys) return this.keys;
        const keys = this.node.c ? Object.keys(this.node.c) : EmptyKeys;
        this.keys = keys;
        return keys;
    }

    /** get child ITrieNode for `char`; `undefined` when there is no such child */
    get(char: string): ITrieNode | undefined {
        const c = this.node.c;
        // Own-property check: a plain property read would also return inherited
        // members such as `constructor` when the child map has a prototype.
        if (!c || !Object.prototype.hasOwnProperty.call(c, char)) return undefined;
        const n = c[char];
        if (!n) return undefined;
        return new ImplITrieNode(n);
    }

    /** has child for `char` */
    has(char: string): boolean {
        const c = this.node.c;
        // `in` walks the prototype chain; only own keys are children.
        return !!c && Object.prototype.hasOwnProperty.call(c, char);
    }

    /**
     * get a child by the key index (its position in `getKeys()`)
     * @returns the child node, or `undefined` when `keyIdx` is out of range
     */
    child(keyIdx: number): ITrieNode | undefined {
        const char = this.getKeys()[keyIdx];
        // Compare against undefined so an empty-string key is not mistaken for "missing".
        if (char === undefined) return undefined;
        return this.get(char);
    }
}

/**
 * ITrieNodeRoot view over a TrieRoot.
 *
 * Copies the three option fields from the root into `options` and always
 * reports `eow` as false for the root node.
 */
class ImplITrieRoot extends ImplITrieNode implements ITrieNodeRoot {
    /** options copied from the wrapped root at construction time */
    readonly options: Readonly<TrieOptions>;

    constructor(readonly root: TrieRoot) {
        super(root);
        this.options = {
            stripCaseAndAccentsPrefix: root.stripCaseAndAccentsPrefix,
            compoundCharacter: root.compoundCharacter,
            forbiddenWordPrefix: root.forbiddenWordPrefix,
        };
    }

    /** flag End of Word — the root never marks the end of a word */
    get eow(): boolean {
        return false;
    }
}
2 changes: 1 addition & 1 deletion packages/cspell-trie-lib/src/lib/trie.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ describe('Validate Trie Class', () => {

test('Tests complete', () => {
const trie = Trie.create(sampleWords);
expect([...trie.completeWord('lift')]).toEqual(sampleWords.filter((w) => w.slice(0, 4) === 'lift').sort());
expect([...trie.completeWord('lift')]).toEqual(sampleWords.filter((w) => w.startsWith('lift')).sort());
expect([...trie.completeWord('life')]).toEqual([]);
expect([...trie.completeWord('lifting')]).toEqual(['lifting']);
});
Expand Down
4 changes: 2 additions & 2 deletions packages/cspell-trie-lib/src/lib/walker/walker.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import { createTriFromList, orderTrie } from '../TrieNode/trie-util.js';
import { walker } from './walker.js';
import type { WalkerIterator, YieldResult } from './walkerTypes.js';

describe('Validate Util Functions', () => {
test('Tests Walker', () => {
describe('walker', () => {
test('walker', () => {
const root = createTriFromList(sampleWords);
orderTrie(root);
const i = walker(root);
Expand Down
Loading

0 comments on commit f2d92a6

Please sign in to comment.