Skip to content

Commit

Permalink
[cspell-tools] Speed things up if the words are in sorted order.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Dec 21, 2019
1 parent 7f8c10f commit 4f52696
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 18 deletions.
8 changes: 8 additions & 0 deletions packages/cspell-trie-lib/src/lib/TrieBuilder.test.ts
Expand Up @@ -10,6 +10,14 @@ describe('Validate TrieBuilder', () => {
expect(countNodes(trie.root)).toBe(113);
});

// Builds a trie by inserting the sample words through TrieBuilder.insert and checks
// both the resulting word list and the node count of the un-consolidated build.
test('builder', () => {
    const builder = new TrieBuilder();
    builder.insert(sampleWords);
    const trie = builder.build(false);
    // Array.prototype.sort sorts in place. `sampleWords` is shared with the other tests,
    // so compare against a sorted copy instead of reordering the fixture itself.
    expect([...trie.words()].sort()).toEqual([...sampleWords].sort());
    expect(countNodes(trie.root)).toBe(130);
});

test('builder duplicate inserts', () => {
const builder = new TrieBuilder(sampleWords);
builder.insert(sampleWords);
Expand Down
78 changes: 60 additions & 18 deletions packages/cspell-trie-lib/src/lib/TrieBuilder.ts
Expand Up @@ -6,13 +6,21 @@ export function buildTrie(words: Iterable<string>): Trie {
return new TrieBuilder(words).build();
}

/**
 * One step on a path through the trie: the character that was followed and
 * the node that was reached by following it.
 */
interface PathNode {
/** a single character (the builder's root entry uses the empty string) */
s: string;
/** the corresponding child node after adding s */
n: TrieNode;
}

export class TrieBuilder {
private count: number = 0;
private readonly signatures = new Map<string, TrieNode>();
private readonly cached = new Map<TrieNode, number>();
private readonly transforms = new Map<TrieNode, Map<string, TrieNode>>();
private _root: TrieNode = { f: undefined, c: undefined };
private _eow: TrieNode = Object.freeze({ f: 1 });
/** position 0 of lastPath is always the root */
private lastPath: PathNode[] = [{ s: '', n: { f: undefined, c: undefined } }];

constructor(words?: Iterable<string>) {
this._canBeCached(this._eow); // this line is just for coverage reasons
Expand All @@ -24,6 +32,14 @@ export class TrieBuilder {
}
}

/** Stores `node` as the root; the root lives in the first entry of `lastPath`. */
private set _root(node: TrieNode) {
    const rootEntry = this.lastPath[0];
    rootEntry.n = node;
}

/** Reads the root node back from the first entry of `lastPath`. */
private get _root() {
    const rootEntry = this.lastPath[0];
    return rootEntry.n;
}

private signature(n: TrieNode): string {
const isWord = n.f ? '*' : '';
const ref = n.c
Expand Down Expand Up @@ -67,10 +83,7 @@ export class TrieBuilder {
const sig = this.signature(n);
const ref = this.signatures.get(sig);
if (ref !== undefined) {
if (!this.cached.has(ref) && ref !== n) {
this.cached.set(ref, this.count++);
}
return ref;
return this.tryCacheFrozen(ref);
}

this.signatures.set(sig, this.freeze(n));
Expand All @@ -84,7 +97,17 @@ export class TrieBuilder {
this.transforms.set(src, t);
}

private _insert(node: TrieNode, s: string): TrieNode {
/**
 * Link `child` under `node` for the character `head`.
 *
 * If `node` already maps `head` to this exact `child`, nothing is changed.
 * Otherwise the link is written; when `node` has no child map yet, or is frozen,
 * a shallow copy with its own child map is made first and the copy is modified.
 *
 * @param node - the parent node
 * @param head - the character that leads from the parent to the child
 * @param child - the node to attach
 * @returns the parent to use from now on: the result of `tryToCache` when `child`
 *   is frozen, otherwise the (possibly copied) parent node itself.
 */
private addChild(node: TrieNode, head: string, child: TrieNode): TrieNode {
    const alreadyLinked = node.c?.get(head) === child;
    if (!alreadyLinked) {
        const mustCopy = !node.c || Object.isFrozen(node);
        if (mustCopy) {
            node = { ...node, c: new Map(node.c ?? []) };
        }
        node.c!.set(head, child);
    }
    if (!Object.isFrozen(child)) {
        return node;
    }
    return this.tryToCache(node);
}

private _insert(node: TrieNode, s: string, d: number): TrieNode {
const orig = node;
if (Object.isFrozen(node)) {
const n = this.transforms.get(node)?.get(s);
Expand All @@ -104,34 +127,53 @@ export class TrieBuilder {
const head = s[0];
const tail = s.slice(1);

const child = this._insert(node.c?.get(head) ?? { f: undefined, c: undefined }, tail);
if (node.c?.get(head) !== child) {
if (!node.c || Object.isFrozen(node)) {
node = {...node, c: new Map(node.c ?? [])};
}
node.c!.set(head, child);
}

node = Object.isFrozen(child) ? this.tryToCache(node) : node;
const child = this._insert(node.c?.get(head) ?? { f: undefined, c: undefined }, tail, d + 1);
node = this.addChild(node, head, child);
this.storeTransform(orig, s, node);
this.lastPath[d] = { s: head, n: child };
return node;
}

/**
 * Insert a single word into the trie.
 *
 * `lastPath` holds the chain of nodes (entry 0 is the root) left by the previous
 * insertion. The leading part of that chain that spells a prefix of `word` is kept,
 * only the remaining letters are handed to `_insert`, and the changed nodes are then
 * re-linked into their parents back up to the root. Words that arrive in sorted order
 * share long prefixes, so most of the work is skipped for them.
 *
 * @param word - the word to add; an empty string is ignored.
 */
insertWord(word: string) {
    // An empty word has no letters to place. Without this guard `lastPath` would be
    // truncated to length 0 (dropping the root entry) and the lookup below would throw.
    if (!word) return;

    const letters = word.split('');
    // Entry 0 is the root, so matching against the previous path starts at 1.
    let d = 1;
    for (const s of letters) {
        const p = this.lastPath[d];
        if (p?.s !== s) break;
        d++;
    }
    // remove the remaining part of the path because it doesn't match this word.
    // If every letter matched, step back one so that the last letter is still inserted.
    if (word.length < d) {
        d = word.length;
    }
    this.lastPath.length = d;
    d -= 1;
    const { n } = this.lastPath[d];
    const tail = word.slice(d);
    this.lastPath[d].n = this._insert(n, tail, d + 1);
    // Walk back towards the root, attaching each updated node to its parent.
    while (d > 0) {
        const { s, n: child } = this.lastPath[d];
        d -= 1;
        const parent = this.lastPath[d];
        const pn = parent.n;
        parent.n = this.addChild(pn, s, child);
        // The parent was not replaced, so nothing above it needs to change.
        if (pn === parent.n) break;
        const remaining = word.slice(d);
        this.storeTransform(pn, remaining, parent.n);
    }
}

/**
 * Insert every word from `words` into the trie, in iteration order.
 *
 * Each word is inserted exactly once. Empty strings are skipped and never
 * reach `insertWord`.
 *
 * @param words - the words to add
 */
insert(words: Iterable<string>) {
    for (const w of words) {
        if (w) {
            this.insertWord(w);
        }
    }
}

/**
* Resets the builder
*/
reset() {
this._root = {};
this._root = { f: undefined, c: undefined };
this.cached.clear();
this.signatures.clear();
}
Expand Down

0 comments on commit 4f52696

Please sign in to comment.