Skip to content

Commit

Permalink
speed up hasWord check.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed May 10, 2023
1 parent 2f375df commit 47b6b18
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 21 deletions.
25 changes: 12 additions & 13 deletions packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { assert } from 'console';

import type { TrieNode, TrieRoot } from '../TrieNode.js';
import { TrieBlob } from './TrieBlob.js';

Expand Down Expand Up @@ -58,11 +56,11 @@ export class FastTrieBlob {

has(word: string): boolean {
const nodes = this.nodes;
const letterIndexes = [...word].map((char) => this.lookUpCharIndex(char));
const len = word.length;
let nodeIdx = 0;
let node = nodes[nodeIdx];
for (let p = 0; p < letterIndexes.length; ++p, node = nodes[nodeIdx]) {
const letterIdx = letterIndexes[p];
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
const letterIdx = this.lookUpCharIndex(word[p]);
const count = node.length;
let i = count - 1;
for (; i > 0; --i) {
Expand All @@ -82,10 +80,10 @@ export class FastTrieBlob {
if (!word) return this;
const IdxEOW = 1;
const nodes = this.nodes;
const letterIndexes = [...word].map((char) => this.getCharIndex(char));
const len = word.length;
let nodeIdx = 0;
for (let p = 0; p < letterIndexes.length; ++p) {
const letterIdx = letterIndexes[p];
for (let p = 0; p < len; ++p) {
const letterIdx = this.getCharIndex(word[p]);
const node = nodes[nodeIdx];
const count = node.length;
let i = count - 1;
Expand All @@ -96,14 +94,14 @@ export class FastTrieBlob {
}
if (i > 0) {
nodeIdx = node[i] >>> NodeChildRefShift;
if (nodeIdx === 1 && p < letterIndexes.length - 1) {
if (nodeIdx === 1 && p < len - 1) {
nodeIdx = this.nodes.push([NodeMaskEOW]) - 1;
node[i] = (nodeIdx << NodeChildRefShift) | letterIdx;
}
continue;
}

nodeIdx = p < letterIndexes.length - 1 ? this.nodes.push([0]) - 1 : IdxEOW;
nodeIdx = p < len - 1 ? this.nodes.push([0]) - 1 : IdxEOW;
node.push((nodeIdx << NodeChildRefShift) | letterIdx);
}

Expand Down Expand Up @@ -148,24 +146,25 @@ export class FastTrieBlob {
const nodes = this.nodes;
function calcNodeToIndex(nodes: number[][]): number[] {
let offset = 0;
const idx: number[] = [];
const idx: number[] = Array(nodes.length + 1);
for (let i = 0; i < nodes.length; ++i) {
idx[i] = offset;
offset += nodes[i].length;
}
idx[nodes.length] = offset;
return idx;
}

const nodeElementCount = this.nodes.reduce((a, b) => a + b.length, 0);
const nodeToIndex = calcNodeToIndex(nodes);
const nodeElementCount = nodeToIndex[nodeToIndex.length - 1];
const binNodes = new Uint32Array(nodeElementCount);
const lenShift = TrieBlob.NodeMaskNumChildrenShift;
const refShift = TrieBlob.NodeChildRefShift;

let offset = 0;
for (let i = 0; i < nodes.length; ++i) {
const node = nodes[i];
assert(offset === nodeToIndex[i]);
// assert(offset === nodeToIndex[i]);
binNodes[offset++] = (node.length << lenShift) | node[0];
for (let j = 1; j < node.length; ++j) {
const v = node[j];
Expand Down
7 changes: 4 additions & 3 deletions packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,12 @@ export class TrieBlob {
const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex;
const NodeChildRefShift = TrieBlob.NodeChildRefShift;
const nodes = this.nodes;
const letterIndexes = [...word].map((char) => this.lookUpCharIndex(char));
const len = word.length;
const charToIndexMap = this.charToIndexMap;
let nodeIdx = 0;
let node = nodes[nodeIdx];
for (let p = 0; p < letterIndexes.length; ++p, node = nodes[nodeIdx]) {
const letterIdx = letterIndexes[p];
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
const letterIdx = charToIndexMap[word[p]];
const count = node & NodeMaskNumChildren;
let i = count - 1;
for (; i > 0; --i) {
Expand Down
23 changes: 18 additions & 5 deletions packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@ function getTrie() {
return readTrie('@cspell/dict-en_us/cspell-ext.json');
}

/**
 * Runs `method` against every entry in `words` and verifies that all calls succeeded.
 *
 * Every word is checked even after a failure has been seen (no short-circuit),
 * so the amount of work done is the same regardless of the outcome.
 *
 * @param words - the words to look up.
 * @param method - the lookup to apply to each word; returns `true` when the word is found.
 * @returns `true` when `method` returned a truthy value for every word.
 * @throws when `assert` rejects because at least one lookup failed.
 */
function hasWords(words: string[], method: (word: string) => boolean): boolean {
    // Snapshot the length once so the loop bound is fixed before any lookup runs.
    const total = words.length;
    let allFound = true;
    let index = 0;
    while (index < total) {
        // Call `method` first so it always executes, then fold in the previous result.
        const found = method(words[index]);
        index += 1;
        allFound = found && allFound;
    }
    assert(allFound);
    return allFound;
}

export async function measureFastBlob(which: string | undefined, method: string | undefined) {
const trie = await getTrie();
const words = trie.words().toArray();
Expand All @@ -22,7 +32,8 @@ export async function measureFastBlob(which: string | undefined, method: string

switch (method) {
case 'has':
measure('blob.TrieBlob.has \t\t', () => words.forEach((word) => assert(trieBlob.has(word))));
measure('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => trieBlob.has(word)));
measure('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => trieBlob.has(word)));
break;
case 'dump':
writeFileSync('./TrieBlob.en.json', JSON.stringify(trieBlob, null, 2), 'utf8');
Expand All @@ -33,8 +44,8 @@ export async function measureFastBlob(which: string | undefined, method: string
const tb = measure('blob.TrieBlob.decodeBin \t', () => {
return TrieBlob.decodeBin(readFileSync('./TrieBlob.en.trieb'));

Check warning on line 45 in packages/cspell-trie-lib/src/lib/TrieBlob/test/perfFastTrieBlob.ts

View workflow job for this annotation

GitHub Actions / cspell

Unknown word (trieb)
});
measure('blob.TrieBlob.has \t\t', () => words.forEach((word) => assert(tb.has(word))));
measure('blob.TrieBlob.has \t\t', () => words.forEach((word) => assert(tb.has(word))));
measure('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => tb.has(word)));
measure('blob.TrieBlob.has \t\t', () => hasWords(words, (word) => tb.has(word)));
}
break;
}
Expand All @@ -45,7 +56,8 @@ export async function measureFastBlob(which: string | undefined, method: string

switch (method) {
case 'has':
measure('fast.FastTrieBlob.has \t\t', () => words.forEach((word) => assert(ft.has(word))));
measure('fast.FastTrieBlob.has \t\t', () => hasWords(words, (word) => ft.has(word)));
measure('fast.FastTrieBlob.has \t\t', () => hasWords(words, (word) => ft.has(word)));
break;
}
}
Expand All @@ -58,7 +70,8 @@ export async function measureFastBlob(which: string | undefined, method: string

switch (method) {
case 'has':
measure('trie.Trie.has \t\t\t', () => words.forEach((word) => assert(trie.hasWord(word, true))));
measure('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true)));
measure('trie.Trie.has \t\t\t', () => hasWords(words, (word) => trie.hasWord(word, true)));
break;
}
}
Expand Down

0 comments on commit 47b6b18

Please sign in to comment.