-
-
Notifications
You must be signed in to change notification settings - Fork 101
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: work towards a common base Trie Interface
- Loading branch information
Showing
28 changed files
with
523 additions
and
103 deletions.
There are no files selected for viewing
10 changes: 7 additions & 3 deletions
10
...ll-trie-lib/src/lib/TrieNode/ITrieNode.ts → ...l-trie-lib/src/lib/ITrieNode/ITrieNode.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import type { PartialWithUndefined } from '../types.js'; | ||
|
||
/**
 * Options associated with a trie.
 *
 * NOTE(review): the per-field notes below are inferred from the property names only;
 * confirm them against the code that consumes these options.
 */
export interface TrieOptions {
    /** Presumably the character used to mark compound words. */
    compoundCharacter: string;
    /** Presumably the prefix for entries stored with case and accents stripped. */
    stripCaseAndAccentsPrefix: string;
    /** Presumably the prefix that marks a word as forbidden. */
    forbiddenWordPrefix: string;
}

/**
 * Options as supplied by a caller: either `undefined`, or `TrieOptions` wrapped in
 * `PartialWithUndefined` (which, going by its name, lets fields be omitted or `undefined`).
 */
export type PartialTrieOptions = undefined | PartialWithUndefined<TrieOptions>;
59 changes: 59 additions & 0 deletions
59
packages/cspell-trie-lib/src/lib/ITrieNode/trie-util.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import { describe, expect, test } from 'vitest'; | ||
|
||
import { trieRootToITrieRoot } from '../TrieNode/trie.js'; | ||
import { createTrieFromList } from '../TrieNode/trie-util.js'; | ||
import { mergeDefaults } from '../utils/mergeDefaults.js'; | ||
import { countNodes, countWords, findNode, has, iteratorTrieWords } from './trie-util.js'; | ||
|
||
/**
 * Exercises the ITrieNode utility functions against a small trie built from the
 * `words` fixture declared at the bottom of this file.
 */
describe('Validate Util Functions', () => {
    const trieNode = createTrieFromList(words);
    const trie = trieRootToITrieRoot(trieNode);

    test('createTrieFromList', () => {
        expect(has(trie, 'sample')).toBe(true);
        expect(has(trie, 'not found')).toBe(false);
    });

    test('has', () => {
        // A prefix of a stored word is not itself a word.
        // cspell:ignore sampl
        expect(has(trie, 'sample')).toBe(true);
        expect(has(trie, 'sampl')).toBe(false);
    });

    test('find', () => {
        expect(has(trie, 'sample')).toBe(true);
        // cspell:ignore sampl samp
        const n0 = findNode(trie, 'sample');
        const n1 = findNode(trie, 'sampl');
        const n2 = findNode(trie, 'samp');
        expect(n0?.eow).toBeTruthy();
        // Each shorter prefix must lead to the very same node object as the longer lookup.
        expect(n1?.get('e')).toBe(n0);
        expect(n2?.get('l')).toBe(n1);
    });

    test('countNodes', () => {
        expect(countNodes(trie)).toBe(73);
    });

    test('countWords', () => {
        // The fixture has 21 tokens, two of which ("for", "to") appear twice.
        expect(countWords(trie)).toBe(19);
    });

    test('iteratorTrieWords', () => {
        expect([...iteratorTrieWords(trie)].join(' ')).toBe(
            'These There are some someone sample space spaces. words worry. with for everyone extra to use, complete is no'
        );
    });

    test('mergeDefaults', () => {
        const a = { a: 1, b: 'b', c: 'c' };
        const b = { a: 3, b: 'bb' };

        expect(mergeDefaults(a, b)).toEqual({ a: 1, b: 'b' });
        expect(mergeDefaults(b, a)).toEqual({ a: 3, b: 'bb', c: 'c' });
    });
});
|
||
/** Fixture text. It is split on single spaces, so punctuation stays attached to its word. */
const sentence =
    'These are some sample words for everyone to use, complete with extra spaces. There is no space for someone to worry.';

/** The tokens of `sentence`, duplicates included, in their original order. */
const words = sentence.split(' ');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import type { ITrieNode } from './ITrieNode.js'; | ||
import { walker, walkerWords } from './walker.js'; | ||
import type { YieldResult } from './walkerTypes.js'; | ||
|
||
/**
 * Reports whether a node ends a word.
 *
 * @param node - the node to inspect.
 * @returns the node's `eow` flag.
 */
export function isWordTerminationNode(node: ITrieNode): boolean {
    const { eow } = node;
    return eow;
}
|
||
/**
 * Generate an iterator that walks the trie depth first, yielding a node before its
 * children. Delegates to `walker` using its default compounding method.
 *
 * @param node - the node to start walking from.
 * @returns an iterable of the results yielded by `walker`.
 */
export function walk(node: ITrieNode): Iterable<YieldResult> {
    const results = walker(node);
    return results;
}

/** Alias of {@link walk}. */
export const iterateTrie = walk;
|
||
/**
 * Generate an iterator that walks a trie and yields the words it contains.
 * Delegates to `walkerWords`.
 *
 * @param node - the node to start walking from.
 * @returns an iterable of words.
 */
export function iteratorTrieWords(node: ITrieNode): Iterable<string> {
    const wordsInTrie = walkerWords(node);
    return wordsInTrie;
}
|
||
/**
 * Checks whether `word` is stored in the trie rooted at `node`.
 *
 * @param node - the node to start the lookup from.
 * @param word - the word to look for.
 * @returns `true` only when the whole path for `word` exists and ends on a node flagged `eow`.
 */
export function has(node: ITrieNode, word: string): boolean {
    const match = findNode(node, word);
    if (!match) {
        // The path for `word` does not exist at all.
        return false;
    }
    return match.eow || false;
}
|
||
/**
 * Follows `word` through the trie, one string index (UTF-16 code unit) at a time.
 *
 * @param node - the node to start from.
 * @param word - the sequence of keys to follow; an empty string yields `node` itself.
 * @returns the node reached after consuming all of `word`, or `undefined` as soon as a
 *   step has no matching child. The returned node is not necessarily an end of word.
 */
export function findNode(node: ITrieNode, word: string): ITrieNode | undefined {
    let current = node;
    let idx = 0;
    while (idx < word.length) {
        const next = current.get(word[idx++]);
        if (!next) {
            return undefined;
        }
        current = next;
    }
    return current;
}
|
||
/**
 * Counts the distinct nodes reachable from `root`, including `root` itself.
 * A node reached along several paths (or through a cycle) is counted once.
 *
 * @param root - the node to start from.
 * @returns the number of distinct node objects visited.
 */
export function countNodes(root: ITrieNode): number {
    const visited = new Set<ITrieNode>();
    const pending: ITrieNode[] = [root];

    for (let node = pending.pop(); node !== undefined; node = pending.pop()) {
        if (visited.has(node)) continue;
        visited.add(node);
        for (let i = 0; i < node.size; ++i) {
            pending.push(node.child(i));
        }
    }

    return visited.size;
}
|
||
/**
 * Counts the words reachable from `root`: one for every path that ends on a node
 * flagged `eow`. Per-node totals are memoized, so a sub-trie shared by several parents
 * is only walked once while still contributing to each parent.
 *
 * @param root - the node to start from.
 * @returns the number of words below (and including) `root`.
 */
export function countWords(root: ITrieNode): number {
    const totals = new Map<ITrieNode, number>();

    function tally(node: ITrieNode): number {
        const known = totals.get(node);
        if (known !== undefined) return known;

        const own = node.eow ? 1 : 0;
        // Record the node before descending so a circular reference finds an entry
        // (its own-word count) instead of recursing forever.
        totals.set(node, own);

        let total = own;
        const childCount = node.size;
        for (let i = 0; i < childCount; ++i) {
            total += tally(node.child(i));
        }
        totals.set(node, total);
        return total;
    }

    return tally(root);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
import { describe, expect, test } from 'vitest'; | ||
|
||
import { trieRootToITrieRoot } from '../TrieNode/trie.js'; | ||
import { createTrieFromList } from '../TrieNode/trie-util.js'; | ||
import { walker } from './walker.js'; | ||
import type { WalkerIterator, YieldResult } from './walkerTypes.js'; | ||
|
||
/**
 * Checks that `walker` visits words in sorted order and honours the `goDeeper`
 * feedback passed to `next()`.
 */
describe('walker', () => {
    // Sort a copy: Array.prototype.sort works in place and would otherwise reorder
    // the shared `sampleWords` fixture for every other user of it.
    const trieNode = createTrieFromList([...sampleWords].sort());
    const root = trieRootToITrieRoot(trieNode);

    test('walker', () => {
        const i = walker(root);
        // A depth limit of 4 stops the walk from descending past four-letter prefixes.
        const result = walkerToArray(i, 4);
        expect(result).toEqual(sampleWords.filter((a) => a.length <= 4).sort());
    });
});
|
||
/**
 * Drains a walker, collecting the text of every yielded node that ends a word, while
 * telling the walker not to descend below a depth limit.
 *
 * @param w - the walker to drain.
 * @param depth - number of levels to visit; nodes are yielded with zero-based depths,
 *   so descent continues only while a node's depth is below `depth - 1`.
 * @returns the collected words, in the order the walker yielded them.
 */
function walkerToArray(w: WalkerIterator, depth: number): string[] {
    const deepest = depth - 1;
    const found: string[] = [];
    let descend = true;
    for (let step = w.next(descend); !step.done; step = w.next(descend)) {
        const { text, node, depth: nodeDepth } = step.value;
        if (node.eow) {
            found.push(text);
        }
        // The answer is handed to the walker by the next call to `next()`.
        descend = nodeDepth < deepest;
    }
    return found;
}
|
||
/**
 * Word-list fixture: families of related words that share long prefixes.
 * The list is intentionally not in sorted order.
 */
const sampleWords = [
    // walk / talk / lift
    'walk', 'walked', 'walker', 'walking', 'walks',
    'talk', 'talks', 'talked', 'talker', 'talking',
    'lift', 'lifts', 'lifted', 'lifter', 'lifting',
    // jou…
    'journal', 'journals', 'journalism', 'journalist', 'journalistic',
    'journey', 'journeyer', 'journeyman', 'journeymen',
    'joust', 'jouster', 'jousting',
    // jov… / jow…
    'jovial', 'joviality',
    'jowl', 'jowly',
    // joy…
    'joy', 'joyful', 'joyfuller', 'joyfullest', 'joyfulness',
    'joyless', 'joylessness',
    'joyous', 'joyousness',
    'joyridden', 'joyride', 'joyrider', 'joyriding', 'joyrode',
    'joystick',
];
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
import type { ITrieNode } from './ITrieNode.js'; | ||
import type { WalkerIterator } from './walkerTypes.js'; | ||
import { CompoundWordsMethod, JOIN_SEPARATOR, WORD_SEPARATOR } from './walkerTypes.js'; | ||
|
||
/**
 * Walks the trie depth first and yields `{ text, node, depth }` for each node reached,
 * treating word ends as places where another word may start.
 *
 * For `JOIN_WORDS` and `SEPARATE_WORDS`, every end-of-word node gets one extra edge
 * (keyed by `JOIN_SEPARATOR` or `WORD_SEPARATOR`) that leads back to `root`, so such a
 * walk can continue indefinitely unless the caller stops descending.
 *
 * `next(goDeeper)`: pass `false` to skip the children of the node just yielded;
 * `undefined` (or `true`) descends into them.
 */
function* compoundWalker(root: ITrieNode, compoundingMethod: CompoundWordsMethod): WalkerIterator {
    type Edge = readonly [string, ITrieNode];
    type Edges = readonly Edge[];
    interface Frame {
        /** Text accumulated on the way to this level. */
        text: string;
        /** Edges still to be visited at this level. */
        edges: Edges;
        /** Index of the next edge to visit. */
        next: number;
    }

    const noEdges: Edges = Object.freeze([] as Edge[]);
    const restartEdgesByMethod: { [index: number]: Edges } = {
        [CompoundWordsMethod.NONE]: noEdges,
        [CompoundWordsMethod.JOIN_WORDS]: [[JOIN_SEPARATOR, root]],
        [CompoundWordsMethod.SEPARATE_WORDS]: [[WORD_SEPARATOR, root]],
    };
    const restartEdges = restartEdgesByMethod[compoundingMethod];
    const hasRestart = restartEdges.length > 0;

    /** Lists the edges leaving `n`, including the edge back to the root where one applies. */
    function edgesOf(n: ITrieNode): Edges {
        if (!n.hasChildren()) {
            return n.eow ? restartEdges : noEdges;
        }
        const own = n.keys().map((key, idx) => [key, n.child(idx)] as const);
        return n.eow && hasRestart ? own.concat(restartEdges) : own;
    }

    const stack: Frame[] = [{ text: '', edges: edgesOf(root), next: 0 }];
    let depth = 0;
    while (depth >= 0) {
        let frame = stack[depth];
        let prefix = frame.text;
        while (frame.next < frame.edges.length) {
            const [char, node] = frame.edges[frame.next++];
            const text = prefix + char;
            const goDeeper = yield { text, node, depth };
            if (goDeeper ?? true) {
                depth++;
                prefix = text;
                stack[depth] = { text, edges: edgesOf(node), next: 0 };
            }
            // Either the freshly pushed level or, when not descending, the current one.
            frame = stack[depth];
        }
        depth -= 1;
    }
}
|
||
/**
 * Walks the trie depth first and yields `{ text, node, depth }` for every node below
 * `root`, visiting each node's children in the order of its `keys()`.
 *
 * `next(goDeeper)`: pass `false` to skip the children of the node just yielded;
 * any other value (including `undefined`) descends into them.
 */
function* nodeWalker(root: ITrieNode): WalkerIterator {
    interface Frame {
        /** Text accumulated on the way to `node`. */
        text: string;
        node: ITrieNode;
        /** Snapshot of `node.keys()`. */
        keys: readonly string[];
        /** Index of the next child to visit. */
        next: number;
    }

    const stack: Frame[] = [{ text: '', node: root, keys: root.keys(), next: 0 }];
    let depth = 0;
    while (depth >= 0) {
        let frame = stack[depth];
        let prefix = frame.text;
        while (frame.next < frame.keys.length && frame.node) {
            const idx = frame.next++;
            const char = frame.keys[idx];
            const node = frame.node.child(idx);
            const text = prefix + char;
            const goDeeper = yield { text, node, depth };
            if (goDeeper !== false) {
                depth++;
                prefix = text;
                const reusable = stack[depth];
                const childKeys = node.keys();
                if (reusable) {
                    // Recycle the frame left behind by an earlier visit to this depth.
                    reusable.text = text;
                    reusable.node = node;
                    reusable.keys = childKeys;
                    reusable.next = 0;
                } else {
                    stack[depth] = { text, node, keys: childKeys, next: 0 };
                }
            }
            // Either the level just entered or, when not descending, the current one.
            frame = stack[depth];
        }
        depth -= 1;
    }
}
|
||
/**
 * Creates a walker over the trie rooted at `root`.
 *
 * @param root - the node to start from.
 * @param compoundingMethod - how word ends are treated; `NONE` (the default) selects
 *   the plain node walker, any other value selects the compound walker.
 * @returns an iterator whose `next(goDeeper)` controls whether to descend below the
 *   node just yielded.
 */
export function walker(
    root: ITrieNode,
    compoundingMethod: CompoundWordsMethod = CompoundWordsMethod.NONE
): WalkerIterator {
    if (compoundingMethod === CompoundWordsMethod.NONE) {
        return nodeWalker(root);
    }
    return compoundWalker(root, compoundingMethod);
}
|
||
/**
 * Walks the trie and yields each word; delegates to `walkerWordsITrie`.
 *
 * @param root - the node to start from.
 * @returns an iterable of the words found below `root`.
 */
export function walkerWords(root: ITrieNode): Iterable<string> {
    const wordsBelowRoot = walkerWordsITrie(root);
    return wordsBelowRoot;
}
|
||
/**
 * Walks the trie depth first and yields the text of every node flagged `eow`,
 * visiting each node's children in the order of its `keys()`.
 * The root itself is never yielded, so an empty word is not reported.
 *
 * @param root - the node to start from.
 */
export function* walkerWordsITrie(root: ITrieNode): Iterable<string> {
    interface Frame {
        /** Text accumulated on the way to `node`. */
        text: string;
        node: ITrieNode;
        /** Snapshot of `node.keys()`. */
        keys: readonly string[];
        /** Index of the next key to visit. */
        next: number;
    }

    const stack: Frame[] = [{ text: '', node: root, keys: root.keys(), next: 0 }];
    let depth = 0;
    while (depth >= 0) {
        let frame = stack[depth];
        let prefix = frame.text;
        while (frame.next < frame.keys.length && frame.node) {
            const char = frame.keys[frame.next++];
            const node = frame.node.get(char);
            // Skip a key that has no node behind it.
            if (!node) continue;
            const text = prefix + char;
            if (node.eow) yield text;
            depth++;
            prefix = text;
            const childKeys = node.keys();
            const reusable = stack[depth];
            if (reusable) {
                // Recycle the frame left behind by an earlier visit to this depth.
                reusable.text = text;
                reusable.node = node;
                reusable.keys = childKeys;
                reusable.next = 0;
            } else {
                stack[depth] = { text, node, keys: childKeys, next: 0 };
            }
            frame = stack[depth];
        }
        depth -= 1;
    }
}
Oops, something went wrong.