Skip to content

Commit

Permalink
fix: work towards a common base Trie Interface (#4483)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed May 14, 2023
1 parent 2b6fb3c commit 88bead7
Show file tree
Hide file tree
Showing 28 changed files with 523 additions and 103 deletions.
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
import type { TrieOptions } from './TrieNode.js';
import type { TrieOptions } from './TrieOptions.js';

/**
 * Read-only view of a single node in a Trie.
 * Children are addressable either by their key (`get` / `has`) or by their index (`child`).
 */
export interface ITrieNode {
/** flag End of Word */
readonly eow: boolean;
/** number of children */
readonly size: number;
/** get keys to children */
getKeys(): readonly string[];
/** get keys to children */
keys(): readonly string[];
/** get the child nodes */
values(): readonly ITrieNode[];
/** get child ITrieNode */
get(char: string): ITrieNode | undefined;
/** get a child by the key index (the walkers in this change pair `child(i)` with `keys()[i]`) */
child(idx: number): ITrieNode | undefined;
child(idx: number): ITrieNode;
/** has child */
has(char: string): boolean;
/** `true` iff this node has children */
hasChildren(): boolean;
}

export interface ITrieNodeRoot extends ITrieNode {
Expand Down
8 changes: 8 additions & 0 deletions packages/cspell-trie-lib/src/lib/ITrieNode/TrieOptions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import type { PartialWithUndefined } from '../types.js';

/**
 * Options attached to a Trie; every field is a required string.
 * NOTE(review): the per-field descriptions below are inferred from the field names only — confirm against the consumers.
 */
export interface TrieOptions {
/** presumably the marker character used for compound words — TODO confirm */
compoundCharacter: string;
/** presumably the prefix marking entries stored with case and accents stripped — TODO confirm */
stripCaseAndAccentsPrefix: string;
/** presumably the prefix marking forbidden words — TODO confirm */
forbiddenWordPrefix: string;
}
/**
 * Either a `PartialWithUndefined<TrieOptions>` or `undefined` (no options supplied).
 * NOTE(review): `PartialWithUndefined` comes from '../types.js'; presumably it makes each field optional and allows `undefined` — confirm.
 */
export type PartialTrieOptions = PartialWithUndefined<TrieOptions> | undefined;
59 changes: 59 additions & 0 deletions packages/cspell-trie-lib/src/lib/ITrieNode/trie-util.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import { describe, expect, test } from 'vitest';

import { trieRootToITrieRoot } from '../TrieNode/trie.js';
import { createTrieFromList } from '../TrieNode/trie-util.js';
import { mergeDefaults } from '../utils/mergeDefaults.js';
import { countNodes, countWords, findNode, has, iteratorTrieWords } from './trie-util.js';

// Exercises the ITrieNode based utility functions against a trie built from the `words` fixture.
describe('Validate Util Functions', () => {
// NOTE(review): `words` is a `const` declared further down in this file, so this callback must run
// after the module has finished evaluating — confirm the test runner invokes it lazily.
const trieNode = createTrieFromList(words);
// Wrap the TrieNode root so the tests go through the ITrieNode interface.
const trie = trieRootToITrieRoot(trieNode);

test('createTriFromList', () => {
expect(has(trie, 'sample')).toBe(true);
expect(has(trie, 'not found')).toBe(false);
});

test('has', () => {
// cspell:ignore sampl
expect(has(trie, 'sample')).toBe(true);
// A prefix of a word is reachable in the trie but is not itself a word.
expect(has(trie, 'sampl')).toBe(false);
});

test('find', () => {
expect(has(trie, 'sample')).toBe(true);
// cspell:ignore sampl samp
const n0 = findNode(trie, 'sample');
const n1 = findNode(trie, 'sampl');
const n2 = findNode(trie, 'samp');
expect(n0?.eow).toBeTruthy();
// Each shorter prefix must lead to the very same node object as the longer lookup.
expect(n1?.get('e')).toBe(n0);
expect(n2?.get('l')).toBe(n1);
});

test('countNodes', () => {
expect(countNodes(trie)).toBe(73);
});

test('countWords', () => {
expect(countWords(trie)).toBe(19);
});

test('iteratorTrieWords', () => {
// Words come back in trie walk order, not in the order of the original sentence.
expect([...iteratorTrieWords(trie)].join(' ')).toBe(
'These There are some someone sample space spaces. words worry. with for everyone extra to use, complete is no'
);
});

test('mergeDefaults', () => {
const a = { a: 1, b: 'b', c: 'c' };
const b = { a: 3, b: 'bb' };

// The result has exactly the keys of the second argument; values come from the first argument when present.
expect(mergeDefaults(a, b)).toEqual({ a: 1, b: 'b' });
expect(mergeDefaults(b, a)).toEqual({ a: 3, b: 'bb', c: 'c' });
});
});

// Fixture text for the test trie. It is split on single spaces only, so punctuation stays
// attached to the words (e.g. 'use,' and 'spaces.').
const sentence =
'These are some sample words for everyone to use, complete with extra spaces. There is no space for someone to worry.';
const words = sentence.split(' ');
76 changes: 76 additions & 0 deletions packages/cspell-trie-lib/src/lib/ITrieNode/trie-util.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import type { ITrieNode } from './ITrieNode.js';
import { walker, walkerWords } from './walker.js';
import type { YieldResult } from './walkerTypes.js';

/**
 * Tells whether a node marks the end of a word.
 * @param node - the node to inspect.
 * @returns the node's `eow` flag.
 */
export function isWordTerminationNode(node: ITrieNode): boolean {
    const { eow } = node;
    return eow;
}

/**
 * Creates an iterable over the Trie starting at `node`.
 * This is a thin wrapper that delegates to `walker` using its default compounding method.
 * @param node - the node to start walking from.
 * @returns the iterable produced by `walker`.
 */
export function walk(node: ITrieNode): Iterable<YieldResult> {
    const nodeIterator = walker(node);
    return nodeIterator;
}

/** Alias of `walk`. */
export const iterateTrie = walk;

/**
 * Creates an iterable that yields the words stored in the Trie below `node`.
 * This is a thin wrapper that delegates to `walkerWords`.
 * @param node - the node to start from.
 * @returns an iterable of words.
 */
export function iteratorTrieWords(node: ITrieNode): Iterable<string> {
    const wordIterator = walkerWords(node);
    return wordIterator;
}

/**
 * Checks whether `word` is stored in the Trie rooted at `node`.
 * @param node - the node to start the lookup from.
 * @param word - the text to look up.
 * @returns `true` only when the node reached by `word` exists and is flagged end-of-word.
 */
export function has(node: ITrieNode, word: string): boolean {
    const found = findNode(node, word);
    if (!found) {
        return false;
    }
    return found.eow || false;
}

/**
 * Follows `word` through the Trie, one UTF-16 code unit at a time.
 * @param node - the node to start from.
 * @param word - the text to follow; an empty string yields `node` itself.
 * @returns the node reached after consuming all of `word`, or `undefined` if any step has no matching child.
 */
export function findNode(node: ITrieNode, word: string): ITrieNode | undefined {
    let cursor = node;
    // `split('')` yields UTF-16 code units, which matches index-based access into the string.
    for (const unit of word.split('')) {
        const next = cursor.get(unit);
        if (!next) {
            return undefined;
        }
        cursor = next;
    }
    return cursor;
}

/**
 * Counts the distinct nodes reachable from `root`, including `root` itself.
 * Nodes are compared by identity, so a node shared by several parents is counted once and cycles terminate.
 * @param root - the node to start from.
 * @returns the number of unique nodes visited.
 */
export function countNodes(root: ITrieNode): number {
    const visited = new Set<ITrieNode>();
    const pending: ITrieNode[] = [root];

    while (pending.length > 0) {
        // The length check above guarantees `pop()` returns an element.
        const current = pending.pop() as ITrieNode;
        if (visited.has(current)) {
            continue;
        }
        visited.add(current);
        for (let idx = 0; idx < current.size; ++idx) {
            pending.push(current.child(idx));
        }
    }

    return visited.size;
}

/**
 * Counts the words reachable from `root`, i.e. the number of paths that end on an end-of-word node.
 * Results are memoized per node (by identity), so shared sub-tries are only walked once.
 * @param root - the node to start from.
 * @returns the number of words below (and including) `root`.
 */
export function countWords(root: ITrieNode): number {
    const memo = new Map<ITrieNode, number>();

    const tally = (current: ITrieNode): number => {
        const cached = memo.get(current);
        if (cached !== undefined) {
            return cached;
        }

        const own = current.eow ? 1 : 0;
        // Record a provisional count first so a circular reference finds an entry instead of recursing forever.
        memo.set(current, own);

        let total = own;
        const childCount = current.size;
        for (let idx = 0; idx < childCount; idx += 1) {
            total += tally(current.child(idx));
        }
        memo.set(current, total);
        return total;
    };

    return tally(root);
}
81 changes: 81 additions & 0 deletions packages/cspell-trie-lib/src/lib/ITrieNode/walker.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import { describe, expect, test } from 'vitest';

import { trieRootToITrieRoot } from '../TrieNode/trie.js';
import { createTrieFromList } from '../TrieNode/trie-util.js';
import { walker } from './walker.js';
import type { WalkerIterator, YieldResult } from './walkerTypes.js';

// Verifies that `walker`, limited to a depth of 4, yields exactly the fixture words of length <= 4 in sorted order.
describe('walker', () => {
    // Sort a copy: `Array.prototype.sort` sorts in place and would otherwise reorder the shared `sampleWords` fixture.
    const trieNode = createTrieFromList([...sampleWords].sort());
    const root = trieRootToITrieRoot(trieNode);

    test('walker', () => {
        const i = walker(root);
        const result = walkerToArray(i, 4);
        // `filter` returns a new array, so sorting it here does not touch the fixture either.
        expect(result).toEqual(sampleWords.filter((a) => a.length <= 4).sort());
    });
});

/**
 * Drains a walker and collects the text of every end-of-word node it yields.
 * After each yielded node, the walker is told to descend only while that node's depth is below `depth - 1`.
 * @param w - the walker to drain.
 * @param depth - the depth limit for the walk.
 * @returns the collected words, in the order the walker yielded them.
 */
function walkerToArray(w: WalkerIterator, depth: number): string[] {
    const deepestLevel = depth - 1;
    const words: string[] = [];
    let descend = true;
    for (let step = w.next(descend); !step.done; step = w.next(descend)) {
        const { text, node, depth: level } = step.value;
        if (node.eow) {
            words.push(text);
        }
        descend = level < deepestLevel;
    }
    return words;
}

// Fixture words for the walker tests. The literal below is not in sorted order
// (e.g. 'talks' is listed before 'talked').
const sampleWords = [
'walk',
'walked',
'walker',
'walking',
'walks',
'talk',
'talks',
'talked',
'talker',
'talking',
'lift',
'lifts',
'lifted',
'lifter',
'lifting',
'journal',
'journals',
'journalism',
'journalist',
'journalistic',
'journey',
'journeyer',
'journeyman',
'journeymen',
'joust',
'jouster',
'jousting',
'jovial',
'joviality',
'jowl',
'jowly',
'joy',
'joyful',
'joyfuller',
'joyfullest',
'joyfulness',
'joyless',
'joylessness',
'joyous',
'joyousness',
'joyridden',
'joyride',
'joyrider',
'joyriding',
'joyrode',
'joystick',
];
142 changes: 142 additions & 0 deletions packages/cspell-trie-lib/src/lib/ITrieNode/walker.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import type { ITrieNode } from './ITrieNode.js';
import type { WalkerIterator } from './walkerTypes.js';
import { CompoundWordsMethod, JOIN_SEPARATOR, WORD_SEPARATOR } from './walkerTypes.js';

/**
 * Walks the Trie depth first and yields `{ text, node, depth }` for each node reached.
 *
 * For `CompoundWordsMethod.JOIN_WORDS` / `SEPARATE_WORDS`, every end-of-word node gets `root` as an extra,
 * last child keyed by `JOIN_SEPARATOR` / `WORD_SEPARATOR`, so the walk continues into a following word.
 * Because that re-enters `root`, a compounding walk over a trie that contains words keeps descending
 * until the caller passes `false`.
 *
 * next(goDeeper: boolean): the value passed to `next` decides whether to descend into the node that was
 * just yielded; `undefined` / `null` are treated as `true`.
 *
 * @param root - the node to start from; also the node compound words restart at.
 * @param compoundingMethod - selects which entry of `roots` is appended after end-of-word nodes.
 */
function* compoundWalker(root: ITrieNode, compoundingMethod: CompoundWordsMethod): WalkerIterator {
// A list of `[key, child node]` pairs.
type Children = Readonly<Array<readonly [string, ITrieNode]>>;
// Shared, frozen empty list used whenever a node contributes no children.
const empty: Children = Object.freeze([] as Children);
// The extra children to append after an end-of-word node, per compounding method.
const roots: { [index: number]: Children } = {
[CompoundWordsMethod.NONE]: empty,
[CompoundWordsMethod.JOIN_WORDS]: [[JOIN_SEPARATOR, root]],
[CompoundWordsMethod.SEPARATE_WORDS]: [[WORD_SEPARATOR, root]],
};

// `undefined` when the selected method adds nothing, so `children` can skip the concat.
const rc = roots[compoundingMethod].length ? roots[compoundingMethod] : undefined;

// Builds the child list of `n`: its own children (key `i` paired with `child(i)`), followed by the
// compound restart entry when `n` is an end-of-word node.
function children(n: ITrieNode): Children {
if (n.hasChildren()) {
const c = n.keys().map((k, i) => [k, n.child(i)] as const);
return n.eow && rc ? c.concat(rc) : c;
}
if (n.eow) {
return roots[compoundingMethod];
}
return empty;
}

let depth = 0;
// One frame per depth: `t` is the text leading to this level, `c` its children, `ci` the next child to visit.
const stack: { t: string; c: Children; ci: number }[] = [];
stack[depth] = { t: '', c: children(root), ci: 0 };
while (depth >= 0) {
let s = stack[depth];
let baseText = s.t;
while (s.ci < s.c.length) {
// Post-increment so this frame resumes at the next sibling once the walk comes back up.
const [char, node] = s.c[s.ci++];
const text = baseText + char;
const goDeeper = yield { text, node, depth };
if (goDeeper ?? true) {
// Descend: push a new frame for the node that was just yielded.
depth++;
baseText = text;
stack[depth] = { t: text, c: children(node), ci: 0 };
}
s = stack[depth];
}
// The current frame is exhausted; go back to the parent level.
depth -= 1;
}
}

/**
 * Walks the Trie depth first and yields `{ text, node, depth }` for every node below `root`.
 *
 * next(goDeeper: boolean): the value passed to `next` decides whether to descend into the node that was
 * just yielded; only an explicit `false` prevents the descent.
 *
 * @param root - the node to start from (it is not yielded itself).
 */
function* nodeWalker(root: ITrieNode): WalkerIterator {
    /** Walk state for one level of the Trie. */
    interface Frame {
        /** text accumulated on the way to `node` */
        text: string;
        node: ITrieNode;
        keys: readonly string[];
        /** index of the next child to visit */
        next: number;
    }

    const frames: Frame[] = [{ text: '', node: root, keys: root.keys(), next: 0 }];
    let level = 0;

    while (level >= 0) {
        const frame = frames[level];
        if (frame.next >= frame.keys.length) {
            // Every child at this level has been visited; go back up to the parent.
            level -= 1;
            continue;
        }

        const idx = frame.next++;
        const child = frame.node.child(idx);
        const text = frame.text + frame.keys[idx];
        const goDeeper = yield { text, node: child, depth: level };
        if (goDeeper === false) {
            continue;
        }

        level += 1;
        const existing = frames[level];
        const childKeys = child.keys();
        if (existing) {
            // Reuse the frame left behind by an earlier visit to this depth.
            existing.text = text;
            existing.node = child;
            existing.keys = childKeys;
            existing.next = 0;
        } else {
            frames[level] = { text, node: child, keys: childKeys, next: 0 };
        }
    }
}

/**
 * Creates a walker over the Trie starting at `root`.
 * @param root - the node to start from.
 * @param compoundingMethod - how compound words are handled; defaults to `CompoundWordsMethod.NONE`,
 *   which selects the plain node walker. Any other value selects the compound walker.
 * @returns the walker iterator.
 */
export function walker(
    root: ITrieNode,
    compoundingMethod: CompoundWordsMethod = CompoundWordsMethod.NONE
): WalkerIterator {
    if (compoundingMethod === CompoundWordsMethod.NONE) {
        return nodeWalker(root);
    }
    return compoundWalker(root, compoundingMethod);
}

/**
 * Creates an iterable over the words stored below `root`.
 * This is a thin wrapper that delegates to `walkerWordsITrie`.
 * @param root - the node to start from.
 * @returns an iterable of words.
 */
export function walkerWords(root: ITrieNode): Iterable<string> {
    const wordIterator = walkerWordsITrie(root);
    return wordIterator;
}

/**
 * Walks the Trie depth first and yields the text of each end-of-word node below `root`.
 * Children are visited in the order of each node's `keys()`; a key for which `get` returns nothing is skipped.
 * @param root - the node to start from (it is not reported itself).
 */
export function* walkerWordsITrie(root: ITrieNode): Iterable<string> {
    /** Walk state for one level of the Trie. */
    interface Frame {
        /** text accumulated on the way to `node` */
        text: string;
        node: ITrieNode;
        keys: readonly string[];
        /** index of the next key to visit */
        next: number;
    }

    const frames: Frame[] = [{ text: '', node: root, keys: root.keys(), next: 0 }];
    let level = 0;

    while (level >= 0) {
        const frame = frames[level];
        if (frame.next >= frame.keys.length) {
            // Every key at this level has been visited; go back up to the parent.
            level -= 1;
            continue;
        }

        const key = frame.keys[frame.next++];
        const child = frame.node.get(key);
        if (!child) {
            continue;
        }

        const word = frame.text + key;
        if (child.eow) {
            yield word;
        }

        // Always descend into the child.
        level += 1;
        const childKeys = child.keys();
        const existing = frames[level];
        if (existing) {
            // Reuse the frame left behind by an earlier visit to this depth.
            existing.text = word;
            existing.node = child;
            existing.keys = childKeys;
            existing.next = 0;
        } else {
            frames[level] = { text: word, node: child, keys: childKeys, next: 0 };
        }
    }
}
Loading

0 comments on commit 88bead7

Please sign in to comment.