Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cspell-trie-lib] Compound Walker to be used with suggestions. #167

Merged
merged 3 commits into from Jan 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion packages/cspell-trie-lib/src/lib/IterableLike.ts

This file was deleted.

57 changes: 57 additions & 0 deletions packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.test.ts
@@ -0,0 +1,57 @@
import { parseDictionary, parseDictionaryLines } from './SimpleDictionaryParser';

describe('Validate SimpleDictionaryParser', () => {
    // Splits raw dictionary text into lines and collects every entry the parser emits.
    const parseText = (text: string): string[] => Array.from(parseDictionaryLines(text.split('\n')));

    test('test parsing lines', () => {
        // Each source word is followed by its `~`-prefixed case-insensitive form;
        // the forbidden word has no such companion.
        const expected = [
            'Begin',
            '~begin',
            'Begin+',
            '~begin+',
            'End',
            '~end',
            '+End',
            '~+end',
            '+Middle+',
            '~+middle+',
            'Café',
            '~cafe',
            '!forbid',
        ];
        // Basic test
        const fromComposedText = parseText(dictionary());
        expect(fromComposedText).toEqual(expected);
        // Use expanded accents
        const fromDecomposedText = parseText(dictionary().normalize('NFD'));
        expect(fromDecomposedText).toEqual(expected);
    });

    test('basic test', () => {
        const wordsInTrie = Array.from(parseDictionary(dictionary()).words());
        expect(wordsInTrie).toEqual([
            '!forbid',
            '+End',
            '+Middle+',
            'Begin',
            'Begin+',
            'Café',
            'End',
            '~+end',
            '~+middle+',
            '~begin',
            '~begin+',
            '~cafe',
            '~end',
        ]);
    });
});

/**
 * Sample dictionary text used by the parser tests.
 * It contains a comment line, a blank line, optional (`*`) and required (`+`)
 * compound markers, an accented word and a forbidden (`!`) word.
 */
function dictionary(): string {
    const sampleText = `
# This is a comment.

Begin*
*End
+Middle+
Café # é becomes e
!forbid # do not allow "forbid"
`;
    return sampleText;
}
92 changes: 92 additions & 0 deletions packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts
@@ -0,0 +1,92 @@
import { operators } from 'gensequence';
import { normalizeWordToLowercase, normalizeWord } from './util';
import { COMPOUND_FIX, OPTIONAL_COMPOUND_FIX, FORBID_PREFIX, CASE_INSENSITIVE_PREFIX, LINE_COMMENT } from './constants';
import { Trie } from './trie';
import { buildTrie } from './TrieBuilder';

/**
 * Special characters recognized while parsing a simple dictionary text.
 */
export interface ParseDictionaryOptions {
/** Marks a compound position; the parser writes it in place of an optional marker to produce the compound form of a word. */
compoundCharacter: string;
/** When it is the first or last character of a line, the word is emitted both without the marker and with `compoundCharacter` in its place. */
optionalCompoundCharacter: string;
/** Lines whose first character is this prefix do not get a case-insensitive variant. */
forbiddenPrefix: string;
/** Prepended to the lowercase-normalized variant of each non-forbidden word. */
caseInsensitivePrefix: string;
/** Everything from this character to the end of the line is removed before parsing. */
commentCharacter: string;
}

// Parser defaults, taken from the shared constants module.
// Used as the default `options` argument of parseDictionaryLines and parseDictionary.
const _defaultOptions: ParseDictionaryOptions = {
commentCharacter: LINE_COMMENT,
optionalCompoundCharacter: OPTIONAL_COMPOUND_FIX,
compoundCharacter: COMPOUND_FIX,
forbiddenPrefix: FORBID_PREFIX,
caseInsensitivePrefix: CASE_INSENSITIVE_PREFIX,
};

/**
 * Public view of the default options.
 * Object.freeze freezes `_defaultOptions` in place and returns that same object,
 * so the internal default is frozen as well.
 */
export const defaultParseDictionaryOptions: ParseDictionaryOptions = Object.freeze(_defaultOptions);

/**
 * Turns the lines of a simple dictionary file into the word entries to store in a trie.
 *
 * Each line goes through these steps, in order:
 *  1. the comment (comment character to end of line) is removed and the line is trimmed;
 *  2. empty lines are dropped;
 *  3. a leading optional-compound marker expands into the bare word and the compound-prefixed word;
 *  4. a trailing optional-compound marker expands into the bare word and the compound-suffixed word;
 *  5. every resulting word is emitted through `normalizeWord`, followed (unless it starts with the
 *     forbidden prefix) by the case-insensitive prefix plus its `normalizeWordToLowercase` form.
 *
 * @param lines raw dictionary lines
 * @param options special characters to recognize; defaults to the library defaults
 * @returns the expanded entries, in source order
 */
export function parseDictionaryLines(lines: Iterable<string>, options: ParseDictionaryOptions = _defaultOptions): Iterable<string> {
    const optionalMarker = options.optionalCompoundCharacter;
    const compoundMarker = options.compoundCharacter;
    const caseInsensitiveMarker = options.caseInsensitivePrefix;
    const forbiddenMarker = options.forbiddenPrefix;
    const commentPattern = new RegExp(escapeRegEx(options.commentCharacter) + '.*', 'g');

    const stripComment = (line: string): string => line.replace(commentPattern, '').trim();

    const hasContent = (line: string): boolean => line.length > 0;

    // `*word` => `word` and `+word`
    function* expandOptionalPrefix(word: string) {
        if (word[0] !== optionalMarker) {
            yield word;
            return;
        }
        const rest = word.slice(1);
        yield rest;
        yield compoundMarker + rest;
    }

    // `word*` => `word` and `word+`
    function* expandOptionalSuffix(word: string) {
        if (word.slice(-1) !== optionalMarker) {
            yield word;
            return;
        }
        const stem = word.slice(0, -1);
        yield stem;
        yield stem + compoundMarker;
    }

    // Forbidden words are emitted once; all others also get a case-insensitive variant.
    function* emitNormalizedForms(word: string) {
        yield normalizeWord(word);
        if (word[0] === forbiddenMarker) return;
        yield caseInsensitiveMarker + normalizeWordToLowercase(word);
    }

    const pipeline = operators.pipe(
        operators.map(stripComment),
        operators.filter(hasContent),
        operators.concatMap(expandOptionalPrefix),
        operators.concatMap(expandOptionalSuffix),
        operators.concatMap(emitNormalizedForms),
    );

    return pipeline(lines);
}

/**
 * Builds a Trie from the text of a simple dictionary file.
 *
 * The text is split on `\n`, parsed with `parseDictionaryLines`, de-duplicated and sorted
 * before being handed to `buildTrie` together with the matching trie options.
 *
 * @param text full dictionary text
 * @param options special characters to recognize; defaults to the library defaults
 * @returns a Trie containing every parsed entry
 */
export function parseDictionary(text: string, options: ParseDictionaryOptions = _defaultOptions): Trie {
    const uniqueEntries = new Set(parseDictionaryLines(text.split('\n'), options));
    const sortedEntries = Array.from(uniqueEntries).sort();
    const trieOptions = {
        compoundCharacter: options.compoundCharacter,
        compoundOptionalCharacter: options.optionalCompoundCharacter,
        forbiddenWordPrefix: options.forbiddenPrefix,
        stripCaseAndAccentsPrefix: options.caseInsensitivePrefix,
    };
    return buildTrie(sortedEntries, trieOptions);
}

/**
 * Escapes every regular-expression metacharacter in `s` with a backslash,
 * so the result can be embedded in a RegExp source and match `s` literally.
 */
function escapeRegEx(s: string): string {
    const metaCharacters = /[-\/\\^$*+?.()|[\]{}]/g;
    // `$&` re-inserts the matched character after the added backslash.
    return s.replace(metaCharacters, '\\$&');
}
4 changes: 2 additions & 2 deletions packages/cspell-trie-lib/src/lib/TrieBuilder.ts
Expand Up @@ -2,8 +2,8 @@ import { TrieNode } from './TrieNode';
import { Trie, PartialTrieOptions, TrieOptions, mergeOptionalWithDefaults } from './trie';
import { consolidate } from './consolidate';

export function buildTrie(words: Iterable<string>): Trie {
return new TrieBuilder(words).build();
export function buildTrie(words: Iterable<string>, trieOptions?: PartialTrieOptions): Trie {
return new TrieBuilder(words, trieOptions).build();
}

interface PathNode {
Expand Down
6 changes: 2 additions & 4 deletions packages/cspell-trie-lib/src/lib/bufferLines.ts
@@ -1,6 +1,4 @@
import { IterableLike } from './IterableLike';

export function *buffer<T>(iter: IterableLike<T>, bufferSize: number): IterableIterator<T[]> {
export function *buffer<T>(iter: Iterable<T>, bufferSize: number): IterableIterator<T[]> {
const buffer: T[] = [];
for (const s of iter) {
buffer.push(s);
Expand All @@ -15,7 +13,7 @@ export function *buffer<T>(iter: IterableLike<T>, bufferSize: number): IterableI
}
}

export function* bufferLines(iter: IterableLike<string>, bufferSize: number, eol: string): IterableIterator<string> {
export function* bufferLines(iter: Iterable<string>, bufferSize: number, eol: string): IterableIterator<string> {
if (eol) {
for (const s of buffer(iter, bufferSize)) {
yield s.join('') + eol;
Expand Down
132 changes: 132 additions & 0 deletions packages/cspell-trie-lib/src/lib/compoundWalker.test.ts
@@ -0,0 +1,132 @@
import { parseDictionary } from './SimpleDictionaryParser';
import { Trie } from './trie';
import { findWord } from './find';
import { WalkNext, WalkItem, compoundWalker, compoundWords } from './compoundWalker';

// cspell:ignore errorerror

describe('Verify compound walker', () => {
    test('compoundWords', () => {
        const trie = dictionary();
        // Sanity checks on the fixture: the forbidden compound is flagged, a valid one is found.
        expect(findWord(trie.root, 'errorerror').forbidden).toBe(true);
        expect(findWord(trie.root, 'ErrorCodes').found).toBe('errorcodes');

        const singleWords = Array.from(compoundWords(trie, 1));
        expect(singleWords).toEqual([
            'Code',
            'Codes',
            'Error',
            'Errors',
            'Message',
            'Time',
        ]);

        const upToTwoParts = Array.from(compoundWords(trie, 2));
        expect(upToTwoParts).toEqual(expected2());

        const upToThreeParts = Array.from(compoundWords(trie, 3));
        expect(upToThreeParts).toContain('PrefixMiddleSuffix');
        expect(upToThreeParts).toContain('PrefixErrorCodes');
        expect(upToThreeParts).toHaveLength(216);
        // Every shallower compound must also appear in the deeper result.
        for (const word of upToTwoParts) {
            expect(upToThreeParts).toContain(word);
        }
    });

    test('compoundWords lowercase', () => {
        const trie = dictionary();
        const lowercaseWords = Array.from(compoundWords(trie, 2, false));
        const expectedLowercase = expected2().map(word => word.toLowerCase());
        expect(lowercaseWords).toEqual(expectedLowercase);
    });

    test('test compound edges', () => {
        const trie = dictionary();
        const walked = Array.from(filterWalker(compoundWalker(trie), 1));
        expect(walked).toEqual([
            'Code',
            'Codes',
            'Code+',
            'Error',
            'Errors',
            'Error+',
            'Message',
            'Message+',
            'Prefix+',
            'Time',
            'Time+',
        ]);
    });
});

/**
 * Drains a compound walker and reports what it visited as strings:
 * the prefix itself when the node is flagged (`n.f`), and the prefix followed by `+`
 * when the item was reached through a compound edge.
 * The walker is only allowed to go deeper while the item's compound depth is below `maxDepth`.
 *
 * @param stream walker to consume
 * @param maxDepth compound depth at which descending stops
 */
function* filterWalker(stream: Generator<WalkItem, any, WalkNext>, maxDepth: number): Generator<string> {
    for (let step = stream.next(); !step.done; ) {
        const { n: node, s: prefix, c: viaCompoundEdge, d: depth } = step.value;
        if (node.f) {
            yield prefix;
        }
        if (viaCompoundEdge) {
            yield prefix + '+';
        }
        // Tell the walker whether it may descend below the item just handled.
        step = stream.next(depth < maxDepth);
    }
}

/**
 * Builds the compound-word fixture trie used by the walker tests.
 * The sample mixes optional (`*`) and required (`+`) compound markers
 * and forbids the compound `errorerror`.
 */
function dictionary(): Trie {
    const sampleText = `
# Sample dictionary
*Error*
*Errors
*Code*
*Codes
*Message*
*Message
*Time*
+Middle+
Prefix+
+Suffix

!errorerror
`;
    return parseDictionary(sampleText);
}

/**
 * Words expected from the fixture dictionary when compounds of at most two parts are allowed,
 * in the order the walker produces them (grouped here by their first part).
 */
function expected2() {
    const startingWithCode = [
        'Code',
        'Codes',
        'CodeCode',
        'CodeCodes',
        'CodeError',
        'CodeErrors',
        'CodeMessage',
        'CodeSuffix',
        'CodeTime',
    ];
    const startingWithError = [
        'Error',
        'Errors',
        'ErrorCode',
        'ErrorCodes',
        'ErrorError',
        'ErrorErrors',
        'ErrorMessage',
        'ErrorSuffix',
        'ErrorTime',
    ];
    const startingWithMessage = [
        'Message',
        'MessageCode',
        'MessageCodes',
        'MessageError',
        'MessageErrors',
        'MessageMessage',
        'MessageSuffix',
        'MessageTime',
    ];
    // `Prefix` is compound-only, so it never appears on its own.
    const startingWithPrefix = [
        'PrefixCode',
        'PrefixCodes',
        'PrefixError',
        'PrefixErrors',
        'PrefixMessage',
        'PrefixSuffix',
        'PrefixTime',
    ];
    const startingWithTime = [
        'Time',
        'TimeCode',
        'TimeCodes',
        'TimeError',
        'TimeErrors',
        'TimeMessage',
        'TimeSuffix',
        'TimeTime',
    ];
    return [
        ...startingWithCode,
        ...startingWithError,
        ...startingWithMessage,
        ...startingWithPrefix,
        ...startingWithTime,
    ];
}
74 changes: 74 additions & 0 deletions packages/cspell-trie-lib/src/lib/compoundWalker.ts
@@ -0,0 +1,74 @@
import { Trie } from './trie';
import { TrieNode } from './TrieNode';


/**
 * One step reported by the compound walker.
 */
export interface WalkItem {
/** prefix so far */
s: string;
/** trie node reached at this step */
n: TrieNode;
/** compound depth */
d: number;
/** true iff compound edge */
c: boolean;
}

/**
 * Value handed back to the walker through `next()`.
 * Passing `false` keeps the walker from descending below the item it just yielded.
 */
export type WalkNext = boolean;

/**
 * Depth-first walk of a compound trie.
 *
 * Every visited node is yielded as a WalkItem. When a node has a compound edge, the walk
 * continues from the compound sub-tree of the starting root with the compound depth increased
 * by one, so a trie that contains compounds produces an infinite iterator.
 * Call `next(false)` to stop the walker from going deeper below the item it just yielded.
 *
 * @param trie the compound Trie to walk
 * @param caseSensitive when false, the walk starts at the case-insensitive sub-tree if the
 *   trie has one; otherwise it starts at the trie root
 */
export function* compoundWalker(trie: Trie, caseSensitive: boolean = true): Generator<WalkItem, any, WalkNext> {
    const options = trie.options;
    const compoundChar = options.compoundCharacter;
    const caseInsensitiveChar = options.stripCaseAndAccentsPrefix;
    // Edges that are never followed as ordinary letters.
    const reservedEdges = new Set([compoundChar, options.forbiddenWordPrefix, caseInsensitiveChar]);
    const startNode = caseSensitive ? trie.root : (trie.root.c?.get(caseInsensitiveChar) || trie.root);

    function* visit(node: TrieNode, prefix: string, viaCompound: boolean, depth: number): Generator<WalkItem, any, WalkNext> {
        const goDeeper = yield { n: node, s: prefix, c: viaCompound, d: depth };
        const children = node.c;
        if (goDeeper === false || !children) {
            return;
        }
        for (const [edge, child] of children) {
            if (!reservedEdges.has(edge)) {
                yield* visit(child, prefix + edge, false, depth);
            }
        }
        if (!children.has(compoundChar)) {
            return;
        }
        // A compound edge restarts the walk at the words that may follow a compound.
        const compoundRoot = startNode.c!.get(compoundChar);
        if (compoundRoot) {
            yield* visit(compoundRoot, prefix, true, depth + 1);
        }
    }

    // Make sure we do not walk forbidden and compound only words from the root.
    const topLevel = startNode.c;
    if (!topLevel) {
        return;
    }
    for (const [edge, child] of topLevel) {
        if (reservedEdges.has(edge)) continue;
        yield* visit(child, edge, false, 0);
    }
}

/**
 * Lists the words of a compound trie, following compound edges up to a depth limit.
 *
 * Items whose compound depth has reached `maxDepth` are skipped and the walker is told
 * not to descend below them; every other item whose node is flagged (`n.f`) is yielded.
 *
 * @param trie Trie to walk
 * @param maxDepth Max compound depth
 * @param caseSensitive case sensitive search.
 */
export function* compoundWords(trie: Trie, maxDepth: number, caseSensitive: boolean = true) {
    const walker = compoundWalker(trie, caseSensitive);
    for (let step = walker.next(); !step.done; ) {
        const { n: node, s: word, d: depth } = step.value;
        const tooDeep = depth >= maxDepth;
        if (!tooDeep && node.f) {
            yield word;
        }
        // Prune the walk below items that already sit at the depth limit.
        step = tooDeep ? walker.next(false) : walker.next();
    }
}
1 change: 1 addition & 0 deletions packages/cspell-trie-lib/src/lib/constants.ts
Expand Up @@ -3,3 +3,4 @@ export const COMPOUND_FIX = '+';
/** Default marker for an optional compound position at the start or end of a dictionary word. */
export const OPTIONAL_COMPOUND_FIX = '*';
/** Default prefix placed in front of the case-insensitive form of a word. */
export const CASE_INSENSITIVE_PREFIX = '~';
/** Default prefix that marks a dictionary word as forbidden. */
export const FORBID_PREFIX = '!';
/** Default character that starts a comment running to the end of a dictionary line. */
export const LINE_COMMENT = '#';