Skip to content

Commit

Permalink
Merge 7d232bb into 5afe9ee
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Jan 4, 2020
2 parents 5afe9ee + 7d232bb commit 44f320d
Show file tree
Hide file tree
Showing 11 changed files with 367 additions and 58 deletions.
1 change: 0 additions & 1 deletion packages/cspell-trie-lib/src/lib/IterableLike.ts

This file was deleted.

57 changes: 57 additions & 0 deletions packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.test.ts
@@ -0,0 +1,57 @@
import { parseDictionary, parseDictionaryLines } from './SimpleDictionaryParser';

// Tests for the simple dictionary parser: line expansion (parseDictionaryLines)
// and trie construction (parseDictionary).
describe('Validate SimpleDictionaryParser', () => {
test('test parsing lines', () => {
// Each dictionary entry is emitted as its normalized form followed by a
// '~'-prefixed lowercase form. A leading/trailing '*' expands into the word
// without the marker and the word with '+' in its place. Entries starting with
// '!' get no '~' variant. Comment text after '#' is removed.
const expected = [
'Begin',
'~begin',
'Begin+',
'~begin+',
'End',
'~end',
'+End',
'~+end',
'+Middle+',
'~+middle+',
'Café',
'~cafe',
'!forbid',
];
// Basic test: the fixture text is split on '\n' and parsed as-is.
expect([...parseDictionaryLines(dictionary().split('\n'))]).toEqual(expected);
// Use expanded accents: the same text in NFD form must produce the same words.
expect([...parseDictionaryLines(dictionary().normalize('NFD').split('\n'))]).toEqual(expected);
});

test('basic test', () => {
const trie = parseDictionary(dictionary());
const result = [...trie.words()];
// parseDictionary de-duplicates and sorts the parsed words before building the
// trie; the list below is the same set of words in sorted order.
// NOTE(review): presumably trie.words() iterates in that order — confirm in trie.ts.
expect(result).toEqual([
'!forbid',
'+End',
'+Middle+',
'Begin',
'Begin+',
'Café',
'End',
'~+end',
'~+middle+',
'~begin',
'~begin+',
'~cafe',
'~end',
]);
});
});

/**
 * Sample dictionary text for the parser tests.
 * It starts and ends with a blank line and includes a comment-only line,
 * optional-compound ('*') and compound ('+') markers, an accented word,
 * trailing comments and a forbidden ('!') word.
 */
function dictionary() {
    const lines = [
        '',
        '# This is a comment.',
        'Begin*',
        '*End',
        '+Middle+',
        'Café # é becomes e',
        '!forbid # do not allow "forbid"',
        '',
    ];
    return lines.join('\n');
}
92 changes: 92 additions & 0 deletions packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts
@@ -0,0 +1,92 @@
import { operators } from 'gensequence';
import { normalizeWordToLowercase, normalizeWord } from './util';
import { COMPOUND_FIX, OPTIONAL_COMPOUND_FIX, FORBID_PREFIX, CASE_INSENSITIVE_PREFIX, LINE_COMMENT } from './constants';
import { Trie } from './trie';
import { buildTrie } from './TrieBuilder';

/**
 * Marker characters and prefixes recognized while parsing a simple dictionary file.
 */
export interface ParseDictionaryOptions {
/** Compound marker; it replaces the optional marker in the compound form of a word and is passed on to the trie options. */
compoundCharacter: string;
/** Optional-compound marker; when it is the first or last character of a line, the line is emitted both without it and with `compoundCharacter` in its place. */
optionalCompoundCharacter: string;
/** Lines whose first character is this prefix do not get a case-insensitive variant. */
forbiddenPrefix: string;
/** Prefix prepended to the lowercase-normalized variant of each word. */
caseInsensitivePrefix: string;
/** Everything from this text to the end of the line is removed before parsing. */
commentCharacter: string;
}

/** Default markers, taken from the shared constants module. Used as the default `options` argument below. */
const _defaultOptions: ParseDictionaryOptions = {
commentCharacter: LINE_COMMENT,
optionalCompoundCharacter: OPTIONAL_COMPOUND_FIX,
compoundCharacter: COMPOUND_FIX,
forbiddenPrefix: FORBID_PREFIX,
caseInsensitivePrefix: CASE_INSENSITIVE_PREFIX,
};

/**
 * Exported view of the default options.
 * Object.freeze freezes its argument in place and returns it, so this is the
 * same (now frozen) object as `_defaultOptions`.
 */
export const defaultParseDictionaryOptions: ParseDictionaryOptions = Object.freeze(_defaultOptions);

/**
 * Lazily expand the lines of a simple dictionary file into the words to store in a trie.
 *
 * Processing steps, applied per line in this order:
 *  1. remove the comment (comment text through end of line) and trim whitespace;
 *  2. drop lines that are empty after step 1;
 *  3. a leading optional-compound marker yields the word without it and the word
 *     with the compound marker in front;
 *  4. a trailing optional-compound marker yields the word without it and the word
 *     with the compound marker appended;
 *  5. each resulting word is emitted normalized, followed — unless it starts with
 *     the forbidden prefix — by the case-insensitive prefix plus its lowercase-normalized form.
 *
 * @param lines the raw dictionary lines
 * @param options marker characters; defaults to the library constants
 * @returns the expanded words, in input order
 */
export function parseDictionaryLines(lines: Iterable<string>, options: ParseDictionaryOptions = _defaultOptions): Iterable<string> {
    const {
        commentCharacter,
        optionalCompoundCharacter: optionalMarker,
        compoundCharacter: compoundMarker,
        caseInsensitivePrefix: ignoreCasePrefix,
        forbiddenPrefix,
    } = options;

    // Matches from the comment marker through the end of the line.
    const commentToEol = new RegExp(escapeRegEx(commentCharacter) + '.*', 'g');

    const stripComment = (line: string): string => line.replace(commentToEol, '').trim();

    const isNotEmpty = (line: string): boolean => !!line;

    function* expandOptionalPrefix(line: string) {
        if (line[0] !== optionalMarker) {
            yield line;
            return;
        }
        const rest = line.slice(1);
        yield rest;
        yield compoundMarker + rest;
    }

    function* expandOptionalSuffix(line: string) {
        if (line.slice(-1) !== optionalMarker) {
            yield line;
            return;
        }
        const stem = line.slice(0, -1);
        yield stem;
        yield stem + compoundMarker;
    }

    function* addNormalizedForms(line: string) {
        yield normalizeWord(line);
        // Forbidden entries are kept exactly as written: no case-insensitive variant.
        if (line[0] === forbiddenPrefix) return;
        yield ignoreCasePrefix + normalizeWordToLowercase(line);
    }

    const pipeline = operators.pipe(
        operators.map(stripComment),
        operators.filter(isNotEmpty),
        operators.concatMap(expandOptionalPrefix),
        operators.concatMap(expandOptionalSuffix),
        operators.concatMap(addNormalizedForms),
    );

    return pipeline(lines);
}

/**
 * Parse the text of a simple dictionary file into a Trie.
 *
 * The text is split on '\n', expanded with `parseDictionaryLines`, de-duplicated
 * and sorted before being handed to `buildTrie` together with the marker
 * characters from `options`.
 *
 * @param text full content of the dictionary file
 * @param options marker characters; defaults to the library constants
 * @returns the trie built from the parsed words
 */
export function parseDictionary(text: string, options: ParseDictionaryOptions = _defaultOptions): Trie {
    const uniqueWords = new Set(parseDictionaryLines(text.split('\n'), options));
    const sortedWords = Array.from(uniqueWords).sort();
    const { compoundCharacter, optionalCompoundCharacter, forbiddenPrefix, caseInsensitivePrefix } = options;
    return buildTrie(sortedWords, {
        compoundCharacter,
        compoundOptionalCharacter: optionalCompoundCharacter,
        forbiddenWordPrefix: forbiddenPrefix,
        stripCaseAndAccentsPrefix: caseInsensitivePrefix,
    });
}

/**
 * Backslash-escape the regular-expression metacharacters in `s` so the result
 * can be embedded in a `RegExp` source and match `s` literally.
 */
function escapeRegEx(s: string) {
    const metaCharacters = /[-\/\\^$*+?.()|[\]{}]/g;
    return s.replace(metaCharacters, (ch) => '\\' + ch);
}
4 changes: 2 additions & 2 deletions packages/cspell-trie-lib/src/lib/TrieBuilder.ts
Expand Up @@ -2,8 +2,8 @@ import { TrieNode } from './TrieNode';
import { Trie, PartialTrieOptions, TrieOptions, mergeOptionalWithDefaults } from './trie';
import { consolidate } from './consolidate';

export function buildTrie(words: Iterable<string>): Trie {
return new TrieBuilder(words).build();
export function buildTrie(words: Iterable<string>, trieOptions?: PartialTrieOptions): Trie {
return new TrieBuilder(words, trieOptions).build();
}

interface PathNode {
Expand Down
6 changes: 2 additions & 4 deletions packages/cspell-trie-lib/src/lib/bufferLines.ts
@@ -1,6 +1,4 @@
import { IterableLike } from './IterableLike';

export function *buffer<T>(iter: IterableLike<T>, bufferSize: number): IterableIterator<T[]> {
export function *buffer<T>(iter: Iterable<T>, bufferSize: number): IterableIterator<T[]> {
const buffer: T[] = [];
for (const s of iter) {
buffer.push(s);
Expand All @@ -15,7 +13,7 @@ export function *buffer<T>(iter: IterableLike<T>, bufferSize: number): IterableI
}
}

export function* bufferLines(iter: IterableLike<string>, bufferSize: number, eol: string): IterableIterator<string> {
export function* bufferLines(iter: Iterable<string>, bufferSize: number, eol: string): IterableIterator<string> {
if (eol) {
for (const s of buffer(iter, bufferSize)) {
yield s.join('') + eol;
Expand Down
132 changes: 132 additions & 0 deletions packages/cspell-trie-lib/src/lib/compoundWalker.test.ts
@@ -0,0 +1,132 @@
import { parseDictionary } from './SimpleDictionaryParser';
import { Trie } from './trie';
import { findWord } from './find';
import { WalkNext, WalkItem, compoundWalker, compoundWords } from './compoundWalker';

// cspell:ignore errorerror

// Tests for compoundWalker / compoundWords using the sample dictionary defined below.
describe('Verify compound walker', () => {
test('compoundWords', () => {
const trie = dictionary();
// '!errorerror' is listed as a forbidden entry in the sample dictionary.
expect(findWord(trie.root, 'errorerror').forbidden).toBe(true);
expect(findWord(trie.root, 'ErrorCodes').found).toBe('errorcodes');
// compoundWords stops descending once an item's compound depth reaches
// maxDepth, so a max depth of 1 yields only words made of a single part.
const words1 = [...compoundWords(trie, 1)];
expect(words1).toEqual([
'Code',
'Codes',
'Error',
'Errors',
'Message',
'Time',
]);
const words2 = [...compoundWords(trie, 2)];
expect(words2).toEqual(expected2());
const words3 = [...compoundWords(trie, 3)];
expect(words3).toContain('PrefixMiddleSuffix');
expect(words3).toContain('PrefixErrorCodes');
expect(words3).toHaveLength(216);
// Every word reachable at depth 2 must also be reachable at depth 3.
words2.forEach(w2 => expect(words3).toContain(w2));
});

test('compoundWords lowercase', () => {
const trie = dictionary();
// Passing false for caseSensitive makes the walker start from the root's
// case-insensitive prefix branch when one exists.
const words2 = [...compoundWords(trie, 2, false)];
expect(words2).toEqual(expected2().map(a => a.toLowerCase()));
});

test('test compound edges', () => {
const trie = dictionary();
// filterWalker (below) yields word nodes as-is and appends '+' to items that
// were reached through a compound edge.
const words1 = [...filterWalker(compoundWalker(trie), 1)];
expect(words1).toEqual([
'Code',
'Codes',
'Code+',
'Error',
'Errors',
'Error+',
'Message',
'Message+',
'Prefix+',
'Time',
'Time+',
]);
});
});

/**
 * Test helper that drives a walker stream and reports what it visits.
 *
 * Yields the accumulated text of every item whose node is flagged (`n.f`), and
 * the text followed by '+' for every item reached through a compound edge.
 * After each item the walker is told to go deeper only while the item's
 * compound depth is below `maxDepth`.
 */
function* filterWalker(stream: Generator<WalkItem, any, WalkNext>, maxDepth: number): Generator<string> {
    for (let step = stream.next(); !step.done; ) {
        const { n: node, s: text, c: viaCompoundEdge, d: depth } = step.value;
        if (node.f) yield text;
        if (viaCompoundEdge) yield text + '+';
        step = stream.next(depth < maxDepth);
    }
}

/**
 * Build the sample trie for the walker tests from a small dictionary containing
 * optional-compound ('*') words, compound-only ('+') parts and one forbidden ('!') word.
 */
function dictionary(): Trie {
    const source = [
        '',
        '# Sample dictionary',
        '*Error*',
        '*Errors',
        '*Code*',
        '*Codes',
        '*Message*',
        '*Message',
        '*Time*',
        '+Middle+',
        'Prefix+',
        '+Suffix',
        '!errorerror',
        '',
    ].join('\n');
    return parseDictionary(source);
}

/**
 * Words expected from `compoundWords(trie, 2)` for the sample dictionary, in
 * walk order. One row per leading word: stand-alone forms first, then the
 * two-part compounds.
 */
function expected2() {
    const words = [
        'Code', 'Codes', 'CodeCode', 'CodeCodes', 'CodeError', 'CodeErrors', 'CodeMessage', 'CodeSuffix', 'CodeTime',
        'Error', 'Errors', 'ErrorCode', 'ErrorCodes', 'ErrorError', 'ErrorErrors', 'ErrorMessage', 'ErrorSuffix', 'ErrorTime',
        'Message', 'MessageCode', 'MessageCodes', 'MessageError', 'MessageErrors', 'MessageMessage', 'MessageSuffix', 'MessageTime',
        'PrefixCode', 'PrefixCodes', 'PrefixError', 'PrefixErrors', 'PrefixMessage', 'PrefixSuffix', 'PrefixTime',
        'Time', 'TimeCode', 'TimeCodes', 'TimeError', 'TimeErrors', 'TimeMessage', 'TimeSuffix', 'TimeTime',
    ];
    return words;
}
74 changes: 74 additions & 0 deletions packages/cspell-trie-lib/src/lib/compoundWalker.ts
@@ -0,0 +1,74 @@
import { Trie } from './trie';
import { TrieNode } from './TrieNode';


/** One step of the compound walk: the walker yields a WalkItem for every node it visits. */
export interface WalkItem {
/** prefix so far */
s: string;
/** the trie node being visited */
n: TrieNode;
/** compound depth: 0 for the initial walk, increased by one each time a compound edge is followed */
d: number;
/** true iff compound edge */
c: boolean;
}

/**
 * Value sent back to the walker through `next()`: `false` stops it from
 * descending below the item just yielded; any other value (including none) lets it continue.
 */
export type WalkNext = boolean;

/**
 * Depth-first walk of a compound trie.
 *
 * Every visited node is yielded as a `WalkItem`. The value sent back through
 * `next()` controls the walk: `next(false)` prevents the walker from going
 * deeper below the item just yielded; anything else lets it continue.
 *
 * If the trie contains compound edges this can become an infinite iterator,
 * because each compound edge continues at the root's compound node with the
 * depth increased by one. Use `next(false)` to bound it.
 *
 * @param trie the compound Trie to walk
 * @param caseSensitive when false, the walk starts from the root's
 *   strip-case-and-accents branch if one exists; otherwise from `trie.root`
 */
export function* compoundWalker(trie: Trie, caseSensitive: boolean = true): Generator<WalkItem, any, WalkNext> {
    const { compoundCharacter, forbiddenWordPrefix, stripCaseAndAccentsPrefix } = trie.options;
    // Edges that are never followed as ordinary characters.
    const skippedKeys = new Set([compoundCharacter, forbiddenWordPrefix, stripCaseAndAccentsPrefix]);
    const caseInsensitiveRoot = caseSensitive ? undefined : trie.root.c?.get(stripCaseAndAccentsPrefix);
    const root = caseInsensitiveRoot || trie.root;

    function* visit(node: TrieNode, text: string, viaCompound: boolean, depth: number): Generator<WalkItem, any, WalkNext> {
        const goDeeper = yield { n: node, s: text, c: viaCompound, d: depth };
        const children = node.c;
        if (goDeeper === false || !children) return;

        for (const [key, child] of children) {
            if (!skippedKeys.has(key)) {
                yield* visit(child, text + key, false, depth);
            }
        }

        // A compound edge continues at the root's compound node, one level deeper.
        if (!children.has(compoundCharacter)) return;
        const compoundRoot = root.c!.get(compoundCharacter);
        if (compoundRoot) {
            yield* visit(compoundRoot, text, true, depth + 1);
        }
    }

    // Make sure we do not walk forbidden and compound only words from the root.
    for (const [key, child] of root.c || []) {
        if (skippedKeys.has(key)) continue;
        yield* visit(child, key, false, 0);
    }
}

/**
 * Generate the words of a compound trie, limited to a maximum compound depth.
 *
 * Drives `compoundWalker`: items whose compound depth has reached `maxDepth`
 * are pruned (not yielded, not descended into); for all other items the
 * accumulated text is yielded when the node is flagged as a word (`n.f`).
 *
 * @param trie Trie to walk
 * @param maxDepth Max compound depth
 * @param caseSensitive case sensitive search.
 */
export function* compoundWords(trie: Trie, maxDepth: number, caseSensitive: boolean = true) {
    const walker = compoundWalker(trie, caseSensitive);
    let step = walker.next();
    while (!step.done) {
        const { n: node, s: word, d: depth } = step.value;
        const tooDeep = depth >= maxDepth;
        if (!tooDeep && node.f) {
            yield word;
        }
        step = tooDeep ? walker.next(false) : walker.next();
    }
}
1 change: 1 addition & 0 deletions packages/cspell-trie-lib/src/lib/constants.ts
Expand Up @@ -3,3 +3,4 @@ export const COMPOUND_FIX = '+';
export const OPTIONAL_COMPOUND_FIX = '*';
export const CASE_INSENSITIVE_PREFIX = '~';
export const FORBID_PREFIX = '!';
export const LINE_COMMENT = '#';

0 comments on commit 44f320d

Please sign in to comment.