Skip to content

Commit

Permalink
feat: parse Typos word lists. (#3844)
Browse files Browse the repository at this point in the history
* fix: Correctly annotate the `ignore` dictionary.
* feat: parse Typos word lists.
* feat: Add `reduce` methods to Pipe
* feat: Support a Typos Dictionary
  • Loading branch information
Jason3S committed Nov 15, 2022
1 parent 47a27ef commit 8ddba0e
Show file tree
Hide file tree
Showing 23 changed files with 895 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class IgnoreWordsDictionary implements SpellingDictionary {
private dictNonStrict: Set<string>;
readonly containsNoSuggestWords = true;
readonly options: SpellingDictionaryOptions = {};
readonly type = 'forbidden';
readonly type = 'ignore';
constructor(readonly name: string, readonly source: string, words: Iterable<string>) {
this.dict = new Set(words);
this.dictNonStrict = new Set(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
export type { TypoEntry, TyposDef } from './typos';
export { parseTyposFile, parseTyposLine, processEntriesToTyposDef } from './typosParser';
export { createTyposDef, extractAllSuggestions } from './util';
20 changes: 20 additions & 0 deletions packages/cspell-dictionary/src/SpellingDictionary/Typos/typos.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/** Value used when a typo has no suggested replacement. */
type NoSuggestion = null | undefined;
/** Value used when a typo has exactly one suggested replacement. */
type SingleSuggestion = string;
/** Value used when a typo has several suggested replacements. */
type MultipleSuggestions = string[];

/** The suggestions stored for one typo: a list, a single word, or nothing. */
export type TyposDefValue = MultipleSuggestions | SingleSuggestion | NoSuggestion;
/** The incorrect word, used as the lookup key of a `TyposDef`. */
export type TyposDefKey = string;

/**
 * Typos Definition
 * key - the incorrect word
 * value - the suggestions.
 */
export type TyposDef = Record<TyposDefKey, TyposDefValue>;

/** String form of an entry: a bare word. */
type TypoNoSuggestions = string;
/** Tuple form of an entry: the forbidden word followed by zero or more suggestions. */
type TypoWithSuggestionsArray = [forbidWord: string, ...suggestions: string[]];
/** Object form of an entry: a map from typos to their suggestions. */
type TypoWithSuggestionsObj = TyposDef;
/** An entry given in either the tuple form or the object form. */
type TypoWithSuggestions = TypoWithSuggestionsArray | TypoWithSuggestionsObj;

/** Any accepted way of writing a typo entry: a bare word, a tuple, or a map. */
export type TypoEntry = TypoNoSuggestions | TypoWithSuggestions;
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { parseTyposFile, createTyposDefFromEntries, processEntriesToTyposDef } from './typosParser';

// Table-driven tests for the typos parser.
describe('TypoParser', () => {
// parseTyposFile: `->` or `:` separates a typo from its suggestions, `,` separates
// suggestions, and `;` or a newline separates entries. A later entry for the same
// typo replaces the earlier one (see the last row).
test.each`
content | expected
${''} | ${{}}
${'apple ->orange'} | ${{ apple: 'orange' }}
${'apple ->'} | ${{ apple: null }}
${'apple : , '} | ${{ apple: null }}
${'a: b, c'} | ${{ a: ['b', 'c'] }}
${'a: b; c; d:e'} | ${{ a: 'b', c: null, d: 'e' }}
${'a->b , c'} | ${{ a: ['b', 'c'] }}
${'a->b , c'} | ${{ a: ['b', 'c'] }}
${'a->b , c\nb'} | ${{ a: ['b', 'c'], b: null }}
${'a->b , c\nb\na->b'} | ${{ a: 'b', b: null }}
`('parseTyposFile $content', ({ content, expected }) => {
const result = parseTyposFile(content);
expect(result).toEqual(expected);
});

// createTyposDefFromEntries: string and tuple entries; entries with an empty key are dropped.
test.each`
entries | expected
${[]} | ${{}}
${['']} | ${{}}
${[['', 'b']]} | ${{}}
${['a']} | ${{ a: null }}
${[['a']]} | ${{ a: null }}
${[['a', 'b']]} | ${{ a: 'b' }}
${[['a', 'b', 'c']]} | ${{ a: ['b', 'c'] }}
`('createTyposDefFromEntries $entries', ({ entries, expected }) => {
const result = createTyposDefFromEntries(entries);
expect(result).toEqual(expected);
});

// processEntriesToTyposDef: same cases as above plus the object form, where a
// one-element list collapses to a single string and a comma-separated string is split.
test.each`
entries | expected
${[]} | ${{}}
${['']} | ${{}}
${[['', 'b']]} | ${{}}
${['a']} | ${{ a: null }}
${[['a']]} | ${{ a: null }}
${[['a', 'b']]} | ${{ a: 'b' }}
${[['a', 'b', 'c']]} | ${{ a: ['b', 'c'] }}
${{ a: ['b'] }} | ${{ a: 'b' }}
${{ a: 'b,c' }} | ${{ a: ['b', 'c'] }}
`('processEntriesToTyposDef $entries', ({ entries, expected }) => {
const result = processEntriesToTyposDef(entries);
expect(result).toEqual(expected);
});

// processEntriesToTyposDef must throw when a suggestion is neither a string,
// a list of strings, nor null/undefined.
test.each`
entries
${[['a', ['b']]]}
${{ a: {} }}
`('processEntriesToTyposDef errors $entries', ({ entries }) => {
expect(() => processEntriesToTyposDef(entries)).toThrow();
});
});
126 changes: 126 additions & 0 deletions packages/cspell-dictionary/src/SpellingDictionary/Typos/typosParser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import assert from 'assert';
import { TypoEntry, TyposDef, TyposDefValue } from './typos';
import { appendToDef, createTyposDef } from './util';

/**
 * Type guard that never returns `false`: it throws when the value is not a string.
 * The boolean return lets it be passed to `Array.prototype.filter` so that the
 * result is typed as `string[]` while invalid elements raise an assertion error.
 * @param v - value to check
 * @returns always `true` (when it returns at all)
 * @throws AssertionError if `v` is not a string
 */
function assertString(v: unknown): v is string {
    const isString = typeof v === 'string';
    assert(isString, 'A string was expected.');
    return true;
}

/** Separates the individual suggestions inside the suggestions part of an entry (`,`). */
const suggestionsSeparator = /[,]/;
/** Separates the typo (left side) from its suggestions (right side): `:` or `->`. */
const typoSuggestionsSeparator = /:|->/;
/** Separates one entry from the next: a newline or `;`. */
const typoEntrySeparator = /[\n;]/;
/** Matches a `#` and everything after it up to the end of that line; removed by `parseTyposFile`. */
const inlineComment = /#.*/gm;

/**
 * Build a TyposDef by appending every entry, in iteration order, to a fresh
 * prototype-less object. Entries are appended as-is (no line parsing).
 * @param entries - the entries to collect
 * @returns the populated TyposDef
 */
export function createTyposDefFromEntries(entries: Iterable<TypoEntry>): TyposDef {
    const collected: TyposDef = Object.create(null);
    for (const item of entries) {
        appendToDef(collected, item);
    }
    return collected;
}

/**
 * Unicode-normalize a string to NFC, the default form of `String.prototype.normalize`.
 * @param s - text to normalize
 * @returns the normalized text
 */
function normalize(s: string): string {
    const canonical = s.normalize('NFC');
    return canonical;
}

/**
 * Trim each string, drop the ones that end up empty, and normalize the rest.
 * @param lines - raw strings
 * @returns the cleaned, non-empty strings in their original order
 */
function trimAndFilter(lines: readonly string[]): string[] {
    const kept: string[] = [];
    for (const raw of lines) {
        const trimmed = raw.trim();
        if (trimmed) {
            kept.push(normalize(trimmed));
        }
    }
    return kept;
}

/**
 * Clean a list of raw suggestions and collapse it to its most compact form.
 * @param rawSugs - suggestions that may contain blanks or surrounding whitespace
 * @returns `null` when nothing is left, the single suggestion when exactly one
 *   is left, otherwise the cleaned list
 */
function cleanSugs(rawSugs: readonly string[]): TyposDefValue {
    const cleaned = trimAndFilter(rawSugs);
    if (cleaned.length === 0) {
        return null;
    }
    if (cleaned.length === 1) {
        return cleaned[0];
    }
    return cleaned;
}

/**
 * Split a separator-delimited suggestions string and clean the pieces.
 * @param value - e.g. `'b, c'`
 * @returns the cleaned suggestions (see `cleanSugs` for the possible shapes)
 */
function splitSuggestionsValue(value: string): TyposDefValue {
    const pieces = value.split(suggestionsSeparator);
    return cleanSugs(pieces);
}

/**
 * Convert an untrusted object into a clean TyposDef.
 *
 * Keys are trimmed and normalized; empty keys are skipped. String values are
 * split into suggestions, array values must contain only strings, and
 * `null` / `undefined` values become `null`.
 * @param dirtyDef - the value to sanitize
 * @returns a new TyposDef, or `undefined` when `dirtyDef` is falsy or not an object
 * @throws AssertionError when a value is of any other type, or when an array
 *   value contains a non-string
 */
export function sanitizeIntoTypoDef(dirtyDef: TyposDef | Record<string, unknown> | unknown): TyposDef | undefined {
    if (typeof dirtyDef !== 'object' || !dirtyDef) return undefined;

    const clean = createTyposDef();

    for (const [rawKey, rawValue] of Object.entries(dirtyDef)) {
        const key = normalize(rawKey.trim());
        if (key) {
            if (typeof rawValue === 'string') {
                clean[key] = splitSuggestionsValue(rawValue);
            } else if (Array.isArray(rawValue)) {
                // assertString throws on the first non-string element.
                clean[key] = cleanSugs(rawValue.filter(assertString));
            } else {
                assert(rawValue === undefined || rawValue === null, 'Unexpected suggestion type.');
                clean[key] = null;
            }
        }
    }

    return clean;
}

/**
 * Used to process entries found in a `cspell.json` file.
 *
 * An array of entries is first reduced to a single TyposDef; an object is used
 * as given. Either way the result is then sanitized.
 * @param entries - entries to process
 * @returns a TyposDef
 * @throws AssertionError when sanitizing yields no definition or rejects a value
 */
export function processEntriesToTyposDef(entries: TyposDef | readonly TypoEntry[] | Record<string, unknown>): TyposDef {
    const sanitized = sanitizeIntoTypoDef(Array.isArray(entries) ? reduceToTyposDef(entries) : entries);
    assert(sanitized);
    return sanitized;
}

/**
 * Parse every entry and merge the results, in order, into one TyposDef.
 * @param entries - entries in any accepted form
 * @returns the merged TyposDef
 */
function reduceToTyposDef(entries: Iterable<TypoEntry>): TyposDef {
    const merged = createTyposDef();
    for (const rawEntry of entries) {
        const parsed = parseTyposLine(rawEntry);
        appendToDef(merged, parsed);
    }
    return merged;
}

/**
 * Tries to parse an entry.
 *
 * - A string is split into sub-entries (newline or `;`), each of which is split
 *   into a typo and its suggestions (`:` or `->`). A string that holds a single
 *   bare word is returned as that word; otherwise a TyposDef containing every
 *   sub-entry is returned. Sub-entries without suggestions map to `null`, and
 *   sub-entries whose typo is empty are skipped.
 * - An array must contain only strings; they are trimmed and returned as a
 *   tuple, or `undefined` when the first element is empty.
 * - An object is sanitized into a TyposDef.
 * @param line - any valid TypoEntry.
 * @returns a valid TypoEntry, or `undefined` when there is nothing to add
 * @throws AssertionError when an array or object entry holds an unsupported value
 */
export function parseTyposLine(line: TypoEntry): TypoEntry | undefined {
    if (!line) return undefined;
    if (typeof line === 'string') {
        const subEntries = splitIntoLines(line);
        const def = createTyposDef();
        for (const subEntry of subEntries) {
            const [left, right] = splitEntry(subEntry);
            const typo = left.trim();
            if (!right) {
                // A lone bare word keeps its plain-string form for existing callers.
                if (subEntries.length === 1) return typo;
                // Record it and keep going: returning here would discard the
                // other sub-entries of the same line.
                if (typo) def[typo] = null;
                continue;
            }
            // An empty typo (e.g. `': b'`) is not a usable key.
            if (!typo) continue;
            def[typo] = splitSuggestionsValue(right);
        }
        return def;
    }
    if (Array.isArray(line)) {
        const [key, ...sugs] = line.filter(assertString).map((s) => s.trim());
        if (!key) return undefined;
        return [key, ...sugs];
    }
    return sanitizeIntoTypoDef(line);
}

/**
 * Normalize the content, split it into entries on newlines and `;`, then trim
 * the pieces and drop the empty ones.
 * @param content - raw text
 * @returns the non-empty entries
 */
function splitIntoLines(content: string): string[] {
    const pieces = normalize(content).split(typoEntrySeparator);
    return trimAndFilter(pieces);
}

/**
 * Split an entry at the typo/suggestions separator (`:` or `->`).
 * At most two pieces are kept; anything after a second separator is discarded.
 * @param line - a single entry
 * @returns the typo and, when a separator was present, the suggestions text
 */
function splitEntry(line: string): readonly [string, string | undefined] {
    const parts = line.split(typoSuggestionsSeparator, 2);
    return parts as [string, string];
}

/**
 * Parse the text of a typos word list into a TyposDef.
 * `#` comments are removed before the text is split into entries.
 * @param content - the file content
 * @returns the parsed TyposDef
 */
export function parseTyposFile(content: string): TyposDef {
    const withoutComments = content.replace(inlineComment, '');
    const entries = splitIntoLines(withoutComments);
    return reduceToTyposDef(entries);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { appendToDef, createTyposDef, extractAllSuggestions, extractIgnoreValues } from './util';

// Table-driven tests for the helpers in `./util`.
describe('typos/util', () => {
    // appendToDef: string, tuple and object entries are merged into `def`;
    // an existing key is overwritten.
    test.each`
        def | entry | expected
        ${{}} | ${''} | ${{}}
        ${{ a: 'b' }} | ${'a'} | ${{ a: null }}
        ${{}} | ${['a']} | ${{ a: null }}
        ${{}} | ${['a', 'b']} | ${{ a: 'b' }}
        ${{}} | ${['a', 'b', 'c']} | ${{ a: ['b', 'c'] }}
        ${{ a: 'aa', b: 'bb' }} | ${{ a: 'aaa' }} | ${{ a: 'aaa', b: 'bb' }}
    `('appendToDef', ({ def, entry, expected }) => {
        expect(appendToDef(def, entry)).toEqual(expected);
    });

    // createTyposDef: key/value pairs are stored as given.
    // The title names the function under test so failures are reported correctly.
    test.each`
        entries | expected
        ${[]} | ${{}}
        ${undefined} | ${{}}
        ${[['a', null]]} | ${{ a: null }}
        ${[['a', 'b']]} | ${{ a: 'b' }}
        ${[['a', ['b']]]} | ${{ a: ['b'] }}
        ${[['a', ['b', 'c']]]} | ${{ a: ['b', 'c'] }}
    `('createTyposDef $entries', ({ entries, expected }) => {
        const result = createTyposDef(entries);
        expect(result).toEqual(expected);
    });

    // extractAllSuggestions: every suggestion value, with null/undefined skipped.
    test.each`
        typos | expected
        ${{}} | ${[]}
        ${{ a: null, b: undefined, c: 'cc', d: ['dd', 'ee'] }} | ${['cc', 'dd', 'ee']}
    `('extractAllSuggestions $typos', ({ typos, expected }) => {
        const r = extractAllSuggestions(typos);
        expect(r).toEqual(new Set(expected));
    });

    // extractIgnoreValues: all suggestions plus the keys that start with the
    // ignore prefix, with the prefix removed.
    test.each`
        typos | expected
        ${{}} | ${[]}
        ${{ a: null, b: undefined, c: 'cc', d: ['dd', 'ee'] }} | ${['cc', 'dd', 'ee']}
        ${{ '!a': null, '!b': undefined, c: 'cc', d: ['dd', 'ee'] }} | ${['cc', 'dd', 'ee', 'a', 'b']}
    `('extractIgnoreValues $typos', ({ typos, expected }) => {
        const r = extractIgnoreValues(typos, '!');
        expect(r).toEqual(new Set(expected));
    });
});
60 changes: 60 additions & 0 deletions packages/cspell-dictionary/src/SpellingDictionary/Typos/util.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { opConcatMap, opFilter, pipe, reduce } from '@cspell/cspell-pipe/sync';
import { TypoEntry, TyposDef, TyposDefKey, TyposDefValue } from './typos';

/**
 * Append an entry to a TyposDef.
 *
 * - A string entry is stored as a key with no suggestions (`null`).
 * - A tuple entry is trimmed; its first element is the key and the remaining
 *   non-empty elements are the suggestions (`null`, a single string, or a list).
 *   A tuple with an empty key is ignored.
 * - An object entry is copied onto `def`.
 * @param def - modified in place
 * @param entry - entry to add; a falsy entry leaves `def` unchanged
 * @returns def
 */
export function appendToDef(def: TyposDef, entry: TypoEntry | undefined): TyposDef {
    if (!entry) return def;

    if (typeof entry === 'string') {
        def[entry] = null;
    } else if (Array.isArray(entry)) {
        const [key, ...rest] = entry.map((part) => part.trim());
        if (key) {
            // The parts are already trimmed; only the blanks need removing.
            const sugs = rest.filter((sug) => sug.length > 0);
            if (sugs.length === 0) {
                def[key] = null;
            } else if (sugs.length === 1) {
                def[key] = sugs[0];
            } else {
                def[key] = sugs;
            }
        }
    } else {
        Object.assign(def, entry);
    }

    return def;
}

/**
 * Create a TyposDef backed by a prototype-less object, optionally seeded
 * with key/value pairs. Later pairs overwrite earlier ones with the same key.
 * @param entries - optional `[typo, suggestions]` pairs
 * @returns the new TyposDef
 */
export function createTyposDef(entries?: Iterable<[TyposDefKey, TyposDefValue]>): TyposDef {
    const result: TyposDef = Object.create(null);

    if (entries) {
        for (const [typo, suggestions] of entries) {
            result[typo] = suggestions;
        }
    }

    return result;
}

/**
 * Collect every suggestion that appears as a value in a TyposDef.
 * `null` / `undefined` values are skipped and list values are flattened.
 * @param typosDef - the definition to read
 * @returns the distinct suggestions
 */
export function extractAllSuggestions(typosDef: TyposDef): Set<string> {
    const suggestions = pipe(
        Object.values(typosDef),
        opFilter(isDefined),
        opConcatMap((value) => {
            if (Array.isArray(value)) {
                return value;
            }
            return [value];
        })
    );
    return new Set(suggestions);
}

/**
 * Collect the words to ignore: every suggestion in the TyposDef plus every key
 * that starts with `ignorePrefix`, with that prefix removed.
 * @param typosDef - the definition to read
 * @param ignorePrefix - prefix that marks a key as an ignore word
 * @returns the distinct ignore words
 */
export function extractIgnoreValues(typosDef: TyposDef, ignorePrefix: string): Set<string> {
    const collected = extractAllSuggestions(typosDef);
    const prefixLength = ignorePrefix.length;
    const prefixedKeys = Object.keys(typosDef).filter((key) => key.startsWith(ignorePrefix));
    const ignoreWords = prefixedKeys.map((key) => key.slice(prefixLength));
    return reduce(ignoreWords, (acc, word) => acc.add(word), collected);
}

/**
 * Type guard that rejects only `undefined` and `null`; other falsy values
 * such as `0`, `''` and `false` count as defined.
 * @param v - value to test
 * @returns `true` when `v` is neither `undefined` nor `null`
 */
function isDefined<T>(v: T | undefined | null): v is T {
    const isMissing = v === undefined || v === null;
    return !isMissing;
}
Loading

0 comments on commit 8ddba0e

Please sign in to comment.