-
-
Notifications
You must be signed in to change notification settings - Fork 85
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: parse Typos word lists. (#3844)
* fix: Correctly annotate the `ignore` dictionary. * feat: parse Typos word lists. * feat: Add `reduce` methods to Pipe * feat: Support a Typos Dictionary
- Loading branch information
Showing
23 changed files
with
895 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
3 changes: 3 additions & 0 deletions
3
packages/cspell-dictionary/src/SpellingDictionary/Typos/index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
export type { TypoEntry, TyposDef } from './typos'; | ||
export { parseTyposFile, parseTyposLine, processEntriesToTyposDef } from './typosParser'; | ||
export { createTyposDef, extractAllSuggestions } from './util'; |
20 changes: 20 additions & 0 deletions
20
packages/cspell-dictionary/src/SpellingDictionary/Typos/typos.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/** The typo has no suggested replacement. */
type NoSuggestion = null | undefined;
/** The typo has exactly one suggested replacement. */
type SingleSuggestion = string;
/** The typo has several suggested replacements. */
type MultipleSuggestions = string[];

/** The suggestions stored for one typo: none, a single word, or a list of words. */
export type TyposDefValue = MultipleSuggestions | SingleSuggestion | NoSuggestion;
/** The incorrect word used as the lookup key. */
export type TyposDefKey = string;

/**
 * Typos Definition
 * key - the incorrect word
 * value - the suggestions.
 */
export type TyposDef = Record<TyposDefKey, TyposDefValue>;

/** A bare incorrect word without any suggestions. */
type TypoNoSuggestions = string;
/** A tuple whose first element is the incorrect word; the remaining elements are its suggestions. */
type TypoWithSuggestionsArray = [forbidWord: string, ...suggestions: string[]];
/** One or more typos expressed as a `TyposDef` object. */
type TypoWithSuggestionsObj = TyposDef;
/** Either of the forms that can carry suggestions. */
type TypoWithSuggestions = TypoWithSuggestionsArray | TypoWithSuggestionsObj;

/** Any accepted representation of a typo entry: a bare word, a tuple, or a `TyposDef` object. */
export type TypoEntry = TypoNoSuggestions | TypoWithSuggestions;
58 changes: 58 additions & 0 deletions
58
packages/cspell-dictionary/src/SpellingDictionary/Typos/typosParser.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import { parseTyposFile, createTyposDefFromEntries, processEntriesToTyposDef } from './typosParser'; | ||
|
||
describe('TypoParser', () => {
    // File content: entries are separated by newlines or `;`, a typo is separated from its
    // suggestions by `:` or `->`, suggestions are separated by `,`, and `#` starts a comment.
    // A later entry for the same typo replaces the earlier one.
    test.each`
        content                         | expected
        ${''}                           | ${{}}
        ${'apple ->orange'}             | ${{ apple: 'orange' }}
        ${'apple ->'}                   | ${{ apple: null }}
        ${'apple : , '}                 | ${{ apple: null }}
        ${'a: b, c'}                    | ${{ a: ['b', 'c'] }}
        ${'a: b; c; d:e'}               | ${{ a: 'b', c: null, d: 'e' }}
        ${'a->b , c'}                   | ${{ a: ['b', 'c'] }}
        ${'a->b , c\nb'}                | ${{ a: ['b', 'c'], b: null }}
        ${'a->b , c\nb\na->b'}          | ${{ a: 'b', b: null }}
        ${'# only a comment'}           | ${{}}
        ${'a->b # note\nc # typo only'} | ${{ a: 'b', c: null }}
    `('parseTyposFile $content', ({ content, expected }) => {
        const result = parseTyposFile(content);
        expect(result).toEqual(expected);
    });

    // Entries are used as given (string or tuple); strings are NOT split into sub-entries here.
    test.each`
        entries               | expected
        ${[]}                 | ${{}}
        ${['']}               | ${{}}
        ${[['', 'b']]}        | ${{}}
        ${['a']}              | ${{ a: null }}
        ${[['a']]}            | ${{ a: null }}
        ${[['a', 'b']]}       | ${{ a: 'b' }}
        ${[['a', 'b', 'c']]}  | ${{ a: ['b', 'c'] }}
    `('createTyposDefFromEntries $entries', ({ entries, expected }) => {
        const result = createTyposDefFromEntries(entries);
        expect(result).toEqual(expected);
    });

    // Accepts either a list of entries or a TyposDef-like object; the result is sanitized.
    test.each`
        entries               | expected
        ${[]}                 | ${{}}
        ${['']}               | ${{}}
        ${[['', 'b']]}        | ${{}}
        ${['a']}              | ${{ a: null }}
        ${[['a']]}            | ${{ a: null }}
        ${[['a', 'b']]}       | ${{ a: 'b' }}
        ${[['a', 'b', 'c']]}  | ${{ a: ['b', 'c'] }}
        ${{ a: ['b'] }}       | ${{ a: 'b' }}
        ${{ a: 'b,c' }}       | ${{ a: ['b', 'c'] }}
    `('processEntriesToTyposDef $entries', ({ entries, expected }) => {
        const result = processEntriesToTyposDef(entries);
        expect(result).toEqual(expected);
    });

    // Suggestions that are neither strings, arrays of strings, nor null/undefined are rejected.
    test.each`
        entries
        ${[['a', ['b']]]}
        ${{ a: {} }}
    `('processEntriesToTyposDef errors $entries', ({ entries }) => {
        expect(() => processEntriesToTyposDef(entries)).toThrow();
    });
});
126 changes: 126 additions & 0 deletions
126
packages/cspell-dictionary/src/SpellingDictionary/Typos/typosParser.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
import assert from 'assert'; | ||
import { TypoEntry, TyposDef, TyposDefValue } from './typos'; | ||
import { appendToDef, createTyposDef } from './util'; | ||
|
||
/**
 * Type guard that throws instead of returning `false`.
 * It is shaped as a predicate so it can be passed to `Array.prototype.filter`.
 * @param v - value to check.
 * @returns always `true`; a non-string value raises an assertion error instead.
 */
function assertString(v: unknown): v is string {
    const isString = typeof v === 'string';
    assert(isString, 'A string was expected.');
    return true;
}
|
||
/** Separates the individual suggestions of a typo (`sug1, sug2`). */
const suggestionsSeparator = /[,]/;
/** Separates the typo from its suggestions (`typo: sugs` or `typo -> sugs`). */
const typoSuggestionsSeparator = /:|->/;
/** Separates entries from each other: a newline or `;`. */
const typoEntrySeparator = /[\n;]/;
/** Matches from `#` to the end of the line; removed from file content before parsing. */
const inlineComment = /#.*/gm;
|
||
/**
 * Build a TyposDef by appending each entry, in order, to a fresh prototype-less object.
 * Entries are passed to `appendToDef` as given; they are not parsed first.
 * @param entries - the entries to collect.
 * @returns the accumulated TyposDef.
 */
export function createTyposDefFromEntries(entries: Iterable<TypoEntry>): TyposDef {
    const typos: TyposDef = Object.create(null);
    for (const typoEntry of entries) {
        appendToDef(typos, typoEntry);
    }
    return typos;
}
|
||
/**
 * Apply Unicode normalization to a string.
 * @param s - text to normalize.
 * @returns the NFC form of `s` (NFC is the default form of `String.prototype.normalize`).
 */
function normalize(s: string): string {
    return s.normalize('NFC');
}
|
||
/**
 * Trim every line, discard the ones that end up empty, and normalize the rest.
 * @param lines - raw text fragments.
 * @returns the non-empty fragments, trimmed and normalized, in their original order.
 */
function trimAndFilter(lines: readonly string[]): string[] {
    const kept: string[] = [];
    for (const line of lines) {
        const trimmed = line.trim();
        if (trimmed) {
            kept.push(normalize(trimmed));
        }
    }
    return kept;
}
|
||
/**
 * Reduce a list of raw suggestions to the most compact TyposDefValue.
 * @param rawSugs - suggestions that may contain surrounding whitespace or empty strings.
 * @returns `null` when nothing remains, the word itself when exactly one remains,
 *   otherwise the array of cleaned suggestions.
 */
function cleanSugs(rawSugs: readonly string[]): TyposDefValue {
    const cleaned = trimAndFilter(rawSugs);
    if (cleaned.length === 0) return null;
    if (cleaned.length === 1) return cleaned[0];
    return cleaned;
}
|
||
/**
 * Split a comma separated suggestion string and clean up the pieces.
 * @param value - text such as `"sug1, sug2"`.
 * @returns the cleaned suggestions as a TyposDefValue.
 */
function splitSuggestionsValue(value: string): TyposDefValue {
    const rawSuggestions = value.split(suggestionsSeparator);
    return cleanSugs(rawSuggestions);
}
|
||
/**
 * Convert an untrusted object into a clean TyposDef.
 *
 * Keys are trimmed and normalized; entries whose key ends up empty are skipped.
 * String values are split on `,`; array values must contain only strings.
 *
 * @param dirtyDef - the value to sanitize.
 * @returns a new TyposDef, or `undefined` if `dirtyDef` is falsy or not an object.
 * @throws an assertion error if an array contains a non-string, or if a value is
 *   not a string, an array, `null` or `undefined`.
 */
export function sanitizeIntoTypoDef(dirtyDef: TyposDef | Record<string, unknown> | unknown): TyposDef | undefined {
    if (typeof dirtyDef !== 'object' || !dirtyDef) return undefined;

    const clean = createTyposDef();

    for (const [rawKey, rawValue] of Object.entries(dirtyDef)) {
        const key = normalize(rawKey.trim());
        if (key) {
            if (typeof rawValue === 'string') {
                clean[key] = splitSuggestionsValue(rawValue);
            } else if (Array.isArray(rawValue)) {
                clean[key] = cleanSugs(rawValue.filter(assertString));
            } else {
                assert(rawValue === null || rawValue === undefined, 'Unexpected suggestion type.');
                clean[key] = null;
            }
        }
    }

    return clean;
}
|
||
/**
 * Used to process entries found in a `cspell.json` file.
 * An array of entries is first parsed and merged into a single definition; an object is
 * used as the definition directly. Either way the definition is then sanitized.
 * @param entries - entries to process
 * @returns a TyposDef
 * @throws an assertion error if sanitizing fails or yields no definition.
 */
export function processEntriesToTyposDef(entries: TyposDef | readonly TypoEntry[] | Record<string, unknown>): TyposDef {
    const sanitized = sanitizeIntoTypoDef(Array.isArray(entries) ? reduceToTyposDef(entries) : entries);
    assert(sanitized);
    return sanitized;
}
|
||
/**
 * Parse each entry and merge the results, in order, into one TyposDef.
 * @param entries - the raw entries to parse.
 * @returns the merged TyposDef.
 */
function reduceToTyposDef(entries: Iterable<TypoEntry>): TyposDef {
    const typos = createTyposDef();
    for (const rawEntry of entries) {
        const parsed = parseTyposLine(rawEntry);
        appendToDef(typos, parsed);
    }
    return typos;
}
|
||
/**
 * Tries to parse an entry.
 *
 * - A string may hold several sub-entries separated by a newline or `;`. Each sub-entry is
 *   either `typo`, `typo: sug1, sug2` or `typo -> sug1, sug2`.
 * - An array is treated as `[typo, ...suggestions]`; every element must be a string.
 * - Any other object is sanitized as a TyposDef.
 *
 * @param line - any valid TypoEntry.
 * @returns a valid TypoEntry: the typo itself for a string holding a single typo without
 *   suggestions, a TyposDef for any other string or object, a trimmed tuple for an array,
 *   or `undefined` if the entry is empty or the array has no typo word.
 * @throws an assertion error if an array contains a non-string, or an object contains a
 *   value that is not a string, an array of strings, `null` or `undefined`.
 */
export function parseTyposLine(line: TypoEntry): TypoEntry | undefined {
    if (!line) return undefined;
    if (typeof line === 'string') {
        const def = createTyposDef();
        const subEntries = splitIntoLines(line);
        for (const subEntry of subEntries) {
            const [left, right] = splitEntry(subEntry);
            const typo = left.trim();
            if (!right) {
                // A lone typo without suggestions is returned as a plain string.
                if (subEntries.length === 1) return typo;
                // With several sub-entries, keep going: returning here would silently
                // drop every other sub-entry in the string.
                if (typo) def[typo] = null;
                continue;
            }
            def[typo] = splitSuggestionsValue(right);
        }
        return def;
    }
    if (Array.isArray(line)) {
        const [key, ...sugs] = line.filter(assertString).map((s) => s.trim());
        if (!key) return undefined;
        return [key, ...sugs];
    }
    return sanitizeIntoTypoDef(line);
}
|
||
/**
 * Break content into individual entries.
 * The content is normalized, split on newlines and `;`, and empty pieces are dropped.
 * @param content - text holding zero or more entries.
 * @returns the trimmed, non-empty entries.
 */
function splitIntoLines(content: string): string[] {
    const rawEntries = normalize(content).split(typoEntrySeparator);
    return trimAndFilter(rawEntries);
}
|
||
/**
 * Split an entry into its typo and its suggestion text at the first `:` or `->`.
 * At most two pieces are kept; anything after a second separator is discarded.
 * @param line - a single entry.
 * @returns the typo and the suggestion text; the latter is missing if there was no separator.
 */
function splitEntry(line: string): readonly [string, string | undefined] {
    const parts = line.split(typoSuggestionsSeparator, 2);
    return parts as [string, string];
}
|
||
/**
 * Parse the text of a typos file into a TyposDef.
 * Everything from `#` to the end of a line is removed before the entries are parsed.
 * @param content - the file content.
 * @returns the resulting TyposDef.
 */
export function parseTyposFile(content: string): TyposDef {
    const withoutComments = content.replace(inlineComment, '');
    const entries = splitIntoLines(withoutComments);
    return reduceToTyposDef(entries);
}
47 changes: 47 additions & 0 deletions
47
packages/cspell-dictionary/src/SpellingDictionary/Typos/util.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import { appendToDef, createTyposDef, extractAllSuggestions, extractIgnoreValues } from './util'; | ||
|
||
describe('typos/util', () => {
    // `def` is modified in place and returned; a later entry replaces an earlier one.
    test.each`
        def                      | entry               | expected
        ${{}}                    | ${''}               | ${{}}
        ${{ a: 'b' }}            | ${'a'}              | ${{ a: null }}
        ${{}}                    | ${['a']}            | ${{ a: null }}
        ${{}}                    | ${['a', 'b']}       | ${{ a: 'b' }}
        ${{}}                    | ${['a', 'b', 'c']}  | ${{ a: ['b', 'c'] }}
        ${{ a: 'aa', b: 'bb' }}  | ${{ a: 'aaa' }}     | ${{ a: 'aaa', b: 'bb' }}
    `('appendToDef $def $entry', ({ def, entry, expected }) => {
        expect(appendToDef(def, entry)).toEqual(expected);
    });

    // Values are stored exactly as given: a single-element array is NOT collapsed to a string.
    test.each`
        entries                  | expected
        ${[]}                    | ${{}}
        ${undefined}             | ${{}}
        ${[['a', null]]}         | ${{ a: null }}
        ${[['a', 'b']]}          | ${{ a: 'b' }}
        ${[['a', ['b']]]}        | ${{ a: ['b'] }}
        ${[['a', ['b', 'c']]]}   | ${{ a: ['b', 'c'] }}
    `('createTyposDef $entries', ({ entries, expected }) => {
        const result = createTyposDef(entries);
        expect(result).toEqual(expected);
    });

    test.each`
        typos                                                    | expected
        ${{}}                                                    | ${[]}
        ${{ a: null, b: undefined, c: 'cc', d: ['dd', 'ee'] }}   | ${['cc', 'dd', 'ee']}
    `('extractAllSuggestions $typos', ({ typos, expected }) => {
        const r = extractAllSuggestions(typos);
        expect(r).toEqual(new Set(expected));
    });

    // Keys starting with the ignore prefix are added (without the prefix) to the suggestions.
    test.each`
        typos                                                          | expected
        ${{}}                                                          | ${[]}
        ${{ a: null, b: undefined, c: 'cc', d: ['dd', 'ee'] }}         | ${['cc', 'dd', 'ee']}
        ${{ '!a': null, '!b': undefined, c: 'cc', d: ['dd', 'ee'] }}   | ${['cc', 'dd', 'ee', 'a', 'b']}
    `('extractIgnoreValues $typos', ({ typos, expected }) => {
        const r = extractIgnoreValues(typos, '!');
        expect(r).toEqual(new Set(expected));
    });
});
60 changes: 60 additions & 0 deletions
60
packages/cspell-dictionary/src/SpellingDictionary/Typos/util.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import { opConcatMap, opFilter, pipe, reduce } from '@cspell/cspell-pipe/sync'; | ||
import { TypoEntry, TyposDef, TyposDefKey, TyposDefValue } from './typos'; | ||
|
||
/**
 * Append an entry to a TyposDef.
 *
 * - A string entry is recorded as a typo without suggestions.
 * - An array entry is `[typo, ...suggestions]`; elements are trimmed, empty suggestions are
 *   dropped, and the entry is ignored when the typo itself is empty.
 * - An object entry is merged into `def`, replacing existing keys.
 *
 * @param def - modified in place
 * @param entry - entry to add; falsy entries are ignored.
 * @returns def
 */
export function appendToDef(def: TyposDef, entry: TypoEntry | undefined): TyposDef {
    if (!entry) return def;

    if (Array.isArray(entry)) {
        const [key, ...rest] = entry.map((part) => part.trim());
        if (key) {
            const suggestions = rest.filter((sug) => sug.length > 0);
            if (suggestions.length > 1) {
                def[key] = suggestions;
            } else {
                def[key] = suggestions.length === 1 ? suggestions[0] : null;
            }
        }
        return def;
    }

    if (typeof entry === 'string') {
        def[entry] = null;
        return def;
    }

    return Object.assign(def, entry);
}
|
||
/**
 * Create a TyposDef backed by a prototype-less object.
 * @param entries - optional `[typo, suggestions]` pairs to store; values are stored as given
 *   and a later pair replaces an earlier one with the same typo.
 * @returns the new TyposDef.
 */
export function createTyposDef(entries?: Iterable<[TyposDefKey, TyposDefValue]>): TyposDef {
    const typos: TyposDef = Object.create(null);

    if (entries) {
        for (const pair of entries) {
            typos[pair[0]] = pair[1];
        }
    }

    return typos;
}
|
||
/**
 * Collect every suggestion found in a TyposDef.
 * `null` and `undefined` values are skipped; array values contribute each of their elements.
 * @param typosDef - the definition to read.
 * @returns the set of all suggestions.
 */
export function extractAllSuggestions(typosDef: TyposDef): Set<string> {
    const values = Object.values(typosDef);
    const suggestions = pipe(
        values,
        opFilter(isDefined),
        opConcatMap((value) => (!Array.isArray(value) ? [value] : value))
    );
    return new Set(suggestions);
}
|
||
/**
 * Collect all suggestions plus every key that starts with `ignorePrefix`.
 * Matching keys are added with the prefix removed.
 * @param typosDef - the definition to read.
 * @param ignorePrefix - prefix that marks a key to be collected.
 * @returns the set of suggestions and un-prefixed keys.
 */
export function extractIgnoreValues(typosDef: TyposDef, ignorePrefix: string): Set<string> {
    const collected = extractAllSuggestions(typosDef);
    const prefixLength = ignorePrefix.length;
    const ignoredWords: string[] = [];
    for (const key of Object.keys(typosDef)) {
        if (key.startsWith(ignorePrefix)) {
            ignoredWords.push(key.slice(prefixLength));
        }
    }
    return reduce(ignoredWords, (acc, word) => acc.add(word), collected);
}
|
||
/**
 * Type guard that rejects `null` and `undefined`.
 * @param v - value to check.
 * @returns `true` for every value other than `null` and `undefined`.
 */
function isDefined<T>(v: T | undefined | null): v is T {
    return !(v === null || v === undefined);
}
Oops, something went wrong.