Skip to content

Commit

Permalink
Merge 0e56356 into b4aea0c
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Jan 15, 2022
2 parents b4aea0c + 0e56356 commit 7668f90
Show file tree
Hide file tree
Showing 14 changed files with 183 additions and 28 deletions.
@@ -1,32 +1,24 @@
import {
Trie,
SuggestionCollector,
suggestionCollector,
SuggestionResult,
CompoundWordsMethod,
importTrie,
FindWordOptions,
} from 'cspell-trie-lib';
import { createMapper } from '../util/repMap';
import type { FindFullResult, FindWordOptions, SuggestionCollector, SuggestionResult } from 'cspell-trie-lib';
import { CompoundWordsMethod, importTrie, suggestionCollector, Trie } from 'cspell-trie-lib';
import { getDefaultSettings } from '../Settings';
import { memorizer } from '../util/Memorizer';
import { createMapper } from '../util/repMap';
import {
FindResult,
HasOptions,
SpellingDictionary,
SpellingDictionaryOptions,
SuggestOptions,
} from './SpellingDictionary';
import {
hasOptionToSearchOption,
wordSearchForms,
SuggestArgs,
defaultNumSuggestions,
hasOptionToSearchOption,
impersonateCollector,
SuggestArgs,
suggestArgsToSuggestOptions,
wordSearchForms,
wordSuggestFormsArray,
} from './SpellingDictionaryMethods';
import {
SpellingDictionary,
HasOptions,
SuggestOptions,
SpellingDictionaryOptions,
FindResult,
} from './SpellingDictionary';
import { FindFullResult } from '../../../cspell-trie-lib/dist/lib/find';
export class SpellingDictionaryFromTrie implements SpellingDictionary {
static readonly cachedWordsLimit = 50000;
private _size = 0;
Expand Down
1 change: 1 addition & 0 deletions packages/cspell-lib/src/index.ts
Expand Up @@ -36,6 +36,7 @@ export {
SuggestionCollector,
SuggestionResult,
SpellingDictionaryCollection,
SuggestOptions,
} from './SpellingDictionary';
export * from './trace';
export { getLogger, Logger, setLogger } from './util/logger';
Expand Down
53 changes: 53 additions & 0 deletions packages/cspell-trie-lib/src/lib/distance/distance.test.ts
@@ -0,0 +1,53 @@
import { editDistance, editDistanceWeighted, createWeightedMap, updatedWeightedMap } from './distance';

describe('distance', () => {
    // Unweighted distance: the result must be symmetric in its arguments and
    // must scale linearly with the `editCost` argument.
    test.each`
        wordA     | wordB     | expected
        ${''}     | ${''}     | ${0}
        ${'ab'}   | ${'ba'}   | ${100}
        ${'bite'} | ${'bate'} | ${100}
    `('editDistance "$wordA" vs "$wordB"', ({ wordA, wordB, expected }) => {
        expect(editDistance(wordA, wordB)).toBe(expected);
        expect(editDistance(wordB, wordA)).toBe(expected);
        expect(editDistance(wordA, wordB, 200)).toBe(expected * 2);
    });

    // Weighted map shared by the weighted test cases below.
    const weights = createWeightedMap([
        {
            map: 'aeiou', // cspell:disable-line
            replace: 50,
            insDel: 75,
            swap: 45,
        },
        {
            description: 'Vowels',
            map: 'aáâäãåeéêëiíîïoóôöõuúûüyÿ', // cspell:disable-line
            insDel: 50,
            replace: 25, // Replacing one vowel with another is cheap
            swap: 25, // Swapping vowels is cheap
        },
        {
            description: 'Vowel Accents',
            map: 'aáâäãå|eéêë|iíîï|oóôöõ|uúûü|yÿ', // cspell:disable-line
            replace: 10, // Make it cheap to add / remove an accent.
        },
    ]);

    // Extend the existing map in place with doubled-letter / digraph replacements.
    updatedWeightedMap(weights, {
        map: 't(tt)|p(pp)|e(ee)(ea)|l(ll)|a(aa)|o(oo)(oh)(oa)(ao)(ou)|',
        replace: 55,
    });

    // Weighted distance: the title names `editDistanceWeighted` so these cases are
    // reported separately from the unweighted `editDistance` cases above.
    test.each`
        wordA                                | wordB       | expected
        ${''}                                | ${''}       | ${0}
        ${'ab'}                              | ${'ba'}     | ${100}
        ${'botle' /* cspell:disable-line */} | ${'bottle'} | ${55}
        ${'cafe'}                            | ${'café'}   | ${10}
        ${'tee'}                             | ${'tea'}    | ${25}
        ${'trie'}                            | ${'tree'}   | ${25}
    `('editDistanceWeighted "$wordA" vs "$wordB"', ({ wordA, wordB, expected }) => {
        expect(editDistanceWeighted(wordA, wordB, weights)).toBe(expected);
        expect(editDistanceWeighted(wordB, wordA, weights)).toBe(expected);
    });
});
60 changes: 60 additions & 0 deletions packages/cspell-trie-lib/src/lib/distance/distance.ts
@@ -0,0 +1,60 @@
import { distanceAStarWeighted } from './distanceAStarWeighted';
import { levenshteinDistance } from './levenshtein';
import type { WeightedMapDef, WeightedMapTrie } from './weightedMaps';
import { addWeightedDefMapToTrie, buildWeightedMapTrie } from './weightedMaps';

export type { WeightedMapDef } from './weightedMaps';

const defaultCost = 100;

/**
 * Compute the unweighted edit distance between two words.
 * Uses the Damerau–Levenshtein distance algorithm; the number of edits it
 * reports is multiplied by `editCost`.
 * @param wordA - first word to compare
 * @param wordB - second word to compare
 * @param editCost - the cost charged for each edit (defaults to 100)
 * @returns the number of edits scaled by `editCost`.
 */
export function editDistance(wordA: string, wordB: string, editCost = defaultCost): number {
    const edits = levenshteinDistance(wordA, wordB);
    return edits * editCost;
}

/**
 * Compute the edit distance between two words using a weighted map.
 * All arguments are handed unchanged to `distanceAStarWeighted`.
 * @param wordA - first word to compare
 * @param wordB - second word to compare
 * @param weights - the weighted map to apply
 * @param editCost - the cost of each edit (defaults to 100)
 * @returns the weighted edit distance
 */
export function editDistanceWeighted(
    wordA: string,
    wordB: string,
    weights: WeightedMap,
    editCost = defaultCost
): number {
    const distance = distanceAStarWeighted(wordA, wordB, weights, editCost);
    return distance;
}

/**
* A Weighted map used by weighted distance calculations.
*
* This is an alias of `WeightedMapTrie`. Instances are produced by `createWeightedMap`
* and consumed by `editDistanceWeighted`.
*/
export type WeightedMap = WeightedMapTrie;

/**
 * Build a single weighted map out of a list of map definitions.
 * @param defs - the definitions to combine
 * @returns a WeightedMap suitable for weighted distance calculations.
 */
export function createWeightedMap(defs: WeightedMapDef[]): WeightedMap {
    const weightedMap: WeightedMap = buildWeightedMapTrie(defs);
    return weightedMap;
}

/**
 * Add one more WeightedMapDef to an existing WeightedMap.
 * Nothing is returned; the definition is applied to the map that is passed in.
 * @param weightedMap - the map that receives the definition
 * @param def - the definition to add
 */
export function updatedWeightedMap(weightedMap: WeightedMap, def: WeightedMapDef): void {
    // Note the argument order of the helper: definition first, then the target map.
    addWeightedDefMapToTrie(def, weightedMap);
}
Expand Up @@ -6,10 +6,9 @@ import { WeightedMapTrie, WeightedRepMapTrie } from './weightedMaps';
*
* Using basic weights, this algorithm has the same results as the Damerau-Levenshtein algorithm.
*/
export function distanceAStarWeighted(a: string, b: string, map: WeightedMapTrie): number {
export function distanceAStarWeighted(a: string, b: string, map: WeightedMapTrie, cost = 100): number {
const aN = a.length;
const bN = b.length;
const cost = 100;

const candidates = new PairingHeap(compare);

Expand Down
2 changes: 2 additions & 0 deletions packages/cspell-trie-lib/src/lib/distance/index.ts
@@ -0,0 +1,2 @@
export { editDistance, createWeightedMap, editDistanceWeighted } from './distance';
export type { WeightedMap, WeightedMapDef } from './distance';
Expand Up @@ -20,7 +20,36 @@ interface WeightedRepTrieNode {
swap?: number | undefined;
}

export interface WeightedMapDef {
// cspell:ignore aeiouy
/**
* A WeightedMapDef enables setting weights for edits between related characters and substrings.
*
* Multiple groups can be defined using a `|`.
* A multi-character substring is defined using `()`.
*
* For example, in some languages, some letters sound alike.
*
* ```ts
* {
* map: 'sc(sh)(sch)(ss)|t(tt)', // two groups.
* replace: 50, // Make it 1/2 the cost of a normal edit to replace a `t` with `tt`.
* }
* ```
*
* The following could be used to make inserting, removing, or replacing vowels cheaper.
* ```ts
* {
* map: 'aeiouy', // a single group of vowels.
* insDel: 50, // Make it cheaper to insert or delete a vowel.
* replace: 45, // It is even cheaper to replace one with another.
* }
* ```
*
* At least one of `replace`, `insDel`, or `swap` must be provided: the type is a union of
* three variants, each of which makes one of those costs required.
*
* Note: the default cost of a single edit is 100.
*/
export type WeightedMapDef = WeightedMapDefReplace | WeightMapDefInsDel | WeightMapDefSwap;

interface WeightedMapDefBase {
/**
* The set of substrings to map, these are generally single character strings.
*
Expand All @@ -47,6 +76,22 @@ export interface WeightedMapDef {
* This represents the cost to change `ei` to `ie` or the reverse.
*/
swap?: number;
/**
* A description to describe the purpose of the map.
*/
description?: string;
}

/**
* Variant of `WeightedMapDefBase` in which the `replace` cost is required.
* One of the members of the `WeightedMapDef` union.
*/
interface WeightedMapDefReplace extends WeightedMapDefBase {
replace: number;
}

/**
* Variant of `WeightedMapDefBase` in which the `insDel` cost is required.
* One of the members of the `WeightedMapDef` union.
*
* NOTE(review): the name starts with `Weight` while the sibling `WeightedMapDefReplace`
* starts with `Weighted` — consider aligning the names.
*/
interface WeightMapDefInsDel extends WeightedMapDefBase {
insDel: number;
}

/**
* Variant of `WeightedMapDefBase` in which the `swap` cost is required.
* One of the members of the `WeightedMapDef` union.
*
* NOTE(review): the name starts with `Weight` while the sibling `WeightedMapDefReplace`
* starts with `Weighted` — consider aligning the names.
*/
interface WeightMapDefSwap extends WeightedMapDefBase {
swap: number;
}

export function buildWeightedMapTrie(defs: WeightedMapDef[]): WeightedMapTrie {
Expand Down Expand Up @@ -133,7 +178,7 @@ function lowest(a: number | undefined, b: number | undefined): number | undefine
* Splits a WeightedMapDef.map
* @param map
*/
function splitMap(def: WeightedMapDef): string[][] {
function splitMap(def: WeightedMapDefBase): string[][] {
const { map } = def;

const sets = map.split('|');
Expand Down
9 changes: 6 additions & 3 deletions packages/cspell-trie-lib/src/lib/index.ts
@@ -1,4 +1,7 @@
export { consolidate } from './consolidate';
export { createWeightedMap, editDistance, editDistanceWeighted } from './distance';
export type { WeightedMap, WeightedMapDef } from './distance';
export type { FindFullResult } from './find';
export { ExportOptions, importTrie, serializeTrie } from './io/importExport';
export { parseDictionary, parseDictionaryLines } from './SimpleDictionaryParser';
export { MaxCost, suggestionCollector, SuggestionCollector, SuggestionResult } from './suggestCollector';
Expand All @@ -24,20 +27,20 @@ export {
has,
insert,
isCircular,
isDefined,
isWordTerminationNode,
iterateTrie,
iteratorTrieWords,
mergeDefaults,
mergeOptionalWithDefaults,
normalizeWord,
normalizeWordToLowercase,
normalizeWordForCaseInsensitive,
isDefined,
normalizeWordToLowercase,
orderTrie,
trieNodeToRoot,
walk,
} from './trie-util';
export { buildTrie, buildTrieFast, TrieBuilder } from './TrieBuilder';
export { ChildMap, FLAG_WORD, TrieNode, TrieRoot } from './TrieNode';
export { hintedWalker, JOIN_SEPARATOR, walker, WORD_SEPARATOR, CompoundWordsMethod } from './walker';
export { CompoundWordsMethod, hintedWalker, JOIN_SEPARATOR, walker, WORD_SEPARATOR } from './walker';
export type { HintedWalkerIterator, Hinting, WalkerIterator, YieldResult } from './walker';

0 comments on commit 7668f90

Please sign in to comment.