Skip to content

Commit

Permalink
fix: Improve suggestions (#2426)
Browse files Browse the repository at this point in the history
Improve suggestions by adding a penalty for
- changing case in a compound words
- lots of short compounds.
  • Loading branch information
Jason3S committed Feb 9, 2022
1 parent e9ae045 commit 51d5a71
Show file tree
Hide file tree
Showing 13 changed files with 333 additions and 120 deletions.
35 changes: 33 additions & 2 deletions cspell.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -403,8 +403,14 @@
"type": "array"
}
],
"default": "\\u0300-\\u0341",
"description": "The accent characters"
"description": "The accent characters.\n\nDefault: `\"\\u0300-\\u0341\"`"
},
"adjustments": {
"description": "A collection of patterns to test against the suggested words. If the word matches the pattern, then the penalty is applied.",
"items": {
"$ref": "#/definitions/PatternAdjustment"
},
"type": "array"
},
"alphabet": {
"anyOf": [
Expand Down Expand Up @@ -948,6 +954,29 @@
"Pattern": {
"type": "string"
},
"PatternAdjustment": {
"additionalProperties": false,
"properties": {
"id": {
"description": "Id of the Adjustment, i.e. `short-compound`",
"type": "string"
},
"penalty": {
"description": "The amount of penalty to apply.",
"type": "number"
},
"regexp": {
"description": "RegExp pattern to match",
"type": "string"
}
},
"required": [
"id",
"regexp",
"penalty"
],
"type": "object"
},
"PatternId": {
"description": "This matches the name in a pattern definition.",
"type": "string"
Expand Down Expand Up @@ -1128,6 +1157,8 @@
},
"properties": {
"$schema": {
"default": "https://raw.githubusercontent.com/streetsidesoftware/cspell/main/cspell.schema.json",
"description": "Url to JSON Schema",
"type": "string"
},
"allowCompoundWords": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,95 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`distanceAStar distanceAStar Asymmetrical penalties adv "walk-around" "walk∙Around" $map 1`] = `
"<walk-around> -> <walk∙Around> (101[+1000])
a: |<^>|<w>|<a>|<l>|<k>|<->|<a>|<r>|<o>|<u>|<n>|<d>|<$>| |
b: |<^>|<w>|<a>|<l>|<k>|<∙>|<A>|<r>|<o>|<u>|<n>|<d>|<$>| |
c: | 0| 0| 0| 0| 0|100| 1| 0| 0| 0| 0| 0| 0| = 101|
p: | 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| = 0|
"
`;

exports[`distanceAStar distanceAStar Asymmetrical penalties adv "walk-around" "walk∙Around" $map 2`] = `
"<walk∙Around> -> <walk-around> (101)
a: |<^>|<w>|<a>|<l>|<k>|<∙>|<A>|<r>|<o>|<u>|<n>|<d>|<$>| |
b: |<^>|<w>|<a>|<l>|<k>|<->|<a>|<r>|<o>|<u>|<n>|<d>|<$>| |
c: | 0| 0| 0| 0| 0|100| 1| 0| 0| 0| 0| 0| 0| = 101|
p: | 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| = 0|
"
`;

exports[`distanceAStar distanceAStar Asymmetrical penalties adv "walked" "walked" $map 1`] = `
"<walked> -> <walked> (0)
a: |<^>|<w>|<a>|<l>|<k>|<e>|<d>|<$>| |
b: |<^>|<w>|<a>|<l>|<k>|<e>|<d>|<$>| |
c: | 0| 0| 0| 0| 0| 0| 0| 0| = 0|
p: | 0| 0| 0| 0| 0| 0| 0| 0| = 0|
"
`;

exports[`distanceAStar distanceAStar Asymmetrical penalties adv "walked" "walked" $map 2`] = `
"<walked> -> <walked> (0)
a: |<^>|<w>|<a>|<l>|<k>|<e>|<d>|<$>| |
b: |<^>|<w>|<a>|<l>|<k>|<e>|<d>|<$>| |
c: | 0| 0| 0| 0| 0| 0| 0| 0| = 0|
p: | 0| 0| 0| 0| 0| 0| 0| 0| = 0|
"
`;

exports[`distanceAStar distanceAStar Asymmetrical penalties adv "walked" "walk∙ED" $map 1`] = `
"<walked> -> <walk∙ED> (102[+1100])
a: |<^>|<w>|<a>|<l>|<k>|<> |<e>|<d>|<$>| |
b: |<^>|<w>|<a>|<l>|<k>|<∙>|<E>|<D>|<$>| |
c: | 0| 0| 0| 0| 0|100| 1| 1| 0| = 102|
p: | 0| 0| 0| 0| 0| 0| 0| 0| 0| = 0|
"
`;

exports[`distanceAStar distanceAStar Asymmetrical penalties adv "walked" "walk∙ED" $map 2`] = `
"<walk∙ED> -> <walked> (102)
a: |<^>|<w>|<a>|<l>|<k>|<∙>|<E>|<D>|<$>| |
b: |<^>|<w>|<a>|<l>|<k>|<> |<e>|<d>|<$>| |
c: | 0| 0| 0| 0| 0|100| 1| 1| 0| = 102|
p: | 0| 0| 0| 0| 0| 0| 0| 0| 0| = 0|
"
`;

exports[`distanceAStar distanceAStar Asymmetrical penalties adv "walked" "walk∙ed" $map 1`] = `
"<walked> -> <walk∙ed> (100)
a: |<^>|<w>|<a>|<l>|<k>|<> |<e>|<d>|<$>| |
b: |<^>|<w>|<a>|<l>|<k>|<∙>|<e>|<d>|<$>| |
c: | 0| 0| 0| 0| 0|100| 0| 0| 0| = 100|
p: | 0| 0| 0| 0| 0| 0| 0| 0| 0| = 0|
"
`;

exports[`distanceAStar distanceAStar Asymmetrical penalties adv "walked" "walk∙ed" $map 2`] = `
"<walk∙ed> -> <walked> (100)
a: |<^>|<w>|<a>|<l>|<k>|<∙>|<e>|<d>|<$>| |
b: |<^>|<w>|<a>|<l>|<k>|<> |<e>|<d>|<$>| |
c: | 0| 0| 0| 0| 0|100| 0| 0| 0| = 100|
p: | 0| 0| 0| 0| 0| 0| 0| 0| 0| = 0|
"
`;

exports[`distanceAStar distanceAStar Asymmetrical penalties adv "walked" "walk∙ed" $map 3`] = `
"<walked> -> <walk∙ed> (100[+100])
a: |<^>|<w>|<a>|<l>|<k>|<> |<e>|<d>|<$>| |
b: |<^>|<w>|<a>|<l>|<k>|<∙>|<e>|<d>|<$>| |
c: | 0| 0| 0| 0| 0|100| 0| 0| 0| = 100|
p: | 0| 0| 0| 0| 0| 0| 0| 0| 0| = 0|
"
`;

exports[`distanceAStar distanceAStar Asymmetrical penalties adv "walked" "walk∙ed" $map 4`] = `
"<walk∙ed> -> <walked> (100)
a: |<^>|<w>|<a>|<l>|<k>|<∙>|<e>|<d>|<$>| |
b: |<^>|<w>|<a>|<l>|<k>|<> |<e>|<d>|<$>| |
c: | 0| 0| 0| 0| 0|100| 0| 0| 0| = 100|
p: | 0| 0| 0| 0| 0| 0| 0| 0| 0| = 0|
"
`;

exports[`distanceAStar distanceAStar adv "" "" $map 1`] = `
"<> -> <> (0)
a: |<^>|<$>| |
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { mapDictionaryInformationToWeightMap } from '..';
import type { SuggestionCostMapDef } from '../models/suggestionCostsDef';
import { distanceAStarWeighted, distanceAStarWeightedEx } from './distanceAStarWeighted';
import { formatExResult } from './formatResultEx';
Expand Down Expand Up @@ -140,6 +141,37 @@ describe('distanceAStar', () => {
expect(r2?.cost).toBe(expected);
}
);

test.each`
wordA | wordB | weightMap | expectedAB | expectedBA
${'walked'} | ${'walked'} | ${calcDefWeightMap()} | ${0} | ${0}
${'walked'} | ${'walk∙ed'} | ${calcWeightMap()} | ${100} | ${100}
${'walked'} | ${'walk∙ed'} | ${calcDefWeightMap()} | ${200} | ${100}
${'walked'} | ${'walk∙ED'} | ${calcDefWeightMap()} | ${1202} | ${102}
${'walk-around'} | ${'walk∙Around'} | ${calcDefWeightMap()} | ${1101} | ${101}
`(
'distanceAStar Asymmetrical penalties adv "$wordA" "$wordB" $map',
({
wordA,
wordB,
weightMap,
expectedAB,
expectedBA,
}: {
wordA: string;
wordB: string;
weightMap: WeightMap;
expectedAB: number;
expectedBA: number;
}) => {
const r1 = distanceAStarWeightedEx(wordA, wordB, weightMap);
const r2 = distanceAStarWeightedEx(wordB, wordA, weightMap);
expect(formatExResult(r1)).toMatchSnapshot();
expect(formatExResult(r2)).toMatchSnapshot();
expect(r1?.cost).toBe(expectedAB);
expect(r2?.cost).toBe(expectedBA);
}
);
});

function mapLetters(cost = 50): SuggestionCostMapDef {
Expand All @@ -157,6 +189,10 @@ function mapLetters(cost = 50): SuggestionCostMapDef {
};
}

function calcDefWeightMap(): WeightMap {
return mapDictionaryInformationToWeightMap({});
}

function calcWeightMap(...defs: SuggestionCostMapDef[]): WeightMap {
return createWeightMap(
...defs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ import { WeightMap } from './weightedMaps';
*/
export function distanceAStarWeighted(wordA: string, wordB: string, map: WeightMap, cost = 100): number {
const best = _distanceAStarWeightedEx(wordA, wordB, map, cost);
return best ? best.c + best.p : -1;
const penalty = map.calcAdjustment(wordB);
return best.c + best.p + penalty;
}

export interface ExResult {
a: string;
b: string;
cost: number;
penalty: number;
segments: {
a: string;
b: string;
Expand All @@ -30,10 +32,13 @@ export function distanceAStarWeightedEx(wordA: string, wordB: string, map: Weigh
const aa = '^' + wordA + '$';
const bb = '^' + wordB + '$';

const penalty = map.calcAdjustment(wordB);

const result: ExResult = {
a: aa,
b: bb,
cost: best.c + best.p,
cost: best.c + best.p + penalty,
penalty,
segments: [],
};
const segments = result.segments;
Expand Down
7 changes: 5 additions & 2 deletions packages/cspell-trie-lib/src/lib/distance/formatResultEx.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ function vizWidth(s: string) {
export function formatExResult(ex: ExResult | undefined): string {
if (!ex) return '<undefined>';

const { cost, segments } = ex;
const { cost, segments, penalty } = ex;
const asString = segments.map(({ a, b, c, p }) => ({
a: `<${a}>`,
b: `<${b}>`,
Expand All @@ -49,7 +49,10 @@ export function formatExResult(ex: ExResult | undefined): string {
const b = 'b: |' + parts.map(({ b, w }) => pR(b, w)).join('|') + '|';
const c = 'c: |' + parts.map(({ c, w }) => pL(c, w)).join('|') + '|';
const p = 'p: |' + parts.map(({ p, w }) => pL(p, w)).join('|') + '|';
return `<${ex.a.slice(1, -1)}> -> <${ex.b.slice(1, -1)}> (${cost})\n${[a, b, c, p].join('\n')}\n`;
const penaltyMsg = penalty ? `[+${penalty}]` : '';
return `<${ex.a.slice(1, -1)}> -> <${ex.b.slice(1, -1)}> (${cost - penalty}${penaltyMsg})\n${[a, b, c, p].join(
'\n'
)}\n`;
}

export function formattedDistance(wordA: string, wordB: string, weightMap: WeightMap, cost?: number) {
Expand Down
31 changes: 26 additions & 5 deletions packages/cspell-trie-lib/src/lib/distance/weightedMaps.test.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
import type { SuggestionCostMapDef } from '../models/suggestionCostsDef';
import { DEFAULT_COMPOUNDED_WORD_SEPARATOR } from '../suggestions/suggestCollector';
import {
addAdjustment,
addDefToWeightMap,
CostPosition,
createWeightMap,
lookupReplaceCost,
PenaltyAdjustment,
prettyPrintWeightMap,
__testing__,
} from './weightedMaps';

const { splitMapSubstrings, splitMap, findTrieCostPrefixes, normalizeDef } = __testing__;

const oc = expect.objectContaining;

// const u = undefined; cspell:

describe('Validate weightedMaps', () => {
Expand All @@ -37,11 +41,11 @@ describe('Validate weightedMaps', () => {

test.each`
defs | expected
${[]} | ${{ insDel: {}, replace: {}, swap: {} }}
${[defIns('ab', 3)]} | ${{ insDel: { n: { a: { c: 3 }, b: { c: 3 } } }, replace: {}, swap: {} }}
${[defIns('ab', 3), defIns('bc', 2)]} | ${{ insDel: { n: { a: { c: 3 }, b: { c: 2 }, c: { c: 2 } } }, replace: {}, swap: {} }}
${[defRep('ab', 3)]} | ${{ insDel: {}, replace: { n: { a: { t: { n: { b: { c: 3 } } } }, b: { t: { n: { a: { c: 3 } } } } } }, swap: {} }}
${[defSwap('ab', 3)]} | ${{ insDel: {}, replace: {}, swap: { n: { a: { t: { n: { b: { c: 3 } } } }, b: { t: { n: { a: { c: 3 } } } } } } }}
${[]} | ${oc({ insDel: {}, replace: {}, swap: {} })}
${[defIns('ab', 3)]} | ${oc({ insDel: { n: { a: { c: 3 }, b: { c: 3 } } }, replace: {}, swap: {} })}
${[defIns('ab', 3), defIns('bc', 2)]} | ${oc({ insDel: { n: { a: { c: 3 }, b: { c: 2 }, c: { c: 2 } } }, replace: {}, swap: {} })}
${[defRep('ab', 3)]} | ${oc({ insDel: {}, replace: { n: { a: { t: { n: { b: { c: 3 } } } }, b: { t: { n: { a: { c: 3 } } } } } }, swap: {} })}
${[defSwap('ab', 3)]} | ${oc({ insDel: {}, replace: {}, swap: { n: { a: { t: { n: { b: { c: 3 } } } }, b: { t: { n: { a: { c: 3 } } } } } } })}
`('buildWeightMap $defs', ({ defs, expected }) => {
expect(createWeightMap(...defs)).toEqual(expected);
});
Expand Down Expand Up @@ -197,6 +201,19 @@ describe('Validate weightedMaps', () => {
`('normalizeDef for compound separators $def', ({ def, expected }) => {
expect(normalizeDef(def)).toEqual(expected);
});

test.each`
adjustments | word | expected
${[]} | ${'hello'} | ${0}
${[adj('case-change', /\p{Ll}∙\p{Lu}|\p{Lu}∙\p{Ll}/gu, 500)]} | ${'hello∙There'} | ${500}
${[adj('case-change', /\p{Ll}∙\p{Lu}|\p{Lu}∙\p{Ll}/gu, 500)]} | ${'WORK∙ed'} | ${500}
${[adj('case-change', /\p{Ll}∙\p{Lu}|\p{Lu}∙\p{Ll}/gu, 500)]} | ${'WORK∙ed∙Fine'} | ${1000}
${[adj('case-change', /\p{Ll}∙\p{Lu}|\p{Lu}∙\p{Ll}/u, 500)]} | ${'WORK∙ed∙Fine'} | ${500}
`('calcAdjustment $adjustments $word', ({ adjustments, word, expected }) => {
const w = createWeightMap();
addAdjustment(w, ...adjustments);
expect(w.calcAdjustment(word)).toEqual(expected);
});
});

function sep(s: string): string {
Expand Down Expand Up @@ -226,3 +243,7 @@ function defSwap(map: string, swap: number, ...opts: Partial<SuggestionCostMapDe
function mergeOps(opts: Partial<SuggestionCostMapDef>[]): Partial<SuggestionCostMapDef> {
return opts.reduce((acc, opt) => ({ ...acc, ...opt }), {} as Partial<SuggestionCostMapDef>);
}

function adj(id: string, regexp: RegExp, penalty: number): PenaltyAdjustment {
return { id, regexp, penalty };
}
36 changes: 36 additions & 0 deletions packages/cspell-trie-lib/src/lib/distance/weightedMaps.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,21 @@ export interface WeightMap {
readonly insDel: TrieCost;
readonly replace: TrieTrieCost;
readonly swap: TrieTrieCost;
readonly adjustments: Map<string, PenaltyAdjustment>;

calcInsDelCosts(pos: CostPosition): Iterable<CostPosition>;
calcSwapCosts(pos: CostPosition): Iterable<CostPosition>;
calcReplaceCosts(pos: CostPosition): Iterable<CostPosition>;
calcAdjustment(word: string): number;
}

export interface PenaltyAdjustment {
/** Penalty Identifier */
id: string;
/** RegExp Pattern to match */
regexp: RegExp;
/** Penalty to apply */
penalty: number;
}

export function createWeightMap(...defs: SuggestionCostMapDef[]): WeightMap {
Expand All @@ -82,6 +93,15 @@ export function addDefToWeightMap(map: WeightMap, ...defs: SuggestionCostMapDef[
return addDefsToWeightMap(map, defs);
}

export function addAdjustment(map: WeightMap, ...adjustments: PenaltyAdjustment[]): WeightMap;
export function addAdjustment(map: WeightMap, adjustment: PenaltyAdjustment): WeightMap;
export function addAdjustment(map: WeightMap, ...adjustments: PenaltyAdjustment[]): WeightMap {
for (const adj of adjustments) {
map.adjustments.set(adj.id, adj);
}
return map;
}

export function addDefsToWeightMap(map: WeightMap, defs: SuggestionCostMapDef[]): WeightMap {
function addSet(set: string[], def: SuggestionCostMapDef) {
addSetToTrieCost(map.insDel, set, def.insDel, def.penalty);
Expand Down Expand Up @@ -283,6 +303,7 @@ class _WeightedMap implements WeightMap {
insDel: TrieCost = {};
replace: TrieTrieCost = {};
swap: TrieTrieCost = {};
adjustments = new Map<string, PenaltyAdjustment>();

*calcInsDelCosts(pos: CostPosition): Iterable<CostPosition> {
const { a, ai, b, bi, c, p } = pos;
Expand Down Expand Up @@ -320,6 +341,21 @@ class _WeightedMap implements WeightMap {
}
}
}

calcAdjustment(word: string): number {
let penalty = 0;
for (const adj of this.adjustments.values()) {
if (adj.regexp.global) {
for (const _m of word.matchAll(adj.regexp)) {
penalty += adj.penalty;
}
} else if (adj.regexp.test(word)) {
penalty += adj.penalty;
}
}

return penalty;
}
}

function prettyPrintInsDel(trie: TrieCost, pfx = '', indent = ' '): string {
Expand Down
Loading

0 comments on commit 51d5a71

Please sign in to comment.