Skip to content

Commit

Permalink
Merge d1ebe47 into 727999d
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Jun 18, 2023
2 parents 727999d + d1ebe47 commit 1f14c46
Show file tree
Hide file tree
Showing 19 changed files with 581 additions and 195 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ import type { ParsedText } from '@cspell/cspell-types';
import type { CachingDictionary, SearchOptions, SpellingDictionary } from 'cspell-dictionary';
import { createCachingDictionary } from 'cspell-dictionary';

import type { ValidationIssue } from '../Models/ValidationIssue.js';
import * as RxPat from '../Settings/RegExpPatterns.js';
import * as Text from '../util/text.js';
import { clean } from '../util/util.js';
import { split } from '../util/wordSplitter.js';
import { defaultMinWordLength } from './defaultConstants.js';
import type { ValidationIssue } from './index.js';
import { isWordValidWithEscapeRetry } from './isWordValid.js';
import { mapRangeBackToOriginalPos } from './parsedText.js';
import type {
Expand Down
161 changes: 84 additions & 77 deletions packages/cspell-trie-lib/api/api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,83 @@ import { SuggestionCostMapDef, DictionaryDefinitionAugmented } from '@cspell/csp
export { SuggestionCostMapDef } from '@cspell/cspell-types';
import { Operator } from '@cspell/cspell-pipe/sync';

/**
* A cost / penalty pair.
* Costs are minimized while penalties are maximized.
*/
interface Cost$1 {
/**
* The cost of an operation.
* Combining rule: `c'' = min(c, c')` (the smaller cost is kept).
*/
c?: number | undefined;
/**
* The penalties applied.
* Combining rule: `p'' = max(p, p')` (the larger penalty is kept).
*/
p?: number | undefined;
}
/**
* A node in a trie of costs. Each node may carry the optional cost / penalty
* inherited from `Cost$1` and may have nested child nodes.
*/
interface TrieCost extends Cost$1 {
/** nested trie nodes, keyed by string (presumably the text leading to the child - confirm) */
n?: Record<string, TrieCost>;
}
/**
* A trie whose nodes can each reference the root of a `TrieCost` trie.
* NOTE(review): presumably the outer trie is walked with the source text and
* `t` with the target text (used for `replace` / `swap` in `WeightMap`) - confirm.
*/
interface TrieTrieCost {
/** nested trie nodes */
n?: Record<string, TrieTrieCost>;
/** root of cost trie */
t?: Record<string, TrieCost>;
}
/**
* A position within two strings together with a cost and a penalty.
* It is both the input and the yielded item of the `WeightMap.calc*Costs` methods.
* NOTE(review): the field descriptions below are inferred from the names - confirm
* against the implementation.
*/
interface CostPosition {
/** first string (presumably the word being edited) */
a: string;
/** index into `a` (presumably the current position) */
ai: number;
/** second string (presumably the word being matched against) */
b: string;
/** index into `b` (presumably the current position) */
bi: number;
/** cost (presumably accumulated up to this position) */
c: number;
/** penalty (presumably accumulated up to this position) */
p: number;
}
/**
* Weights used by weighted edit distance calculations.
* Instances are produced by `createWeightedMap` and consumed by `editDistanceWeighted`.
*/
interface WeightMap {
/** cost trie for insert / delete edits (inferred from the name - confirm) */
readonly insDel: TrieCost;
/** cost trie for replacement edits (inferred from the name - confirm) */
readonly replace: TrieTrieCost;
/** cost trie for swap edits (inferred from the name - confirm) */
readonly swap: TrieTrieCost;
/** penalty adjustments, keyed by string (presumably `PenaltyAdjustment.id` - confirm) */
readonly adjustments: Map<string, PenaltyAdjustment>;
/** Takes a position and yields positions (presumably those reachable through insert / delete edits - confirm). */
calcInsDelCosts(pos: CostPosition): Iterable<CostPosition>;
/** Takes a position and yields positions (presumably those reachable through swap edits - confirm). */
calcSwapCosts(pos: CostPosition): Iterable<CostPosition>;
/** Takes a position and yields positions (presumably those reachable through replacement edits - confirm). */
calcReplaceCosts(pos: CostPosition): Iterable<CostPosition>;
/** Returns a numeric adjustment for `word` (presumably derived from `adjustments` - confirm). */
calcAdjustment(word: string): number;
}
/**
* A penalty tied to a RegExp pattern.
* This is the value type of `WeightMap.adjustments`.
*/
interface PenaltyAdjustment {
/** Penalty Identifier */
id: string;
/** RegExp Pattern to match */
regexp: RegExp;
/** Penalty to apply */
penalty: number;
}

/**
* Calculate the edit distance between any two words.
* Use the Damerau–Levenshtein distance algorithm.
* @param wordA - the first word to compare
* @param wordB - the second word to compare
* @param editCost - the cost of each edit (defaults to 100)
* @returns the edit distance.
*/
declare function editDistance(wordA: string, wordB: string, editCost?: number): number;
/**
* Calculate the weighted edit distance between any two words.
* @param wordA - the first word to compare
* @param wordB - the second word to compare
* @param weights - the weights to use (see `createWeightedMap` for building one)
* @param editCost - the cost of each edit (defaults to 100)
* @returns the edit distance
*/
declare function editDistanceWeighted(wordA: string, wordB: string, weights: WeightMap, editCost?: number): number;
/**
* Collect Map definitions into a single weighted map.
* @param defs - list of `SuggestionCostMapDef` definitions to combine
* @returns A Weighted Map to be used with distance calculations
* (for example as the `weights` argument of `editDistanceWeighted`).
*/
declare function createWeightedMap(defs: SuggestionCostMapDef[]): WeightMap;

/**
* Make all properties in T optional and Possibly undefined
*/
Expand Down Expand Up @@ -111,84 +188,12 @@ interface YieldResult {
node: ITrieNode;
depth: number;
}
type WalkerIterator = Generator<YieldResult, void, boolean | undefined>;

/**
* Costs are minimized while penalties are maximized.
*/
interface Cost$1 {
/**
* The cost of an operation
* `c'' = min(c, c')`
*/
c?: number | undefined;
/**
* The penalties applied
* `p'' = max(p, p')`
*/
p?: number | undefined;
}
interface TrieCost extends Cost$1 {
/** nested trie nodes */
n?: Record<string, TrieCost>;
}
interface TrieTrieCost {
/** nested trie nodes */
n?: Record<string, TrieTrieCost>;
/** root of cost trie */
t?: Record<string, TrieCost>;
}
interface CostPosition {
a: string;
ai: number;
b: string;
bi: number;
c: number;
p: number;
}
interface WeightMap {
readonly insDel: TrieCost;
readonly replace: TrieTrieCost;
readonly swap: TrieTrieCost;
readonly adjustments: Map<string, PenaltyAdjustment>;
calcInsDelCosts(pos: CostPosition): Iterable<CostPosition>;
calcSwapCosts(pos: CostPosition): Iterable<CostPosition>;
calcReplaceCosts(pos: CostPosition): Iterable<CostPosition>;
calcAdjustment(word: string): number;
}
interface PenaltyAdjustment {
/** Penalty Identifier */
id: string;
/** RegExp Pattern to match */
regexp: RegExp;
/** Penalty to apply */
penalty: number;
}

/**
* Calculate the edit distance between any two words.
* Use the Damerau–Levenshtein distance algorithm.
* @param wordA
* @param wordB
* @param editCost - the cost of each edit (defaults to 100)
* @returns the edit distance.
*/
declare function editDistance(wordA: string, wordB: string, editCost?: number): number;
type FalseToNotGoDeeper = boolean;
/**
* Calculate the weighted edit distance between any two words.
* @param wordA
* @param wordB
* @param weights - the weights to use
* @param editCost - the cost of each edit (defaults to 100)
* @returns the edit distance
* By default a Walker Iterator will go depth first. To prevent the
* walker from going deeper use `iterator.next(false)`.
*/
declare function editDistanceWeighted(wordA: string, wordB: string, weights: WeightMap, editCost?: number): number;
/**
* Collect Map definitions into a single weighted map.
* @param defs - list of definitions
* @returns A Weighted Map to be used with distance calculations.
*/
declare function createWeightedMap(defs: SuggestionCostMapDef[]): WeightMap;
type WalkerIterator = Generator<YieldResult, void, FalseToNotGoDeeper | undefined>;

/**
* Ask for the next result.
Expand Down Expand Up @@ -247,7 +252,7 @@ interface SuggestionOptionsStrict extends GenSuggestionOptionsStrict {
/**
* Apply weights to improve the suggestions.
*/
weightMap?: WeightMap;
weightMap?: WeightMap | undefined;
}
type SuggestionOptions = Partial<SuggestionOptionsStrict>;

Expand Down Expand Up @@ -367,6 +372,7 @@ interface TrieData {
info: Readonly<TrieInfo>;
words(): Iterable<string>;
getRoot(): ITrieNodeRoot;
getNode(prefix: string): ITrieNode | undefined;
has(word: string): boolean;
isForbiddenWord(word: string): boolean;
hasForbiddenWords(): boolean;
Expand Down Expand Up @@ -446,6 +452,7 @@ interface ITrie {
* On the returned Iterator, calling .next(goDeeper: boolean), allows for controlling the depth.
*/
iterate(): WalkerIterator;
weightMap: WeightMap | undefined;
get isCaseAware(): boolean;
}
interface FindWordOptions$1 {
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-trie-lib/src/lib/ITrie.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ describe('Validate Trie Class', () => {
const trie = ITrie.create(sampleWords);
// cspell:ignore joyostalkliftswak
const suggestions = trie.suggest('joyostalkliftswak', { ...NumSuggestions, ...SEPARATE_WORDS });
console.warn('%o', { suggestions });
// console.warn('%o', { suggestions });
expect(suggestions).toEqual(expect.arrayContaining(['joyous talk lifts walk']));
});

Expand Down
7 changes: 6 additions & 1 deletion packages/cspell-trie-lib/src/lib/ITrie.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { opAppend, opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync';

import type { WeightMap } from './distance/index.js';
import type { FindFullResult } from './ITrieNode/find.js';
import { createFindOptions, findLegacyCompound, findWord, findWordNode, isForbiddenWord } from './ITrieNode/find.js';
import type { FindOptions, PartialFindOptions } from './ITrieNode/FindOptions.js';
Expand Down Expand Up @@ -107,6 +108,8 @@ export interface ITrie {
*/
iterate(): WalkerIterator;

weightMap: WeightMap | undefined;

get isCaseAware(): boolean;
}

Expand All @@ -116,6 +119,7 @@ export class ITrieImpl implements ITrie {
private hasForbidden: boolean;
private root: ITrieNodeRoot;
private count?: number;
weightMap: WeightMap | undefined;
constructor(readonly data: TrieData, private numNodes?: number) {
this.root = data.getRoot();
this._info = mergeOptionalWithDefaults(data.info);
Expand Down Expand Up @@ -236,6 +240,7 @@ export class ITrieImpl implements ITrie {
*/
suggestWithCost(text: string, options: SuggestionOptions): SuggestionResult[] {
const sep = options.compoundSeparator;
const weightMap = options.weightMap || this.weightMap;
const adjWord = sep ? replaceAllFactory(sep, '') : (a: string) => a;
const optFilter = options.filter;
const filter = optFilter
Expand All @@ -244,7 +249,7 @@ export class ITrieImpl implements ITrie {
return !this.isForbiddenWord(w) && optFilter(w, cost);
}
: (word: string) => !this.isForbiddenWord(adjWord(word));
const opts = { ...options, filter };
const opts = { ...options, filter, weightMap };
return suggest(this.data, text, opts);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,10 @@ export interface YieldResult {
depth: number;
}

export type WalkerIterator = Generator<YieldResult, void, boolean | undefined>;
export type FalseToNotGoDeeper = boolean;

/**
* By default a Walker Iterator will go depth first. To prevent the
* walker from going deeper use `iterator.next(false)`.
*/
export type WalkerIterator = Generator<YieldResult, void, FalseToNotGoDeeper | undefined>;
10 changes: 8 additions & 2 deletions packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
import type { ITrieNode, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
import { findNode } from '../ITrieNode/trie-util.js';
import type { PartialTrieInfo, TrieInfo } from '../ITrieNode/TrieInfo.js';
import type { TrieData } from '../TrieData.js';
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';
Expand All @@ -15,6 +16,7 @@ export class FastTrieBlob implements TrieData {
private charToIndexMap: CharIndexMap;
private _readonly = false;
private _forbidIdx: number;
private _iTrieRoot: ITrieNodeRoot | undefined;

readonly info: Readonly<TrieInfo>;

Expand Down Expand Up @@ -167,13 +169,17 @@ export class FastTrieBlob implements TrieData {
};

get iTrieRoot(): ITrieNodeRoot {
return FastTrieBlob.toITrieNodeRoot(this);
return (this._iTrieRoot ??= FastTrieBlob.toITrieNodeRoot(this));
}

/**
* Get the root node of this trie.
* Returns the value of the `iTrieRoot` getter.
*/
getRoot(): ITrieNodeRoot {
return this.iTrieRoot;
}

/**
* Look up a node by `prefix`, starting the search at the root of this trie.
* @param prefix - text handed to `findNode` to locate the node.
* @returns whatever `findNode` returns; `undefined` presumably means that no
*   node matches `prefix` (confirm against `findNode`).
*/
getNode(prefix: string): ITrieNode | undefined {
    const root = this.getRoot();
    return findNode(root, prefix);
}

/**
* Check whether `word` is found by `_has` starting at `_forbidIdx`.
* @param word - the word to check.
* @returns `false` when `_forbidIdx` is falsy (no lookup is made); otherwise the
*   result of `_has(_forbidIdx, word)`.
*/
isForbiddenWord(word: string): boolean {
    const forbidIdx = this._forbidIdx;
    if (!forbidIdx) {
        return false;
    }
    return this._has(forbidIdx, word);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class FastTrieBlobINode implements ITrieNode {
readonly node: number[];
readonly eow: boolean;
charToIdx: Record<string, number> | undefined;
private _keys: readonly string[] | undefined;

constructor(readonly trie: FastTrieBlobInternals, readonly nodeIdx: number) {
const node = trie.nodes[nodeIdx];
Expand All @@ -20,8 +21,12 @@ class FastTrieBlobINode implements ITrieNode {
this.id = nodeIdx;
}

/**
* Get the keys to the children of this node.
*
* The result of `calcKeys()` is cached in `_keys` on first use, so the keys
* are calculated at most once per node instance.
* @returns the (read-only) list of child keys.
*/
keys(): readonly string[] {
    return (this._keys ??= this.calcKeys());
}

/** get keys to children */
keys(): readonly string[] {
private calcKeys(): readonly string[] {
if (!this.size) return EmptyKeys;
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex;
const charIndex = this.trie.charIndex;
Expand Down
12 changes: 11 additions & 1 deletion packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { defaultTrieInfo } from '../constants.js';
import type { ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
import type { ITrieNode, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
import { findNode } from '../ITrieNode/trie-util.js';
import type { PartialTrieInfo, TrieInfo } from '../ITrieNode/TrieInfo.js';
import type { TrieData } from '../TrieData.js';
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';
Expand Down Expand Up @@ -40,6 +41,7 @@ export class TrieBlob implements TrieData {
readonly info: Readonly<TrieInfo>;
private _forbidIdx: number | undefined;
private _size: number | undefined;
private _iTrieRoot: ITrieNodeRoot | undefined;

constructor(protected nodes: Uint32Array, protected charIndex: string[], info: PartialTrieInfo) {
this.info = mergeOptionalWithDefaults(info);
Expand All @@ -65,6 +67,10 @@ export class TrieBlob implements TrieData {
}

/**
* Get the root node of this trie.
*
* The root is built by `_getRoot()` on the first call and stored in
* `_iTrieRoot`; later calls return the stored instance.
*/
getRoot(): ITrieNodeRoot {
    if (!this._iTrieRoot) {
        this._iTrieRoot = this._getRoot();
    }
    return this._iTrieRoot;
}

private _getRoot(): ITrieNodeRoot {
const trieData = new TrieBlobInternals(this.nodes, this.charIndex, this.charToIndexMap, {
NodeMaskEOW: TrieBlob.NodeMaskEOW,
NodeMaskNumChildren: TrieBlob.NodeMaskNumChildren,
Expand All @@ -74,6 +80,10 @@ export class TrieBlob implements TrieData {
return new TrieBlobIRoot(trieData, 0, this.info);
}

/**
* Find the node for `prefix`, searching from the root returned by `getRoot()`.
* @param prefix - text handed to `findNode` to locate the node.
* @returns the result of `findNode`; `undefined` presumably means that no node
*   matches `prefix` (confirm against `findNode`).
*/
getNode(prefix: string): ITrieNode | undefined {
    const root = this.getRoot();
    return findNode(root, prefix);
}

private _has(nodeIdx: number, word: string): boolean {
const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren;
const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex;
Expand Down
3 changes: 2 additions & 1 deletion packages/cspell-trie-lib/src/lib/TrieData.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import type { ITrieNodeRoot } from './ITrieNode/ITrieNode.js';
import type { ITrieNode, ITrieNodeRoot } from './ITrieNode/ITrieNode.js';
import type { TrieInfo } from './ITrieNode/TrieInfo.js';

export interface TrieData {
info: Readonly<TrieInfo>;
words(): Iterable<string>;
getRoot(): ITrieNodeRoot;
getNode(prefix: string): ITrieNode | undefined;
has(word: string): boolean;
isForbiddenWord(word: string): boolean;
hasForbiddenWords(): boolean;
Expand Down
7 changes: 6 additions & 1 deletion packages/cspell-trie-lib/src/lib/TrieNode/TrieNodeTrie.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { consolidate } from '../consolidate.js';
import type { ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
import type { ITrieNode, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
import { findNode } from '../ITrieNode/trie-util.js';
import type { PartialTrieOptions, TrieOptions } from '../trie.js';
import type { TrieData } from '../TrieData.js';
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';
Expand All @@ -24,6 +25,10 @@ export class TrieNodeTrie implements TrieData {
return this.iTrieRoot;
}

/**
* Resolve `prefix` to a node by delegating to `findNode` with this trie's root.
* @param prefix - text handed to `findNode` to locate the node.
* @returns the result of `findNode`; `undefined` presumably means that no node
*   matches `prefix` (confirm against `findNode`).
*/
getNode(prefix: string): ITrieNode | undefined {
    const root = this.getRoot();
    return findNode(root, prefix);
}

/**
* Get the words of this trie as an iterable.
* Delegates to `iteratorTrieWords` on `this.root`.
*/
words(): Iterable<string> {
return iteratorTrieWords(this.root);
}
Expand Down
Loading

0 comments on commit 1f14c46

Please sign in to comment.