Skip to content

Commit

Permalink
Support per-dictionary compound words.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Sep 30, 2017
1 parent 6fc8985 commit c40ce81
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 64 deletions.
3 changes: 3 additions & 0 deletions src/Settings/CSpellSettingsDef.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

import {ReplaceMap} from '../util/repMap';
export {ReplaceMap} from '../util/repMap';

/**
* These settings come from user and workspace settings.
Expand Down Expand Up @@ -141,6 +142,8 @@ export interface DictionaryDefinition {
type?: DictionaryFileTypes;
// Replacement pairs
repMap?: ReplaceMap;
// Whether to allow compound words (words formed by joining words from this dictionary)
useCompounds?: boolean;
}

export interface LanguageSetting extends LanguageSettingFilterFields, BaseSetting {
Expand Down
2 changes: 2 additions & 0 deletions src/SpellingDictionary/DictionaryLoader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ export interface LoadOptions {
type?: LoaderType;
// Replacement Map
repMap?: ReplaceMap;
// Whether to allow compound words (words formed by joining dictionary words)
useCompounds?: boolean;
}

// Any property name of the `Loaders` type, which is declared outside this excerpt.
// NOTE(review): presumably identifies which loader to use (see `LoadOptions.type`) — confirm.
export type LoaderType = keyof Loaders;
Expand Down
14 changes: 14 additions & 0 deletions src/SpellingDictionary/SpellingDictionary.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,19 @@ describe('Verify building Dictionary', () => {
expect(suggestions).to.not.contain('banana');
});

it('Test compounds from word list', () => {
    // Vocabulary the dictionary under test is built from.
    const wordList = [
        'apple', 'ape', 'able', 'apple', 'banana', 'orange', 'pear', 'aim', 'approach', 'bear'
    ];
    // Compound matching is switched on through the dictionary's own options,
    // so the `has` calls below do not pass a second argument.
    const compoundDict = createSpellingDictionary(wordList, 'words', { useCompounds: true });

    // A plain entry from the list is found.
    expect(compoundDict.has('apple')).to.be.true;
    // cspell:ignore applebanana applebananas applebananaorange
    // Two and three listed words joined together are found as well.
    expect(compoundDict.has('applebanana')).to.be.true;
    expect(compoundDict.has('applebananaorange')).to.be.true;
    // The trailing 's' is not a listed word, so this compound must be rejected.
    expect(compoundDict.has('applebananas')).to.be.false;
});

it('Test Suggest Trie', () => {
const words = [
'apple', 'ape', 'able', 'apple', 'banana', 'orange', 'pear', 'aim', 'approach', 'bear',
Expand All @@ -53,6 +66,7 @@ describe('Verify building Dictionary', () => {
];
const trie = Trie.create(words);
const dict = new SpellingDictionaryFromTrie(trie, 'trie');
// cspell:ignore cattles
const suggestions = dict.suggest('Cattles').map(({word}) => word);
expect(suggestions[0]).to.be.equal('cattle');
expect(suggestions).to.not.contain('banana');
Expand Down
26 changes: 17 additions & 9 deletions src/SpellingDictionary/SpellingDictionary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export type FilterSuggestionsPredicate = (word: SuggestionResult) => boolean;

export interface SpellingDictionary {
readonly name: string;
has(word: string): boolean;
has(word: string, useCompounds?: boolean): boolean;
suggest(word: string, numSuggestions?: number): SuggestionResult[];
genSuggestions(collector: SuggestionCollector): void;
mapWord(word: string): string;
Expand All @@ -20,6 +20,7 @@ export interface SpellingDictionary {

/**
 * Optional per-dictionary settings.
 */
export interface SpellingDictionaryOptions {
// Replacement map.
// NOTE(review): presumably what `mapWord` applies before a lookup — confirm.
repMap?: ReplaceMap;
// Whether compound words are accepted by this dictionary.
// NOTE(review): presumably the value `has()` falls back to via `this.options.useCompounds`
// when its own `useCompounds` argument is undefined — confirm.
useCompounds?: boolean;
}

// Default number of suggestions.
// NOTE(review): presumably the fallback for `numSuggestions` in `suggest` — the use site is not visible here.
const defaultSuggestions = 10;
Expand All @@ -37,9 +38,13 @@ export class SpellingDictionaryFromSet implements SpellingDictionary {
return this._trie;
}

public has(word: string) {
public has(word: string, useCompounds?: boolean) {
useCompounds = useCompounds === undefined ? this.options.useCompounds : useCompounds;
useCompounds = useCompounds || false;
const mWord = this.mapWord(word).toLowerCase();
return this.words.has(mWord);
return this.words.has(mWord)
|| (useCompounds && this.trie.has(word, true))
|| false;
}

public suggest(word: string, numSuggestions?: number): SuggestionResult[] {
Expand Down Expand Up @@ -101,21 +106,24 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary {
return this._size;
}

public has(word: string) {
public has(word: string, useCompounds?: boolean) {
useCompounds = useCompounds === undefined ? this.options.useCompounds : useCompounds;
useCompounds = useCompounds || false;
word = this.mapWord(word).toLowerCase();
if (this.knownWords.has(word)) return true;
if (this.unknownWords.has(word)) return false;
const wordX = word + '|' + useCompounds;
if (this.knownWords.has(wordX)) return true;
if (this.unknownWords.has(wordX)) return false;

const r = this.trie.has(word);
const r = this.trie.has(word, useCompounds);
// Cache the result.
if (r) {
this.knownWords.add(word);
this.knownWords.add(wordX);
} else {
// clear the unknown word list if it has grown too large.
if (this.unknownWords.size > SpellingDictionaryFromTrie.unknownWordsLimit) {
this.unknownWords.clear();
}
this.unknownWords.add(word);
this.unknownWords.add(wordX);
}

return r;
Expand Down
8 changes: 4 additions & 4 deletions src/SpellingDictionary/SpellingDictionaryCollection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ export class SpellingDictionaryCollection implements SpellingDictionary {
this.wordsToFlag = new Set(wordsToFlag.map(w => w.toLowerCase()));
}

public has(word: string) {
public has(word: string, useCompounds?: boolean) {
word = word.toLowerCase();
return !this.wordsToFlag.has(word) && isWordInAnyDictionary(this.dictionaries, word);
return !this.wordsToFlag.has(word) && isWordInAnyDictionary(this.dictionaries, word, useCompounds);
}

public suggest(word: string, numSuggestions: number): SuggestionResult[] {
Expand All @@ -38,9 +38,9 @@ export function createCollection(dictionaries: SpellingDictionary[], name: strin
return new SpellingDictionaryCollection(dictionaries, name, wordsToFlag);
}

export function isWordInAnyDictionary(dicts: SpellingDictionary[], word: string) {
export function isWordInAnyDictionary(dicts: SpellingDictionary[], word: string, useCompounds?: boolean) {
return !!genSequence(dicts)
.first(dict => dict.has(word));
.first(dict => dict.has(word, useCompounds));
}

export function createCollectionP(
Expand Down
47 changes: 12 additions & 35 deletions src/textValidator.test.ts
Original file line number Diff line number Diff line change
@@ -1,49 +1,26 @@
import { expect } from 'chai';

import { wordSplitter, validateText, hasWordCheck } from './textValidator';
import { validateText, hasWordCheck } from './textValidator';
import { createCollection } from './SpellingDictionary';
import { createSpellingDictionary } from './SpellingDictionary';
import { FreqCounter } from './util/FreqCounter';

// cSpell:enableCompoundWords

describe('Validate textValidator functions', () => {
// cSpell:disable
it('tests splitting words', () => {
const results = [...wordSplitter('appleorange')];
expect(results).to.deep.equal([
['app', 'leorange'],
['appl', 'eorange'],
['apple', 'orange'],
['appleo', 'range'],
['appleor', 'ange'],
['appleora', 'nge'],
]);
});
// cSpell:enable

it('tests trying to split words that are too small', () => {
expect([...wordSplitter('')]).to.be.deep.equal([]);
expect([...wordSplitter('a')]).to.be.deep.equal([]);
expect([...wordSplitter('ap')]).to.be.deep.equal([]);
expect([...wordSplitter('app')]).to.be.deep.equal([]);
// cSpell:disable
expect([...wordSplitter('appl')]).to.be.deep.equal([]);
// cSpell:enable
expect([...wordSplitter('apple')]).to.be.deep.equal([]);
expect([...wordSplitter('apples')]).to.be.deep.equal([
['app', 'les']
]);
});

it('tests hasWordCheck', () => {
// cspell:ignore redgreenblueyellow strawberrymangobanana redwhiteblue
const dictCol = getSpellingDictionaryCollection();
expect(hasWordCheck(dictCol, 'brown', true)).to.be.true;
expect(hasWordCheck(dictCol, 'white', true)).to.be.true;
expect(hasWordCheck(dictCol, 'berry', true)).to.be.true;
expect(hasWordCheck(dictCol, 'whiteberry', true)).to.be.true;
expect(hasWordCheck(dictCol, 'redberry', true)).to.be.true;
// compound words do not cross dictionary boundaries
expect(hasWordCheck(dictCol, 'whiteberry', true)).to.be.false;
expect(hasWordCheck(dictCol, 'redmango', true)).to.be.true;
expect(hasWordCheck(dictCol, 'strawberrymangobanana', true)).to.be.true;
expect(hasWordCheck(dictCol, 'lightbrown', true)).to.be.true;
expect(hasWordCheck(dictCol, 'redgreenblueyellow', true)).to.be.true;
expect(hasWordCheck(dictCol, 'redwhiteblue', true)).to.be.true;
});

it('tests textValidator no word compounds', () => {
Expand All @@ -57,7 +34,7 @@ describe('Validate textValidator functions', () => {
const dictCol = getSpellingDictionaryCollection();
const result = validateText(sampleText, dictCol, { allowCompoundWords: true });
const errors = result.map(wo => wo.text).toArray();
expect(errors).to.deep.equal(['giraffe']);
expect(errors).to.deep.equal(['giraffe', 'whiteberry']);
});

// cSpell:ignore xxxkxxxx xxxbxxxx
Expand All @@ -66,7 +43,7 @@ describe('Validate textValidator functions', () => {
const text = ' tttt gggg xxxxxxx jjjjj xxxkxxxx xxxbxxxx \n' + sampleText;
const result = validateText(text, dictCol, { allowCompoundWords: true });
const errors = result.map(wo => wo.text).toArray().sort();
expect(errors).to.deep.equal(['giraffe', 'xxxbxxxx', 'xxxkxxxx']);
expect(errors).to.deep.equal(['giraffe', 'whiteberry', 'xxxbxxxx', 'xxxkxxxx']);
});

it('tests trailing s, ed, ing, etc. are attached to the words', () => {
Expand Down Expand Up @@ -120,9 +97,9 @@ function getSpellingDictionaryCollection() {
return createCollection(dicts, 'collection');
}

const colors = ['red', 'green', 'blue', 'black', 'white', 'orange', 'purple', 'yellow', 'gray', 'brown'];
const colors = ['red', 'green', 'blue', 'black', 'white', 'orange', 'purple', 'yellow', 'gray', 'brown', 'light', 'dark'];
const fruit = [
'apple', 'banana', 'orange', 'pear', 'pineapple', 'mango', 'avocado', 'grape', 'strawberry', 'blueberry', 'blackberry', 'berry'
'apple', 'banana', 'orange', 'pear', 'pineapple', 'mango', 'avocado', 'grape', 'strawberry', 'blueberry', 'blackberry', 'berry', 'red'
];
const animals = ['ape', 'lion', 'tiger', 'Elephant', 'monkey', 'gazelle', 'antelope', 'aardvark', 'hyena'];
const insects = ['ant', 'snail', 'beetle', 'worm', 'stink bug', 'centipede', 'millipede', 'flea', 'fly'];
Expand Down
19 changes: 3 additions & 16 deletions src/textValidator.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import * as Text from './util/text';
import * as TextRange from './util/TextRange';
import { SpellingDictionary } from './SpellingDictionary';
import { Sequence, genSequence } from 'gensequence';
import { Sequence } from 'gensequence';
import * as RxPat from './Settings/RegExpPatterns';

export interface ValidationOptions {
Expand Down Expand Up @@ -112,21 +112,8 @@ export function isWordValid(dict: SpellingDictionary, wo: Text.TextOffset, text:

export function hasWordCheck(dict: SpellingDictionary, word: string, allowCompounds: boolean) {
word = word.replace(/\\/g, '');
return dict.has(word) || (allowCompounds && hasCompoundWord(dict, word) );
}

export function hasCompoundWord(dict: SpellingDictionary, word: string) {
const foundPair = wordSplitter(word).first(([a, b]) => dict.has(a) && dict.has(b));
return !!foundPair;
}

export function wordSplitter(word: string): Sequence<[string, string]> {
function* split(word: string): IterableIterator<[string, string]> {
for (let i = minWordSplitLen; i <= word.length - minWordSplitLen; ++i) {
yield [word.slice(0, i), word.slice(i)];
}
}
return genSequence(split(word));
// Do not pass allowCompounds down when it is false: leaving it undefined lets each dictionary fall back to its own useCompounds setting.
return allowCompounds ? dict.has(word, allowCompounds) : dict.has(word);
}


0 comments on commit c40ce81

Please sign in to comment.