diff --git a/CHANGELOG.md b/CHANGELOG.md index 6718eb45..441aa8ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ `MiniSearch` follows [semantic versioning](https://semver.org/spec/v2.0.0.html). +# v5.1.0 (unreleased) + + - The `processTerm` option can now also expand a single term into several + terms by returning an array of strings. + # v5.0.0 This is a major release. The main change is an improved scoring algorithm based diff --git a/src/MiniSearch.test.js b/src/MiniSearch.test.js index b73a0b0d..9772e151 100644 --- a/src/MiniSearch.test.js +++ b/src/MiniSearch.test.js @@ -130,6 +130,16 @@ describe('MiniSearch', () => { expect(processTerm).toHaveBeenCalledWith(term, 'title') }) }) + + it('allows processTerm to expand a single term into several terms', () => { + const processTerm = (string) => string === 'foobar' ? ['foo', 'bar'] : string + const ms = new MiniSearch({ fields: ['title', 'text'], processTerm }) + expect(() => { + ms.add({ id: 123, text: 'foobar' }) + }).not.toThrowError() + + expect(ms.search('bar')).toHaveLength(1) + }) }) describe('remove', () => { @@ -259,6 +269,18 @@ describe('MiniSearch', () => { }).not.toThrowError() }) + it('allows processTerm to expand a single term into several terms', () => { + const processTerm = (string) => string === 'foobar' ? ['foo', 'bar'] : string + const ms = new MiniSearch({ fields: ['title', 'text'], processTerm }) + const document = { id: 123, title: 'foobar' } + ms.add(document) + expect(() => { + ms.remove(document) + }).not.toThrowError() + + expect(ms.search('bar')).toHaveLength(0) + }) + describe('when using custom per-field extraction/tokenizer/processing', () => { const documents = [ { id: 1, title: 'Divina Commedia', tags: 'dante,virgilio', author: { name: 'Dante Alighieri' } }, @@ -651,6 +673,13 @@ describe('MiniSearch', () => { expect(results.map(({ id }) => id).sort()).toEqual([1]) }) + it('allows processTerm to expand a single term into several terms', () => { + const processTerm = (string) => string === 'divinacommedia' ? ['divina', 'commedia'] : string + const results = ms.search('divinacommedia', { processTerm }) + expect(results.length).toBeGreaterThan(0) + expect(results.map(({ id }) => id).sort()).toEqual([1]) + }) + it('allows custom filtering of results on the basis of stored fields', () => { const results = ms.search('del', { filter: ({ category }) => category === 'poetry' diff --git a/src/MiniSearch.ts b/src/MiniSearch.ts index cbfb1184..099258a4 100644 --- a/src/MiniSearch.ts +++ b/src/MiniSearch.ts @@ -104,7 +104,7 @@ export type SearchOptions = { * Function to process or normalize terms in the search query. By default, the * same term processor used for indexing is used also for search. */ - processTerm?: (term: string) => string | null | undefined | false + processTerm?: (term: string) => string | string[] | null | undefined | false } type SearchOptionsWithDefaults = SearchOptions & { @@ -174,8 +174,11 @@ export type Options = { * The function takes as arguments a term to process, and the name of the * field it comes from. It should return the processed term as a string, or a * falsy value to reject the term entirely. + * + * It can also return an array of strings, in which case each string in the + * returned array is indexed as a separate term. */ - processTerm?: (term: string, fieldName?: string) => string | null | undefined | false, + processTerm?: (term: string, fieldName?: string) => string | string[] | null | undefined | false, /** * Default search options (see the [[SearchOptions]] type and the @@ -199,7 +202,7 @@ type OptionsWithDefaults = Options & { tokenize: (text: string, fieldName: string) => string[], - processTerm: (term: string, fieldName: string) => string | null | undefined | false, + processTerm: (term: string, fieldName: string) => string | string[] | null | undefined | false, searchOptions: SearchOptionsWithDefaults, @@ -508,7 +511,11 @@ export default class MiniSearch { for (const term of tokens) { const processedTerm = processTerm(term, field) - if (processedTerm) { + if (Array.isArray(processedTerm)) { + for (const t of processedTerm) { + this.addTerm(fieldId, shortDocumentId, t) + } + } else if (processedTerm) { this.addTerm(fieldId, shortDocumentId, processedTerm) } } @@ -591,7 +598,11 @@ export default class MiniSearch { for (const term of tokens) { const processedTerm = processTerm(term, field) - if (processedTerm) { + if (Array.isArray(processedTerm)) { + for (const t of processedTerm) { + this.removeTerm(fieldId, shortId, t) + } + } else if (processedTerm) { this.removeTerm(fieldId, shortId, processedTerm) } } @@ -1012,7 +1023,7 @@ export default class MiniSearch { const options = { tokenize, processTerm, ...globalSearchOptions, ...searchOptions } const { tokenize: searchTokenize, processTerm: searchProcessTerm } = options const terms = searchTokenize(query) - .map((term: string) => searchProcessTerm(term)) + .flatMap((term: string) => searchProcessTerm(term)) .filter((term) => !!term) as string[] const queries: QuerySpec[] = terms.map(termToQuerySpec(options)) const results = queries.map(query => this.executeQuerySpec(query, options))