Skip to content

Commit

Permalink
Merge 910c043 into b659edd
Browse files Browse the repository at this point in the history
  • Loading branch information
lucaong committed Nov 28, 2022
2 parents b659edd + 910c043 commit 497c265
Show file tree
Hide file tree
Showing 2 changed files with 207 additions and 19 deletions.
62 changes: 62 additions & 0 deletions src/MiniSearch.test.js
Expand Up @@ -759,6 +759,68 @@ describe('MiniSearch', () => {
})
})

// Test suite for MiniSearch#addFields: incremental field addition must yield
// the same index as adding the complete document in one go, and invalid calls
// must be rejected with a descriptive error.
describe('addFields', () => {
  it('add fields to an existing document', () => {
    const config = { fields: ['text', 'author'], storeFields: ['text', 'author', 'n'] }

    // Index built in two steps: base document first, extra fields afterwards
    const incremental = new MiniSearch(config)
    incremental.add({ id: 1, text: 'Some quite interesting stuff' })
    incremental.addFields(1, { author: 'Al et. al.', n: 5 })

    // Reference index built from the complete document at once
    const reference = new MiniSearch(config)
    reference.add({ id: 1, text: 'Some quite interesting stuff', author: 'Al et. al.', n: 5 })

    expect(incremental).toEqual(reference)
  })

  it('throws an error if the document did not exist', () => {
    const emptyIndex = new MiniSearch({ fields: ['text'] })
    const addToMissingDocument = () => emptyIndex.addFields(1, { text: 'hello' })

    expect(addToMissingDocument).toThrow('MiniSearch: no document with ID 1')
  })

  it('throws an error if adding a field that already exists', () => {
    const index = new MiniSearch({ fields: ['text'] })
    index.add({ id: 1, text: 'Some interesting stuff' })
    const addDuplicateField = () => index.addFields(1, { text: 'hello' })

    expect(addDuplicateField).toThrow('MiniSearch: field text already exists on document with ID 1')
  })
})

// Test suite for MiniSearch#removeFields: removing fields from a document must
// yield the same index as if the document had been added without them, and
// invalid calls must be rejected with a descriptive error.
describe('removeFields', () => {
  it('removes fields from an existing document', () => {
    const options = { fields: ['text', 'author'], storeFields: ['text', 'author', 'n'] }
    const ms = new MiniSearch(options)
    const other = new MiniSearch(options)

    ms.add({ id: 1, text: 'Some quite interesting stuff', author: 'Al et. al.', n: 5 })
    ms.add({ id: 2, text: 'Lalala', author: 'Someone', n: 3 })
    // Removes one indexed field (text) and one stored-only field (n)
    ms.removeFields(1, { text: 'Some quite interesting stuff', n: 5 })

    // Reference index: document 1 never had the removed fields
    other.add({ id: 1, author: 'Al et. al.' })
    other.add({ id: 2, text: 'Lalala', author: 'Someone', n: 3 })

    expect(ms).toEqual(other)
  })

  it('throws an error if the document did not exist', () => {
    const ms = new MiniSearch({ fields: ['text'] })
    expect(() => {
      ms.removeFields(1, { text: 'hello' })
    }).toThrow('MiniSearch: no document with ID 1')
  })

  it('throws an error if removing a field that did not exist', () => {
    const ms = new MiniSearch({ fields: ['text', 'author'] })
    ms.add({ id: 1, author: 'Al et. al.' })
    expect(() => {
      ms.removeFields(1, { text: 'Some interesting stuff' })
    }).toThrow('MiniSearch: field text does not exist on document with ID 1')
  })
})

describe('vacuum', () => {
it('cleans up discarded documents from the index', async () => {
const ms = new MiniSearch({ fields: ['text'], storeFields: ['text'] })
Expand Down
164 changes: 145 additions & 19 deletions src/MiniSearch.ts
Expand Up @@ -588,7 +588,7 @@ export default class MiniSearch<T = any> {
this._enqueuedVacuum = null
this._enqueuedVacuumConditions = defaultVacuumConditions

this.addFields(this._options.fields)
this.addFieldIds(this._options.fields)
}

/**
Expand All @@ -597,8 +597,9 @@ export default class MiniSearch<T = any> {
* @param document The document to be indexed
*/
add (document: T): void {
const { extractField, tokenize, processTerm, fields, idField } = this._options
const { extractField, idField } = this._options
const id = extractField(document, idField)

if (id == null) {
throw new Error(`MiniSearch: document does not have ID field "${idField}"`)
}
Expand All @@ -610,15 +611,66 @@ export default class MiniSearch<T = any> {
const shortDocumentId = this.addDocumentId(id)
this.saveStoredFields(shortDocumentId, document)

this.addToIndex(shortDocumentId, document, true)
}

/**
 * Adds some fields to an existing document
 *
 * The added fields should not be already present on the document, or an error
 * will be thrown. Indexed fields are checked up front, so a call rejected
 * because a field is already present does not modify the stored fields or the
 * index.
 *
 * ## Example:
 *
 *   const miniSearch = new MiniSearch({ fields: ['title', 'text', 'author'] })
 *
 *   miniSearch.add({ id: 1, title: 'Neuromancer' })
 *
 *   miniSearch.addFields(1, {
 *     text: 'The sky above the port was the color of television, tuned to a dead channel.',
 *     author: 'William Gibson'
 *   })
 *
 *   // The above is equivalent to:
 *   miniSearch.add({
 *     id: 1,
 *     title: 'Neuromancer',
 *     text: 'The sky above the port was the color of television, tuned to a dead channel.',
 *     author: 'William Gibson'
 *   })
 *
 * @param id  The document ID
 * @param toAdd  The fields to add
 * @throws Error if no document with the given ID is in the index, or if any
 * indexed field in `toAdd` is already present on the document
 */
addFields (id: any, toAdd: T): void {
  const { extractField, fields } = this._options
  const shortDocumentId = this._idToShortId.get(id)

  if (shortDocumentId == null) {
    throw new Error(`MiniSearch: no document with ID ${id}`)
  }

  // Validate every indexed field before mutating anything: otherwise a
  // conflict detected while indexing would leave the stored fields already
  // overwritten and the fields preceding the conflict already indexed.
  const fieldLengths = this._fieldLength.get(shortDocumentId)
  for (const field of fields) {
    // Fields absent from `toAdd` are skipped when indexing, so they cannot conflict
    if (extractField(toAdd, field) == null) continue

    if (fieldLengths?.[this._fieldIds[field]] != null) {
      throw new Error(`MiniSearch: field ${field} already exists on document with ID ${this._documentIds.get(shortDocumentId)}`)
    }
  }

  this.saveStoredFields(shortDocumentId, toAdd)

  // `false`: the document is already counted, only its fields change
  this.addToIndex(shortDocumentId, toAdd, false)
}

private addToIndex (shortDocumentId: number, document: T, added: boolean) {
const { extractField, tokenize, processTerm, fields } = this._options

for (const field of fields) {
const fieldValue = extractField(document, field)
if (fieldValue == null) continue

const tokens = tokenize(fieldValue.toString(), field)
const fieldId = this._fieldIds[field]

const uniqueTerms = new Set(tokens).size
this.addFieldLength(shortDocumentId, fieldId, this._documentCount - 1, uniqueTerms)
const uniqueTerms = new Set(tokens)
uniqueTerms.delete('')

if (this._fieldLength.get(shortDocumentId)?.[fieldId] != null) {
throw new Error(`MiniSearch: field ${field} already exists on document with ID ${this._documentIds.get(shortDocumentId)}`)
}
this.addFieldLength(shortDocumentId, fieldId, this._documentCount, uniqueTerms.size, added)

for (const term of tokens) {
const processedTerm = processTerm(term, field)
Expand Down Expand Up @@ -689,7 +741,7 @@ export default class MiniSearch<T = any> {
* @param document The document to be removed
*/
remove (document: T): void {
const { tokenize, processTerm, extractField, fields, idField } = this._options
const { extractField, idField } = this._options
const id = extractField(document, idField)

if (id == null) {
Expand All @@ -702,15 +754,89 @@ export default class MiniSearch<T = any> {
throw new Error(`MiniSearch: cannot remove document with ID ${id}: it is not in the index`)
}

this.removeFromIndex(shortId, document, true)

this._storedFields.delete(shortId)
this._documentIds.delete(shortId)
this._idToShortId.delete(id)
this._fieldLength.delete(shortId)
this._documentCount -= 1
}

/**
 * Removes some fields from an existing document
 *
 * The removed fields should be present on the document, or an error will be
 * thrown. Indexed fields are checked up front, so a call rejected because a
 * field is missing does not modify the index or the stored fields.
 *
 * Note: removing _all_ the fields in a document with `removeFields` is
 * different from removing the whole document with [[MiniSearch.remove]] or
 * [[MiniSearch.discard]]. The difference in the first case is that the
 * document is still counted in [[MiniSearch.documentCount]], even if it is
 * practically not searchable anymore.
 *
 * ## Example:
 *
 *   const miniSearch = new MiniSearch({ fields: ['title', 'text', 'author'] })
 *
 *   miniSearch.add({
 *     id: 1,
 *     title: 'Neuromancer',
 *     text: 'The sky above the port was the color of television, tuned to a dead channel.',
 *     author: 'William Gibson'
 *   })
 *
 *   miniSearch.removeFields(1, {
 *     text: 'The sky above the port was the color of television, tuned to a dead channel.',
 *     author: 'William Gibson'
 *   })
 *
 *   // The above is equivalent to:
 *   miniSearch.add({
 *     id: 1,
 *     title: 'Neuromancer'
 *   })
 *
 * @param id  The document ID
 * @param toRemove  The fields to remove
 * @throws Error if no document with the given ID is in the index, or if any
 * indexed field in `toRemove` is not present on the document
 */
removeFields (id: any, toRemove: T): void {
  const { storeFields, extractField, fields } = this._options
  const shortDocumentId = this._idToShortId.get(id)

  if (shortDocumentId == null) {
    throw new Error(`MiniSearch: no document with ID ${id}`)
  }

  // Validate every indexed field before mutating anything: otherwise a
  // missing field detected while de-indexing would leave the fields
  // preceding it already removed from the index.
  const fieldLengths = this._fieldLength.get(shortDocumentId)
  for (const field of fields) {
    // Fields absent from `toRemove` are skipped when de-indexing
    if (extractField(toRemove, field) == null) continue

    if (fieldLengths?.[this._fieldIds[field]] == null) {
      throw new Error(`MiniSearch: field ${field} does not exist on document with ID ${this._documentIds.get(shortDocumentId)}`)
    }
  }

  // `false`: the document itself stays in the index, only its fields change
  this.removeFromIndex(shortDocumentId, toRemove, false)

  const storedFields = this._storedFields.get(shortDocumentId)
  if (storedFields == null) return

  // Drop the stored copy of every field mentioned in `toRemove`
  for (const fieldName of storeFields) {
    if (extractField(toRemove, fieldName) !== undefined) {
      delete storedFields[fieldName]
    }
  }
}

private removeFromIndex (shortId: number, document: T, removed: boolean) {
const { tokenize, processTerm, extractField, fields } = this._options

for (const field of fields) {
const fieldValue = extractField(document, field)
if (fieldValue == null) continue

const tokens = tokenize(fieldValue.toString(), field)
const fieldId = this._fieldIds[field]

const uniqueTerms = new Set(tokens).size
this.removeFieldLength(shortId, fieldId, this._documentCount, uniqueTerms)
const uniqueTerms = new Set(tokens)
uniqueTerms.delete('')

if (this._fieldLength.get(shortId)?.[fieldId] == null) {
throw new Error(`MiniSearch: field ${field} does not exist on document with ID ${this._documentIds.get(shortId)}`)
}
this.removeFieldLength(shortId, fieldId, this._documentCount, uniqueTerms.size, removed)

for (const term of tokens) {
const processedTerm = processTerm(term, field)
Expand All @@ -723,12 +849,6 @@ export default class MiniSearch<T = any> {
}
}
}

this._storedFields.delete(shortId)
this._documentIds.delete(shortId)
this._idToShortId.delete(id)
this._fieldLength.delete(shortId)
this._documentCount -= 1
}

/**
Expand Down Expand Up @@ -1706,7 +1826,7 @@ export default class MiniSearch<T = any> {
/**
* @ignore
*/
private addFields (fields: string[]): void {
private addFieldIds (fields: string[]): void {
for (let i = 0; i < fields.length; i++) {
this._fieldIds[fields[i]] = i
}
Expand All @@ -1715,26 +1835,32 @@ export default class MiniSearch<T = any> {
/**
* @ignore
*/
private addFieldLength (documentId: number, fieldId: number, count: number, length: number): void {
private addFieldLength (documentId: number, fieldId: number, count: number, length: number, added: boolean): void {
let fieldLengths = this._fieldLength.get(documentId)
if (fieldLengths == null) this._fieldLength.set(documentId, fieldLengths = [])
const n = added ? 1 : 0

fieldLengths[fieldId] = length

const averageFieldLength = this._avgFieldLength[fieldId] || 0
const totalFieldLength = (averageFieldLength * count) + length
this._avgFieldLength[fieldId] = totalFieldLength / (count + 1)
const totalFieldLength = (averageFieldLength * (count - n)) + length
this._avgFieldLength[fieldId] = totalFieldLength / count
}

/**
* @ignore
*/
private removeFieldLength (documentId: number, fieldId: number, count: number, length: number): void {
private removeFieldLength (documentId: number, fieldId: number, count: number, length: number, removed: boolean = true): void {
const fieldLengths = this._fieldLength.get(documentId)
delete fieldLengths?.[fieldId]

if (count === 1) {
this._avgFieldLength[fieldId] = 0
return
}
const n = removed ? 1 : 0
const totalFieldLength = (this._avgFieldLength[fieldId] * count) - length
this._avgFieldLength[fieldId] = totalFieldLength / (count - 1)
this._avgFieldLength[fieldId] = totalFieldLength / (count - n)
}

/**
Expand Down

0 comments on commit 497c265

Please sign in to comment.