Skip to content
This repository has been archived by the owner on Jan 22, 2021. It is now read-only.

Commit

Permalink
Added word based full-text search index
Browse files Browse the repository at this point in the history
  • Loading branch information
macrat committed Mar 5, 2018
1 parent 6898cb6 commit 27e63a4
Show file tree
Hide file tree
Showing 6 changed files with 263 additions and 37 deletions.
36 changes: 34 additions & 2 deletions lib/IFTSArrayPromise.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {splitQuery} from './utils';
import {splitQuery, splitWords} from './utils';
import {NoSuchColumnError} from './errors';


Expand Down Expand Up @@ -205,7 +205,7 @@ export default class IFTSArrayPromise {
/**
* Get contents that have matched property by full-text search.
*
* This method can search even if didn't made fulltext index.
* This method can search even if no ngram index was created.
*
* WARNING: This method always processes all contents without using indexes.
* Please consider using {@link IFTSTransaction#search}.
Expand Down Expand Up @@ -234,4 +234,36 @@ export default class IFTSArrayPromise {

return this.filter(data => queries.every(q => columns.some(col => data[col].includes(q))));
}

/**
* Find contents that have fully matched word in property.
*
* This method can search even if no word index was created.
*
* WARNING: This method always processes all contents without using indexes.
* Please consider using {@link IFTSTransaction#searchWord}.
*
*
* @param {string|string[]} columns - column name, or list of column names, to search.
* @param {string} query - query for search.
*
* @return {IFTSArrayPromise} matched contents. may reject with {@link NoSuchColumnError}.
*/
searchWord(columns, query) {
if (typeof columns === 'string') {
columns = [columns];
}

for (let c of columns) {
if (!this.indexes.has(c)) {
return IFTSArrayPromise.reject(this.indexes, new NoSuchColumnError(c));
}
}

const queries = splitWords(query);

return this.filter(data => queries.every(q => columns.some(col => {
return splitWords(data[col]).includes(q);
})));
}
}
127 changes: 110 additions & 17 deletions lib/IFTSTransaction.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {tokenize, splitQuery, fastMap, flatten} from './utils';
import {tokenize, splitQuery, splitWords, fastMap, flatten, dedup} from './utils';
import {NoSuchColumnError, InvalidKeyError} from './errors';
import IFTSArrayPromise from './IFTSArrayPromise';

Expand Down Expand Up @@ -50,14 +50,19 @@ export default class IFTSTransaction {
*/
put(...contents) {
const store = this.transaction.objectStore('data');
const fts_indexes = fastMap([...this.db.fulltext_indexes], column => ({name: column, store: this.transaction.objectStore(this.db.index_prefix + column)}));
const ngram_indexes = fastMap([...this.db.ngram_indexes], column => ({name: column, store: this.transaction.objectStore(this.db.index_prefix + 'ngram_' + column)}));
const word_indexes = fastMap([...this.db.word_indexes], column => ({name: column, store: this.transaction.objectStore(this.db.index_prefix + 'word_' + column)}));

const putPromises = new Array(contents.length);
for (let i=0; i<contents.length; i++) {
putPromises[i] = new Promise((resolve, reject) => {
const req = store.put(contents[i]);
req.onerror = reject;
req.onsuccess = ev => resolve(this._updateIndex(ev.target.result, contents[i], fts_indexes));
req.onsuccess = ev => {
resolve(
this._updateNGramIndex(ev.target.result, contents[i], ngram_indexes)
.then(() => this._updateWordIndex(ev.target.result, contents[i], word_indexes)))
};
});
}

Expand All @@ -75,13 +80,13 @@ export default class IFTSTransaction {
}

/**
* Update index for full-text search.
* Update ngram index.
*
* @ignore
*/
_updateIndex(key, data, fts_indexes) {
return this._deleteIndex(key)
.then(() => Promise.all(fastMap(fts_indexes, col => {
_updateNGramIndex(key, data, ngram_indexes) {
return this._deleteIndex(key, ngram_indexes.map(x => this.db.index_prefix + 'ngram_' + x.name))
.then(() => Promise.all(fastMap(ngram_indexes, col => {
const tokens = tokenize(data[col.name]);
const promises = new Array(tokens.length);
for (let i=0; i<tokens.length; i++) {
Expand All @@ -100,14 +105,39 @@ export default class IFTSTransaction {
}

/**
* Delete FTS index from database.
* Update word index.
*
* @ignore
*/
_deleteIndex(key) {
return Promise.all([...this.db.fulltext_indexes].map(col => {
_updateWordIndex(key, data, word_indexes) {
return this._deleteIndex(key, word_indexes.map(x => this.db.index_prefix + 'word_' + x.name))
.then(() => Promise.all(fastMap(word_indexes, col => {
const words = splitWords(data[col.name]);
const promises = new Array(words.length);
for (let i=0; i<words.length; i++) {
promises[i] = new Promise((resolve, reject) => {
const req = col.store.put({
key: key,
word: words[i],
});
req.onsuccess = () => resolve();
req.onerror = reject;
});
}
return Promise.all(promises);
})))
.then(() => [key, data]);
}

/**
* Delete content by FTS indexes of database.
*
* @ignore
*/
_deleteIndex(key, tableNames) {
return Promise.all(tableNames.map(table => {
return new Promise((resolve, reject) => {
const store = this.transaction.objectStore(this.db.index_prefix + col);
const store = this.transaction.objectStore(table);
store.onerror = reject;

const requests = [];
Expand Down Expand Up @@ -150,7 +180,11 @@ export default class IFTSTransaction {
const req = this.transaction.objectStore('data').delete(key);
req.onerror = reject;
req.onsuccess = resolve;
}).then(() => this._deleteIndex(key));
})
.then(() => this._deleteIndex(key, [
...[...this.db.ngram_indexes].map(x => this.db.index_prefix + 'ngram_' + x),
...[...this.db.word_indexes].map(x => this.db.index_prefix + 'word_' + x),
]))
})).then(() => this);
}

Expand Down Expand Up @@ -410,7 +444,7 @@ export default class IFTSTransaction {
* @ignore
*/
_takeCandidatesBySingleColumn(column, queries) {
const index = this.transaction.objectStore(this.db.index_prefix + column).index('token');
const index = this.transaction.objectStore(this.db.index_prefix + 'ngram_' + column).index('token');
const result = [];

for (let q in queries) {
Expand All @@ -421,7 +455,7 @@ export default class IFTSTransaction {

const promises = new Array(queries[q].length);
for (let i=0; i<queries[q].length; i++) {
promises[i] = this._readCursor(index.openCursor(queries[q][i])).map(data => data.key);
promises[i] = this._readCursor(index.openCursor(queries[q][i]), null, data => data.key);
}

const candidate = Promise.all(promises)
Expand Down Expand Up @@ -495,8 +529,8 @@ export default class IFTSTransaction {
/**
* Get contents that have matched property by full-text search.
*
* All target columns have to made fulltext index when created database.
* If you didn't made fulltext index, you can use {@link IFTSArrayPromise#search} (but this way is very slow).
* Every target column must have had an ngram index defined when the database was created.
* For columns without an ngram index, use {@link IFTSArrayPromise#search} instead (but that way is very slow).
*
*
* @param {object|object[]} columns - column names for search.
Expand All @@ -510,7 +544,7 @@ export default class IFTSTransaction {
}

for (let i=0; i<columns.length; i++) {
if (!this.db.fulltext_indexes.has(columns[i])) {
if (!this.db.ngram_indexes.has(columns[i])) {
return IFTSArrayPromise.reject(this.db.indexes, new NoSuchColumnError(columns[i]));
}
}
Expand All @@ -534,4 +568,63 @@ export default class IFTSTransaction {
Promise.all(candidatePromises).then(xs => this._pruneCandidates(queries_length, xs)),
);
}

/**
* Find contents that have fully matched word in property.
*
* Every target column must have had a word index defined when the database was created.
* For columns without a word index, use {@link IFTSArrayPromise#searchWord} instead (but that way is very slow).
*
*
* @param {string|string[]} columns - column name, or list of column names, to search.
* @param {string} query - query for search.
*
* @return {IFTSArrayPromise} matched contents. may reject with {@link NoSuchColumnError}.
*/
searchWord(columns, query) {
if (typeof columns === 'string') {
columns = [columns];
}

for (let i=0; i<columns.length; i++) {
if (!this.db.word_indexes.has(columns[i])) {
return IFTSArrayPromise.reject(this.db.indexes, new NoSuchColumnError(columns[i]));
}
}

const queries = splitWords(query).map(x => ({text: x, keyRange: this._KeyRange.only(x)}));

return new IFTSArrayPromise(this.db.indexes, Promise.all(flatten(columns.map(col => {
const index = this.transaction.objectStore(this.db.index_prefix + 'word_' + col).index('word');

return queries.map(query => this._readCursor(index.openCursor(query.keyRange), null, data => [data.key, query.text]));
}))).then(candidates => {
candidates = dedup(flatten(candidates));

const counts = {};
for (let i=0; i<candidates.length; i++) {
const key = candidates[i][0];
if (!(key in counts)) {
counts[key] = 0;
}
counts[key]++;
}

const hits = new Array(candidates.length);
let hits_count = 0;
for (let i=0; i<candidates.length; i++) {
const key = candidates[i][0];
if (counts[key] >= queries.length) {
hits[hits_count] = key;
hits_count++;
}
}

const result = new Array(hits_count);
for (let i=0; i<hits_count; i++) {
result[i] = this.get(hits[i]);
}
return new IFTSArrayPromise(this.db.indexes, Promise.all(result));
}));
}
}
68 changes: 53 additions & 15 deletions lib/IndexedFTS.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,20 @@ export default class IndexedFTS {
* If you want to change the schema of the database, please change the version number.
* Please be careful: all contents will be removed when the version number changes.
*
* Index types are 'primary', 'unique', 'fulltext', or not indexed.
* Index types are 'primary', 'unique', 'fulltext', 'ngram', 'word', or normal index.
*
* 'primary' is a primary key of the database. 'primary' can't set to multiple columns.
*
* 'unique' is columns that have a unique value in the database.
* If set 'fulltext' IndexedFTS will make index table for full-text search.
* Not indexed columns will store to the database but can't search.
*
* If 'ngram' is set, IndexedFTS will make a 2-gram index table for full-text search.
* 'fulltext' is an alias of 'ngram'.
*
* 'word' is word based index.
* The word index will split text with whitespaces and store those.
* Word index is faster than the 'ngram' index but can't find a partial match in the word.
*
* A normal index is a column with no option set: it is not unique, not primary, and not indexed for full-text search. Numeric searches such as {@link IndexedFTS#lower} and {@link IndexedFTS#between} can still be used on it.
*
* If you want to set some index types, please use object like `{unique: true, fulltext: true}`.
*
Expand Down Expand Up @@ -56,7 +64,10 @@ export default class IndexedFTS {
this.primary_key = null;

/** @type {Set<string>} */
this.fulltext_indexes = new Set();
this.ngram_indexes = new Set();

/** @type {Set<string>} */
this.word_indexes = new Set();

/** @type {Set<string>} */
this.unique_indexes = new Set();
Expand All @@ -80,15 +91,20 @@ export default class IndexedFTS {
this.normal_indexes.add(x);
}

if (schema[x] === 'fulltext' || schema[x].fulltext) {
this.fulltext_indexes.add(x);
if (schema[x] === 'ngram' || schema[x].ngram
|| schema[x] === 'fulltext' || schema[x].fulltext) {
this.ngram_indexes.add(x);
}

if (schema[x] === 'word' || schema[x].word) {
this.word_indexes.add(x);
}
}
}

/** @type {Set<string>} */
get indexes() {
const r = new Set([...this.fulltext_indexes, ...this.unique_indexes, ...this.normal_indexes]);
const r = new Set([...this.ngram_indexes, ...this.word_indexes, ...this.unique_indexes, ...this.normal_indexes]);
if (this.primary_key !== null) {
r.add(this.primary_key);
}
Expand Down Expand Up @@ -121,12 +137,18 @@ export default class IndexedFTS {

this.normal_indexes.forEach(x => store.createIndex(x, x, {unique: false}));

this.fulltext_indexes.forEach(column => {
const fts_store = this.db.createObjectStore(this.index_prefix + column, {autoIncrement: true});
this.ngram_indexes.forEach(column => {
const fts_store = this.db.createObjectStore(this.index_prefix + 'ngram_' + column, {autoIncrement: true});
fts_store.onerror = reject
fts_store.createIndex('key', 'key', {unique: false});
fts_store.createIndex('token', 'token', {unique: false});
fts_store.createIndex('uni', ['key', 'token'], {unique: true});
});

this.word_indexes.forEach(column => {
const fts_store = this.db.createObjectStore(this.index_prefix + 'word_' + column, {autoIncrement: true});
fts_store.onerror = reject
fts_store.createIndex('key', 'key', {unique: false});
fts_store.createIndex('word', 'word', {unique: false});
});
};
});
Expand All @@ -149,7 +171,9 @@ export default class IndexedFTS {
*/
transaction(mode='readonly', target=null) {
if (target === null) {
target = [...this.fulltext_indexes].map(x => this.index_prefix + x).concat(['data']);
const ngrams = [...this.ngram_indexes].map(x => this.index_prefix + 'ngram_' + x);
const words = [...this.word_indexes].map(x => this.index_prefix + 'word_' + x);
target = ngrams.concat(words).concat(['data']);
}
return new IFTSTransaction(this, this.db.transaction(target, mode));
}
Expand Down Expand Up @@ -321,10 +345,8 @@ export default class IndexedFTS {
/**
* Get contents that have matched property by full-text search.
*
* This method can search even if didn't made fulltext index.
*
* WARNING: This method always processes all contents without using indexes.
* Please consider using {@link IFTSTransaction#search}.
* Every target column must have had an ngram index defined when the database was created.
* For columns without an ngram index, use {@link IFTSArrayPromise#search} instead (but that way is very slow).
*
*
* @param {object|object[]} columns - column names for search.
Expand All @@ -335,4 +357,20 @@ export default class IndexedFTS {
search(columns, query) {
return this.transaction().search(columns, query);
}

/**
* Find contents that have fully matched word in property.
*
* Every target column must have had a word index defined when the database was created.
* For columns without a word index, use {@link IFTSArrayPromise#searchWord} instead (but that way is very slow).
*
*
* @param {string|string[]} columns - column name, or list of column names, to search.
* @param {string} query - query for search.
*
* @return {IFTSArrayPromise} matched contents. may reject with {@link NoSuchColumnError}.
*/
searchWord(columns, query) {
return this.transaction().searchWord(columns, query);
}
}

0 comments on commit 27e63a4

Please sign in to comment.