From 65c20083f30819bad5a7e4cc7206eeaa8dbb5366 Mon Sep 17 00:00:00 2001 From: David First Date: Mon, 30 Jan 2017 13:37:28 -0500 Subject: [PATCH 1/2] index a shorter version of the description --- src/search/indexer.js | 15 +++++++++++++-- src/search/searcher.js | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/search/indexer.js b/src/search/indexer.js index 8250b649af80..c4a9cb57404f 100644 --- a/src/search/indexer.js +++ b/src/search/indexer.js @@ -12,7 +12,8 @@ export type Doc = { tokenizedBox: string, functionNames: string, tokenizedFunctionNames: string, - description: string + description: string, + min_description: string }; let localIndex; @@ -21,6 +22,15 @@ function tokenizeStr(str: string): string { return str.trim().split(/(?=[A-Z])/).join(' ').toLowerCase().split(/ |_|-/).join(' '); } +/** + * returns the first sentence of the description. + * @param {string} desc + * @return {string} + */ +function minimizeDescription(desc: string = ''): string { + return desc.split(/\.|;/)[0]; // split by a dot or a semicolon +} + function prepareDoc(docs: Object, component: Component): Doc { const name = component.name; const box = component.box; @@ -33,7 +43,8 @@ function prepareDoc(docs: Object, component: Component): Doc { tokenizedBox: tokenizeStr(box), functionNames, tokenizedFunctionNames: tokenizeStr(functionNames), - description: docs.map(doc => doc.description).join(' ') + description: docs.map(doc => doc.description).join(' '), + min_description: docs.map(doc => minimizeDescription(doc.description)).join(' ') }; } diff --git a/src/search/searcher.js b/src/search/searcher.js index c41cc59e63d1..fed063e2a58a 100644 --- a/src/search/searcher.js +++ b/src/search/searcher.js @@ -57,7 +57,7 @@ function buildQuery(queryStr: string) { query.push(queryItem('tokenizedName', tokenizedQuery, 3)); query.push(queryItem('functionNames', queryStr, 3)); query.push(queryItem('tokenizedFunctionNames', tokenizedQuery, 2)); - 
query.push(queryItem('description', queryStr)); + query.push(queryItem('min_description', queryStr)); return query; } From b7894723478960eaf6d12b9ac13eb7e881b1e93a Mon Sep 17 00:00:00 2001 From: David First Date: Mon, 30 Jan 2017 20:38:19 -0500 Subject: [PATCH 2/2] search: refactor the boosting, add a patch of an extra field to work around an index-search bug --- src/search/indexer.js | 7 ++++--- src/search/searcher.js | 37 +++++++++++++++++++++++----------- src/search/serverless-index.js | 3 ++- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/src/search/indexer.js b/src/search/indexer.js index c4a9cb57404f..f5ae12fab8f4 100644 --- a/src/search/indexer.js +++ b/src/search/indexer.js @@ -13,7 +13,7 @@ export type Doc = { functionNames: string, tokenizedFunctionNames: string, description: string, - min_description: string + minDescription: string }; let localIndex; @@ -38,13 +38,14 @@ function prepareDoc(docs: Object, component: Component): Doc { return { id: `${box}_${name}`, name, - tokenizedName: tokenizeStr(name), box, + tokenizedNameExtra: tokenizeStr(name), // TODO: remove it when possible + tokenizedName: tokenizeStr(name), tokenizedBox: tokenizeStr(box), functionNames, tokenizedFunctionNames: tokenizeStr(functionNames), description: docs.map(doc => doc.description).join(' '), - min_description: docs.map(doc => minimizeDescription(doc.description)).join(' ') + minDescription: docs.map(doc => minimizeDescription(doc.description)).join(' ') }; } diff --git a/src/search/searcher.js b/src/search/searcher.js index fed063e2a58a..b7f3468e9057 100644 --- a/src/search/searcher.js +++ b/src/search/searcher.js @@ -3,6 +3,16 @@ import serverlessIndex from './serverless-index'; import indexer from './indexer'; import type Doc from './indexer'; +const boost = { + box: 3, + tokenizedBox: 2, + name: 5, + tokenizedName: 4, + functionNames: 2, + tokenizedFunctionNames: 2, + minDescription: 1 +}; + function totalHits(index: Promise, query: string) { return new
Promise((resolve, reject) => { return index.then((indexInstance) => { @@ -41,23 +51,26 @@ function formatSearchResult(doc: Doc): string { return `> ${doc.box}/${doc.name}`; } -function queryItem(field, query, boost = 1) { - return { - AND: { [field]: query.toLowerCase().split(' ') }, - BOOST: boost +function queryItem(field, queryStr): Object { + const query = { + AND: { [field]: queryStr.toLowerCase().split(' ') }, + BOOST: boost[field], + NOT: {} }; + + return query; } -function buildQuery(queryStr: string) { +function buildQuery(queryStr: string): Array { const tokenizedQuery = indexer.tokenizeStr(queryStr); const query = []; - query.push(queryItem('box', queryStr, 4)); - query.push(queryItem('tokenizedBox', queryStr, 3)); - query.push(queryItem('name', queryStr, 4)); - query.push(queryItem('tokenizedName', tokenizedQuery, 3)); - query.push(queryItem('functionNames', queryStr, 3)); - query.push(queryItem('tokenizedFunctionNames', tokenizedQuery, 2)); - query.push(queryItem('min_description', queryStr)); + query.push(queryItem('box', queryStr)); + query.push(queryItem('tokenizedBox', queryStr)); + query.push(queryItem('name', queryStr)); + query.push(queryItem('tokenizedName', tokenizedQuery)); + query.push(queryItem('functionNames', queryStr)); + query.push(queryItem('tokenizedFunctionNames', tokenizedQuery)); + query.push(queryItem('minDescription', queryStr)); return query; } diff --git a/src/search/serverless-index.js b/src/search/serverless-index.js index 0f9910418763..bd50fd6763fc 100644 --- a/src/search/serverless-index.js +++ b/src/search/serverless-index.js @@ -19,7 +19,8 @@ function deleteDb(scopePath: string) { function initializeIndex(scopePath: string): Promise { const indexOptions = { indexPath: getIndexPath(scopePath), - logLevel + logLevel, + stopwords: [] }; if (!initializeIndex.index) { // static var to make sure the index is not instantiated twice