Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,31 @@ Available options:

- `size`: The total number of results to return, defaults to `25`

#### .search.similar(q, esClient, [options]) -> Promise

Perform a fuzzy search for similarly named packages.

Results are ranked based on a combination of analyzer weightings (`quality`, `popularity`, `maintenance`) and the `_score` returned by the [fuzzy match](https://www.elastic.co/guide/en/elasticsearch/guide/current/fuzzy-match-query.html).

```js
const queries = require('@npms/queries');

// ...
queries.search.similar('chaik', esClient)
.then(results => {
// perhaps we were instead looking for chalk?
});
```

Available options:

- `size`: The total number of results to return, defaults to `10`.
- `analyzerWeight`: The multiplier applied to the analyzer score of each package, defaults to `2.2`.
- `scoreWeight`: The multiplier applied to the fuzzy match `_score`, defaults to `1.5`.
- `minScore`: The minimum combined score a result must reach to be returned, defaults to `4.5`.

_The above default values were chosen by trial and error while examining the
top npm modules; they will likely change over time._

## Tests

Expand Down
1 change: 1 addition & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@

// Build the `search` namespace: the export of ./lib/search, with the related
// query helpers attached to it as properties.
const search = require('./lib/search');

module.exports.search = search;
search.suggestions = require('./lib/searchSuggestions');
search.similar = require('./lib/searchSimilar');
62 changes: 62 additions & 0 deletions lib/searchSimilar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
'use strict';

const pick = require('lodash/pick');
const parseQuery = require('./util/parseSearchQuery');
const toEsClient = require('./util/toEsClient');

/**
 * Performs a fuzzy search for packages whose name is similar to the query text.
 *
 * Every fuzzy match on `package.name.raw` is re-scored as
 * `(score.final * analyzerWeight) * (_score * scoreWeight)`, and `minScore` is
 * sent to Elasticsearch as the `min_score` of the function_score query.
 *
 * @param {string} q - The search query; it is reduced to plain text with parseQuery.discardQualifiers().
 * @param {*} esClient - The Elasticsearch client (or whatever toEsClient() accepts).
 * @param {object} [options] - Optional overrides.
 * @param {number} [options.size=10] - The total number of results to return.
 * @param {number} [options.minScore=4.5] - The value sent as `min_score`.
 * @param {number} [options.analyzerWeight=2.2] - The multiplier applied to `score.final`.
 * @param {number} [options.scoreWeight=1.5] - The multiplier applied to the fuzzy `_score`.
 *
 * @returns {Promise<Array>} Resolves with one entry per hit, holding the `package`, `flags` and
 *                           `score` source fields plus `searchScore`; resolves with `[]` when the
 *                           query has no text.
 */
function searchSimilar(q, esClient, options) {
    const client = toEsClient(esClient);
    const settings = Object.assign({
        size: 10,
        minScore: 4.5,
        analyzerWeight: 2.2,
        scoreWeight: 1.5,
    }, options);

    const text = parseQuery.discardQualifiers(q);

    // Nothing to match against, so skip the round-trip to Elasticsearch
    if (!text) {
        return Promise.resolve([]);
    }

    // The weights are handed to the script through `params` instead of being interpolated into
    // the script source: the source stays constant and caller-supplied values are never
    // evaluated as script code
    const script = '(doc["score.final"].value * analyzerWeight) * (_score * scoreWeight)';
    const scriptParams = {
        analyzerWeight: Number(settings.analyzerWeight),
        scoreWeight: Number(settings.scoreWeight),
    };

    return Promise.resolve(client.search({
        /* eslint camelcase: 0 */
        index: 'npms-current',
        type: 'score',
        body: {
            size: settings.size,
            query: {
                function_score: {
                    min_score: settings.minScore,
                    // Use the script result as the final score instead of combining it with the query score
                    boost_mode: 'replace',
                    query: {
                        fuzzy: {
                            'package.name.raw': {
                                value: text,
                            },
                        },
                    },
                    script_score: {
                        lang: 'groovy',
                        script,
                        params: scriptParams,
                    },
                },
            },
        },
    }))
    .then((res) => res.hits.hits.map((hit) => {
        // We can't use _fields in the query because the order of the JSON properties gets messed up,
        // see https://github.com/elastic/elasticsearch/issues/17639
        // So we filter the source fields manually with pick(). This is not ideal since unused payload
        // travels over the network, but it's definitely better than having the order messed up
        const result = pick(hit._source, ['package', 'flags', 'score']);

        result.searchScore = hit._score;

        return result;
    }));
}

// Expose searchSimilar as this module's export.
module.exports = searchSimilar;
Loading