/
diversified-sampler-aggregation.js
121 lines (109 loc) · 4.22 KB
/
diversified-sampler-aggregation.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
'use strict';
const isNil = require('lodash.isnil');
const {
util: { invalidParam },
consts: { EXECUTION_HINT_SET }
} = require('../../core');
const BucketAggregationBase = require('./bucket-aggregation-base');
// Elasticsearch reference page for the diversified sampler aggregation.
// Passed as the first argument to `invalidParam` below.
const ES_REF_URL =
'https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-diversified-sampler-aggregation.html';
// Helper called by `executionHint` whenever the supplied hint is rejected.
// It is built from the reference URL, the parameter name and the set of
// accepted values.
// NOTE(review): presumably the returned function throws - confirm against
// `invalidParam` in ../../core.
const invalidExecutionHintParam = invalidParam(
ES_REF_URL,
'execution_hint',
EXECUTION_HINT_SET
);
/**
 * A filtering aggregation used to limit any sub aggregations' processing
 * to a sample of the top-scoring documents. Diversity settings
 * are used to limit the number of matches that share a common value such as an "author".
 *
 * [Elasticsearch reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-diversified-sampler-aggregation.html)
 *
 * @example
 * const reqBody = esb.requestBodySearch()
 *     .query(esb.queryStringQuery('tags:elasticsearch'))
 *     .agg(
 *         esb.diversifiedSamplerAggregation('my_unbiased_sample', 'author')
 *             .shardSize(200)
 *             .agg(
 *                 esb.significantTermsAggregation(
 *                     'keywords',
 *                     'tags'
 *                 ).exclude(['elasticsearch'])
 *             )
 *     );
 *
 * @example
 * // Use a script to produce a hash of the multiple values in a tags field
 * // to ensure we don't have a sample that consists of the same repeated
 * // combinations of tags
 * const reqBody = esb.requestBodySearch()
 *     .query(esb.queryStringQuery('tags:kibana'))
 *     .agg(
 *         esb.diversifiedSamplerAggregation('my_unbiased_sample')
 *             .shardSize(200)
 *             .maxDocsPerValue(3)
 *             .script(esb.script('inline', "doc['tags'].values.hashCode()"))
 *             .agg(
 *                 esb.significantTermsAggregation(
 *                     'keywords',
 *                     'tags'
 *                 ).exclude(['kibana'])
 *             )
 *     );
 *
 * @param {string} name The name which will be used to refer to this aggregation.
 * @param {string=} field The field to aggregate on
 *
 * @extends BucketAggregationBase
 */
class DiversifiedSamplerAggregation extends BucketAggregationBase {
    // eslint-disable-next-line require-jsdoc
    constructor(name, field) {
        // 'diversified_sampler' is the aggregation type handed to the base class.
        super(name, 'diversified_sampler', field);
    }

    /**
     * The shard_size parameter limits how many top-scoring documents
     * are collected in the sample processed on each shard. The default value is 100.
     *
     * @param {number} size Maximum number of documents to return from each shard(Integer)
     * @returns {DiversifiedSamplerAggregation} returns `this` so that calls can be chained
     */
    shardSize(size) {
        this._aggsDef.shard_size = size;
        return this;
    }

    /**
     * Used to control the maximum number of documents collected
     * on any one shard which share a common value.
     * Applies on a per-shard basis only for the purposes of shard-local sampling.
     *
     * @param {number} maxDocsPerValue Default 1.(Integer)
     * @returns {DiversifiedSamplerAggregation} returns `this` so that calls can be chained
     */
    maxDocsPerValue(maxDocsPerValue) {
        this._aggsDef.max_docs_per_value = maxDocsPerValue;
        return this;
    }

    /**
     * This setting can influence the management of the values used
     * for de-duplication. Each option will hold up to shard_size
     * values in memory while performing de-duplication but
     * the type of value held can be controlled
     *
     * The hint is matched case-insensitively and stored in lower case.
     *
     * @param {string} hint the possible values are `map`, `global_ordinals`,
     * `global_ordinals_hash` and `global_ordinals_low_cardinality`
     * @returns {DiversifiedSamplerAggregation} returns `this` so that calls can be chained
     * @throws {Error} If Execution Hint is nil, is not string-like, or is
     * outside the accepted set.
     */
    executionHint(hint) {
        // Values without a callable `toLowerCase` (numbers, booleans, plain
        // objects, ...) are routed through the same invalid-parameter helper
        // as nil, so callers get the descriptive error rather than a bare
        // "hint.toLowerCase is not a function" TypeError.
        if (isNil(hint) || typeof hint.toLowerCase !== 'function') {
            invalidExecutionHintParam(hint);
        }

        const hintLower = hint.toLowerCase();
        if (!EXECUTION_HINT_SET.has(hintLower)) {
            // Report the value exactly as the caller supplied it.
            invalidExecutionHintParam(hint);
        }

        this._aggsDef.execution_hint = hintLower;
        return this;
    }
}
// The class itself is assigned as this module's export.
module.exports = DiversifiedSamplerAggregation;