-
Notifications
You must be signed in to change notification settings - Fork 13
/
query-access.ts
444 lines (374 loc) · 14.8 KB
/
query-access.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
import * as ts from '@terascope/utils';
import * as p from 'xlucene-parser';
import {
SortOrder,
GeoDistanceUnit,
xLuceneVariables,
xLuceneTypeConfig,
xLuceneFieldType,
ElasticsearchDistribution,
ClientParams
} from '@terascope/types';
import { CachedTranslator } from '../translator';
import * as i from './interfaces';
/**
* Restricts xLucene queries and requested source fields to what the
* configured type config, includes/excludes and constraints allow, and
* converts restricted queries into elasticsearch search params.
*/
export class QueryAccess<T extends ts.AnyObject = ts.AnyObject> {
/** Copy of `config.excludes`; type config fields matching these are left out of `parsedTypeConfig` */
readonly excludes: (keyof T)[];
/** Copy of `config.includes`; when non-empty only matching type config fields are kept in `parsedTypeConfig` */
readonly includes: (keyof T)[];
/** Truthy entries of `config.constraint`; these are AND-ed with every restricted query */
readonly constraints?: string[];
/** When true, wildcard and regexp terms that start with a wildcard character are rejected */
readonly preventPrefixWildcard: boolean;
/** When true, terms without a field are allowed through without further field checks */
readonly allowImplicitQueries: boolean;
readonly defaultGeoField?: string;
readonly defaultGeoSortOrder?: SortOrder;
readonly defaultGeoSortUnit?: GeoDistanceUnit|string;
/** Whether an empty query is accepted (`allow_empty_queries`, defaults to true) */
readonly allowEmpty: boolean;
/** Shallow copy of the type config supplied by the caller (unfiltered) */
readonly typeConfig: xLuceneTypeConfig;
/** `typeConfig` with the excluded / not-included fields removed */
readonly parsedTypeConfig: xLuceneTypeConfig;
/** Variables handed to the parser (and merged with per-call variables for the translator) */
readonly variables: xLuceneVariables;
readonly filterNilVariables: boolean;
private readonly _parser: p.CachedParser = new p.CachedParser();
private readonly _translator: CachedTranslator = new CachedTranslator();
/**
* @param config access configuration (includes, excludes, constraint, geo defaults, ...)
* @param options may supply `type_config`, `variables` and `filterNilVariables`
* @throws Error when `ts.isEmpty` reports the resolved type config as empty
*/
constructor(config: i.QueryAccessConfig<T> = {}, options: i.QueryAccessOptions = {}) {
const {
excludes = [],
includes = [],
constraint,
allow_empty_queries: allowEmpty = true,
} = config;
// the type config on `config` takes precedence over the one on `options`
const typeConfig = config.type_config || options.type_config || {};
const variables = options.variables || {};
if (ts.isEmpty(typeConfig)) throw new Error('Configuration for type_config must be provided');
this.typeConfig = { ...typeConfig };
this.excludes = excludes?.slice();
this.includes = includes?.slice();
// accept a single constraint or a list of them, dropping falsy entries
this.constraints = ts.castArray(constraint).filter(Boolean) as string[];
this.allowEmpty = Boolean(allowEmpty);
this.preventPrefixWildcard = Boolean(config.prevent_prefix_wildcard);
this.allowImplicitQueries = Boolean(config.allow_implicit_queries);
this.defaultGeoField = config.default_geo_field;
this.defaultGeoSortOrder = config.default_geo_sort_order;
this.defaultGeoSortUnit = config.default_geo_sort_unit;
// must run after typeConfig, excludes and includes are assigned, it reads all three
this.parsedTypeConfig = this._restrictTypeConfig();
this.variables = variables;
this.filterNilVariables = !!options.filterNilVariables;
}
/** Reset the cached parser and the cached translator */
clearCache(): void {
this._parser.reset();
this._translator.reset();
}
/**
* Validate and restrict a xlucene query
*
* @returns a restricted xlucene query
*/
restrict(q: string): string {
return this._restrict(q).query;
}
/**
* Validate and restrict a xlucene query
*
* @param q the xlucene query to validate
* @param _overrideParsedQuery optional node forwarded to the cached parser
* together with `q`
* @returns the parser for the restricted query, with any constraints applied
* @throws TSError with statusCode 422 when the query cannot be parsed, or 403
* when the query is empty but empty queries are not allowed, uses an implicit
* field that is not allowed, references a restricted field, or (with
* `preventPrefixWildcard`) uses a restricted wildcard / regexp form
*/
private _restrict(q: string, _overrideParsedQuery?: p.Node): p.Parser {
let parser: p.Parser;
const parserOptions: p.ParserOptions = {
type_config: this.typeConfig,
variables: this.variables,
filterNilVariables: this.filterNilVariables
};
try {
parser = this._parser.make(q, parserOptions, _overrideParsedQuery);
} catch (err) {
throw new ts.TSError(err, {
reason: 'Query could not be parsed',
statusCode: 422,
context: {
q,
safe: true
}
});
}
if (p.isEmptyNode(parser.ast)) {
if (!this.allowEmpty) {
throw new ts.TSError('Empty queries are restricted', {
statusCode: 403,
context: {
q,
safe: true
}
});
}
// an allowed empty query has no terms to check, only constraints to add
return this._addConstraints(parser, parserOptions);
}
parser.forTermTypes((node: p.TermLikeNode) => {
// restrict when a term is specified without a field
if (!node.field) {
if (this.allowImplicitQueries) return;
throw new ts.TSError('Implicit fields are restricted, please specify the field', {
statusCode: 403,
context: {
q,
safe: true
}
});
}
if (this._isFieldRestricted(node.field)) {
throw new ts.TSError(`Field ${node.field} in query is restricted`, {
statusCode: 403,
context: {
q,
safe: true
}
});
}
// both the leading-wildcard check and the non-guaranteed-match check
// only apply when prevent_prefix_wildcard is enabled
if (this.preventPrefixWildcard) {
const isWildcardNode = p.isWildcard(node);
const isRegexpNode = p.isRegexp(node);
if (isWildcardNode || isRegexpNode) {
const value = p.getFieldValue(node.value, this.variables);
if (startsWithWildcard(value, node.type)) {
const errMessage = node.type === p.NodeType.Wildcard
? "Queries starting with wildcards in the form 'fieldname:*value' or 'fieldname:?value' are restricted"
: "Regular expression queries starting with wildcards in the form 'fieldname:/.*value/' or 'fieldname:/.?value/' are restricted";
throw new ts.TSError(errMessage, {
statusCode: 403,
context: {
q,
safe: true
}
});
}
if (isRegexpNode && hasNonGuaranteedMatch(value)) {
throw new ts.TSError("Regular expression queries with non-guaranteed matches in the form 'fieldname:/v*/' or 'fieldname:/v{0,1}/' are restricted", {
statusCode: 403,
context: {
q,
safe: true
}
});
}
}
}
});
return this._addConstraints(parser, parserOptions);
}
/**
* Build the type config restricted by `excludes` and `includes`: a field is
* dropped when it matches any exclude, or when includes are configured and
* it matches none of them.
*/
private _restrictTypeConfig(): xLuceneTypeConfig {
const parsedConfig: xLuceneTypeConfig = {};
for (const [typeField, value] of Object.entries(this.typeConfig)) {
const excluded = this.excludes.filter((restrictField) => matchTypeField(
typeField,
restrictField as string
));
// excludes are checked first, so they win over includes
if (excluded.length) continue;
if (this.includes.length) {
const included = this.includes.filter((restrictField) => matchTypeField(
typeField,
restrictField as string
));
if (!included.length) continue;
}
parsedConfig[typeField] = value;
}
return parsedConfig;
}
/**
* Converts a restricted xlucene query to an elasticsearch search query
*
* @param query the xlucene query to restrict and translate
* @param opts search params plus translation options; version related options
* fall back to the defaults in the destructuring below
* @param _overrideParsedQuery optional node forwarded to `_restrict`
* @returns a restricted elasticsearch search query
* @throws Error when `params._source` is set, plus anything `_restrict` throws
*/
async restrictSearchQuery(
query: string,
opts?: i.RestrictSearchQueryOptions,
_overrideParsedQuery?: p.Node
): Promise<ClientParams.SearchParams> {
const {
params: _params = {},
majorVersion = 6,
minorVersion = 8,
distribution = ElasticsearchDistribution.elasticsearch,
version = '6.8.6',
...options
} = opts ?? {};
const translateOptions = {
...options,
distribution,
majorVersion,
minorVersion,
version
};
// per-call variables override the instance variables.
// NOTE(review): the merged variables are only given to the translator,
// `_restrict` below still parses with `this.variables` — confirm this is intended
const variables = Object.assign({}, this.variables, opts?.variables ?? {});
if (_params._source) {
throw new Error('Cannot include _source in params, use _sourceInclude or _sourceExclude');
}
// shallow copy of the caller's params object
const params = { ..._params };
const parser = this._restrict(query, _overrideParsedQuery);
// presumably yields to the event loop between parsing and translating — TODO confirm
await ts.pImmediate();
const translator = this._translator.make(parser, {
type_config: this.parsedTypeConfig,
default_geo_field: this.defaultGeoField,
default_geo_sort_order: this.defaultGeoSortOrder,
default_geo_sort_unit: this.defaultGeoSortUnit,
variables,
filterNilVariables: this.filterNilVariables
});
const translated = translator.toElasticsearchDSL(translateOptions);
// keep _sourceInclude && _sourceExclude for backward compatibility
const {
_sourceInclude, _source_includes,
_sourceExclude, _source_excludes,
...parsedParams
} = params as any;
// the camelCase spelling wins when both spellings are supplied
const sourceIncludes = _sourceInclude ?? _source_includes;
const sourceExcludes = _sourceExclude ?? _source_excludes;
const { includes, excludes } = this.restrictSourceFields(
sourceIncludes as (keyof T)[],
sourceExcludes as (keyof T)[]
);
// we can remove this logic when we can get rid of legacy client
const isLegacy = version === '6.5';
const excludesKey = isLegacy ? '_sourceExclude' : '_source_excludes';
const includesKey = isLegacy ? '_sourceInclude' : '_source_includes';
const searchParams: ClientParams.SearchParams = {
...parsedParams,
// translated DSL keys override same-named keys of a caller supplied body
body: { ...parsedParams.body, ...translated },
[excludesKey]: excludes,
[includesKey]: includes,
};
// drop any `q` that came in through params.
// NOTE(review): searchParams was just assigned an object literal so this check is always true
if (searchParams != null) { delete searchParams.q; }
return searchParams;
}
/**
* Restrict requested source to all or subset of the ones available
*
* **NOTE:** this will remove restricted fields and will not throw
*
* @param includes source fields requested to be included
* @param excludes source fields requested to be excluded
* @returns the source includes / excludes to send; `excludes` is `['*']` when
* includes were configured or requested but none of them survived
*/
restrictSourceFields(includes?: (keyof T)[], excludes?: (keyof T)[]): {
includes: (keyof T)[]|undefined,
excludes: (keyof T)[]|undefined,
} {
// only the first path segment of every allowed type config field is considered
const all = Object.keys(this.parsedTypeConfig)
.map((field) => field.split('.', 1)[0]) as (keyof T)[];
const _includes = this._getSourceFields('includes', this.includes, all, includes);
const _excludes = this._getSourceFields('excludes', this.excludes, all, excludes);
// if there's restricted includes fields (or if not but user requested included fields)
// then _includes should have length, if not we'd override original restrictions if we
// sent [] and expose all fields, so just exclude all since requested fields not found.
const invalid = (this.includes.length || includes?.length) ? !_includes?.length : false;
return {
includes: _includes,
excludes: invalid ? ['*'] : _excludes,
};
}
/**
* Combine the configured (restricted) fields with the fields requested by the caller.
*
* @param type which list is being resolved, decides how the two lists are combined
* @param restricted the configured includes or excludes
* @param all the first path segments of the allowed type config fields
* @param override the fields requested by the caller, passed through `ts.parseList`
* (presumably normalizes a list or delimited value into an array — TODO confirm)
* @returns a copy of `restricted` when nothing was requested, otherwise the combined list
*/
private _getSourceFields(
type: 'includes'|'excludes',
restricted: (keyof T)[],
all: (keyof T)[],
override?: (keyof T)[] | boolean | (keyof T),
): (keyof T)[] | undefined {
const fields = ts.uniq(ts.parseList(override) as (keyof T)[]);
if (fields.length) {
if (restricted.length) {
// combine already excluded fields with new ones
if (type === 'excludes') {
return ts.uniq(restricted.concat(fields));
}
// reduce already restricted includes to the overrides
return restricted.filter((field) => fields.includes(field));
}
// nothing configured: keep only the requested fields known to the type config
if (all.length) {
return fields.filter((field) => all.includes(field));
}
return fields;
}
return restricted.slice();
}
/**
* A field is restricted when no entry of `parsedTypeConfig` (ignoring entries
* typed as Object) matches it.
*/
private _isFieldRestricted(field: string): boolean {
return !Object.entries(this.parsedTypeConfig).some(([typeField, fieldType]) => {
if (fieldType === xLuceneFieldType.Object) return false;
const parts = typeField.split('.');
if (parts.length > 1) {
// when the parent path is typed as Object in the unfiltered type config
// the query field is used as the pattern against the type field's prefixes
const firstPart = parts.slice(0, -1).join('.');
if (this.typeConfig[firstPart] === xLuceneFieldType.Object) {
return matchFieldObject(typeField, field);
}
}
return matchField(typeField, field);
});
}
/**
* AND the configured constraints with the query of the given parser and parse
* the combined query. Returns the given parser untouched when there are no constraints.
*
* NOTE(review): the combined query is parsed from its string form, so a
* `_overrideParsedQuery` given to `_restrict` is not forwarded here — confirm this is intended
*/
private _addConstraints(parser: p.Parser, options: p.ParserOptions): p.Parser {
if (this.constraints?.length) {
// an empty user query is dropped here so it does not produce an empty `()` group
const queries = ts.concat(this.constraints, [parser.query]).filter(Boolean) as string[];
if (queries.length === 1) return this._parser.make(queries[0], options);
return this._parser.make(`(${queries.join(') AND (')})`, options);
}
return parser;
}
}
/**
 * Check the queried field (used as the wildcard pattern) against every
 * dot-delimited prefix of the type field, shortest prefix first.
 *
 * @returns true as soon as one prefix matches, false when none do
 */
function matchFieldObject(typeField: string, field: string) {
    const segments = typeField.split('.');
    return segments.some((_segment, idx) => {
        // "a", then "a.b", then "a.b.c", ...
        const prefix = segments.slice(0, idx + 1).join('.');
        return ts.matchWildcard(field, prefix);
    });
}
/**
 * Check every dot-delimited prefix of the queried field (used as the
 * wildcard pattern) against the type field, shortest prefix first.
 *
 * @returns true as soon as one prefix matches, false when none do
 */
function matchField(typeField: string, field: string) {
    const segments = field.split('.');
    return segments.some((_segment, idx) => {
        // "a", then "a.b", then "a.b.c", ...
        const prefix = segments.slice(0, idx + 1).join('.');
        return ts.matchWildcard(prefix, typeField);
    });
}
/**
 * Whether the restricted field is the type field itself or one of its
 * dot-delimited ancestors (i.e. `a` and `a.b` both match `a.b.c`).
 */
function matchTypeField(typeField: string, restrictField: string) {
    const segments = typeField.split('.');
    for (let end = 1; end <= segments.length; end++) {
        // compare against the path made of the first `end` segments
        if (segments.slice(0, end).join('.') === restrictField) {
            return true;
        }
    }
    return false;
}
/**
 * Standard ES regular expression operators that a query must not start with.
 * Closing braces are left out; every other operator is listed, even the ones
 * unlikely to appear as the first character of a query (i.e. | and { )
 */
const standardOperators = [
    '.', '?', '+', '*',
    '(', '[', '|', '{',
];
/**
 * Optional ES regular expression operators - all of them are listed even
 * though not all of them are allowed
 */
const optionalOperators = [
    '~', '#', '<', '&', '@',
];
/**
 * Every regex character (standard followed by optional) that a query must not
 * start with: a leading wildcard makes the query heavy because all terms in
 * the index need to be searched
 */
const regexWildcard = [...standardOperators, ...optionalOperators];
/**
 * Whether a term value begins with a wildcard character. Regexp nodes are
 * checked against the regex operator list, any other node type against
 * `*` and `?`. Falsy or non-string values never count as a wildcard.
 */
function startsWithWildcard(input?: string | number, nodeType = p.NodeType.Wildcard) {
    if (!input || !ts.isString(input)) return false;

    const forbiddenFirstChars = nodeType === p.NodeType.Regexp
        ? regexWildcard
        : ['*', '?'];
    return forbiddenFirstChars.includes(ts.getFirstChar(input));
}
/**
 * Whether the full index could end up being searched...
 * ab* and a+ still require a match on a, but a*, a?, a{0,1}
 * make that first character optional, so nothing is guaranteed to match.
 *
 * Only the second character of the trimmed value is inspected; falsy or
 * non-string values are never flagged.
 */
function hasNonGuaranteedMatch(input?: string | number) {
    if (!input || !ts.isString(input)) return false;

    // empty string when the trimmed value has fewer than two characters
    const secondChar = input.trim().charAt(1);
    return secondChar === '*' || secondChar === '?' || secondChar === '{';
}