-
Notifications
You must be signed in to change notification settings - Fork 175
/
expression.py
173 lines (130 loc) · 4.31 KB
/
expression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# This file is part of the Open Data Cube, see https://opendatacube.org for more information
#
# Copyright (c) 2015-2023 ODC Contributors
# SPDX-License-Identifier: Apache-2.0
"""
Search expression parsing for command line applications.
Five types of expressions are available:
FIELD = VALUE
FIELD in DATE-RANGE
FIELD in [START, END]
TIME > DATE
TIME < DATE
Where DATE or DATE-RANGE is one of YYYY, YYYY-MM or YYYY-MM-DD
and START, END are either numbers or dates.
"""
# flake8: noqa
from lark import Lark, v_args, Transformer
from datacube.api.query import _time_to_search_dims
from datacube.model import Range
# Lark grammar for the search-expression mini-language parsed by this module.
#
# * ``start`` accepts zero or more expressions; whitespace is ignored (%ignore WS).
# * Expression forms: ``FIELD = VALUE``, ``time in DATE-RANGE``,
#   ``FIELD in [A, B]`` (numeric bounds only, see ``orderable``),
#   ``time > DATE`` and ``time < DATE``.
# * The ``-> name`` aliases (integer, number, string, simple_string, url_string,
#   single_date, date_pair, range_lower_bound, range_upper_bound) match the
#   callback names defined on TreeToSearchExprs.
# * A UUID value is aliased to ``simple_string``, so it is handled like any
#   other unquoted string.
# NOTE(review): ``common.CNAME`` is imported below but no rule or terminal in
# this grammar references it.
search_grammar = r"""
start: expression*
?expression: equals_expr
| time_in_expr
| field_in_expr
| time_gt_expr
| time_lt_expr
equals_expr: field "=" value
time_in_expr: time "in" date_range
field_in_expr: field "in" "[" orderable "," orderable "]"
time_gt_expr: time ">" date_gt
time_lt_expr: time "<" date_lt
field: FIELD
time: TIME
?orderable: INT -> integer
| SIGNED_NUMBER -> number
?value: INT -> integer
| SIGNED_NUMBER -> number
| ESCAPED_STRING -> string
| SIMPLE_STRING -> simple_string
| URL_STRING -> url_string
| UUID -> simple_string
?date_range: date -> single_date
| "[" date "," date "]" -> date_pair
date_gt: date -> range_lower_bound
date_lt: date -> range_upper_bound
date: YEAR ["-" MONTH ["-" DAY ]]
TIME: "time"
FIELD: /[a-zA-Z][\w\d_]*/
YEAR: DIGIT ~ 4
MONTH: DIGIT ~ 1..2
DAY: DIGIT ~ 1..2
SIMPLE_STRING: /[a-zA-Z][\w._-]*/ | /[0-9]+[\w_-][\w._-]*/
URL_STRING: /[a-z0-9+.-]+:\/\/([:\/\w._-])*/
UUID: HEXDIGIT~8 "-" HEXDIGIT~4 "-" HEXDIGIT~4 "-" HEXDIGIT~4 "-" HEXDIGIT~12
%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
%import common.INT
%import common.DIGIT
%import common.HEXDIGIT
%import common.CNAME
%import common.WS
%ignore WS
"""
def identity(x):
    """Return the argument unchanged."""
    return x
@v_args(inline=True)
class TreeToSearchExprs(Transformer):
    """Turn a parsed search-expression tree into a single query dict.

    Because of ``v_args(inline=True)`` every callback receives the children
    of its rule as separate positional arguments. Each ``*_expr`` callback
    yields a one-entry dict and ``start`` folds those into one dict.
    """

    # ---- Expressions: each becomes a one-entry ``{name: value}`` dict ----

    def equals_expr(self, field, value):
        """Handle ``FIELD = VALUE``."""
        key = str(field)
        return {key: value}

    def field_in_expr(self, field, lower, upper):
        """Handle ``FIELD in [lower, upper]`` by wrapping the bounds in a Range."""
        key = str(field)
        bounds = Range(lower, upper)
        return {key: bounds}

    def time_in_expr(self, time_field, date_range):
        """Handle ``time in DATE-RANGE``; the range is already converted."""
        key = str(time_field)
        return {key: date_range}

    def time_gt_expr(self, time_field, date_gt):
        """Handle ``time > DATE``; the lower bound is already converted."""
        key = str(time_field)
        return {key: date_gt}

    def time_lt_expr(self, time_field, date_lt):
        """Handle ``time < DATE``; the upper bound is already converted."""
        key = str(time_field)
        return {key: date_lt}

    # ---- Literals ----

    def string(self, val):
        """Drop the first and last character of a quoted string token.

        Only the enclosing characters are removed; escape sequences inside
        the text are left exactly as written.
        """
        inner = val[1:-1]
        return str(inner)

    # Tokens that are used verbatim as plain text.
    simple_string = str
    url_string = str
    field = str
    time = str

    # Numeric tokens.
    integer = int
    number = float

    # Pass-through for the ``value`` rule.
    value = identity

    # ---- Dates: conversion is delegated to ``_time_to_search_dims`` ----

    def single_date(self, date):
        """Convert one date string."""
        return _time_to_search_dims(date)

    def date_pair(self, start, end):
        """Convert a ``[start, end]`` pair of date strings."""
        pair = (start, end)
        return _time_to_search_dims(pair)

    def range_lower_bound(self, date):
        """Convert an open-ended range that begins at ``date``."""
        bounds = (date, None)
        return _time_to_search_dims(bounds)

    def range_upper_bound(self, date):
        """Convert an open-ended range that finishes at ``date``."""
        bounds = (None, date)
        return _time_to_search_dims(bounds)

    def date(self, y, m=None, d=None):
        """Join the date components that are present with ``-``."""
        present = [part for part in (y, m, d) if part is not None]
        return "-".join(present)

    # ---- Top level ----

    def start(self, *search_exprs):
        """Merge all expression dicts into one.

        When two expressions use the same key, the later one wins.
        """
        merged = {}
        for fragment in search_exprs:
            merged.update(fragment)
        return merged
def parse_expressions(*expression_text):
    """Parse search expression strings into a single query dict.

    The given strings are joined with single spaces, parsed with
    ``search_grammar`` and converted by ``TreeToSearchExprs``.

    :param expression_text: one or more expression strings
    :return: the result of transforming the parse tree
    """
    parser = Lark(search_grammar)
    query_text = ' '.join(expression_text)
    parse_tree = parser.parse(query_text)
    converter = TreeToSearchExprs()
    return converter.transform(parse_tree)
def main():
    """Parse a fixed list of sample expressions and print the results.

    For every sample line this prints the raw text, the parse tree, the
    transformed value and then an empty line.
    """
    parser = Lark(search_grammar)

    # One sample expression (or several space-separated ones) per line.
    samples_text = """platform = "LANDSAT_8"
platform = "LAND SAT_8"
platform = 4
lat in [4, 6]
time in [2014, 2014]
time in [2014-03-01, 2014-04-01]
time in 2014-03-02
time in 2014-3-2
time in 2014-3
time in 2014
platform = LANDSAT_8
lat in [4, 6] time in 2014-03-02
platform=LS8 lat in [-14, -23.5] instrument="OTHER"
"""
    samples = samples_text.strip().split('\n')

    for text in samples:
        converter = TreeToSearchExprs()
        parsed = parser.parse(text)
        print(text)
        print(parsed)
        print(converter.transform(parsed))
        print()


# Run the sample demo only when this file is executed as a script.
if __name__ == '__main__':
    main()