forked from pingcap/tidb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
path_expr.go
209 lines (185 loc) · 6.75 KB
/
path_expr.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package json
import (
"regexp"
"strconv"
"strings"
"github.com/juju/errors"
)
/*
From MySQL 5.7, JSON path expression grammar:
pathExpression ::= scope (pathLeg)*
scope ::= [ columnReference ] '$'
columnReference ::= // omit...
pathLeg ::= member | arrayLocation | '**'
member ::= '.' (keyName | '*')
arrayLocation ::= '[' (non-negative-integer | '*') ']'
keyName ::= ECMAScript-identifier | ECMAScript-string-literal
And some implementation limits in MySQL 5.7:
1) columnReference in scope must be empty now;
2) a double asterisk (**) cannot be the last leg;
Examples:
select json_extract('{"a": "b", "c": [1, "2"]}', '$.a') -> "b"
select json_extract('{"a": "b", "c": [1, "2"]}', '$.c') -> [1, "2"]
select json_extract('{"a": "b", "c": [1, "2"]}', '$.a', '$.c') -> ["b", [1, "2"]]
select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[0]') -> 1
select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[2]') -> NULL
select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[*]') -> [1, "2"]
select json_extract('{"a": "b", "c": [1, "2"]}', '$.*') -> ["b", [1, "2"]]
*/
// jsonPathExprLegRe matches one path leg at a time. It has three alternatives:
//   - a member leg: '.' followed by optional whitespace and then either an
//     identifier ([a-zA-Z_][a-zA-Z0-9_]*), a single '*', or a double-quoted
//     string literal ("[^"\\]*(\\.[^"\\]*)*", which can carry escaped quotes);
//   - an array leg: '[' and ']' around a run of decimal digits or a single '*',
//     with optional whitespace inside the brackets;
//   - the double asterisk '**'.
//
// Only ASCII identifiers are matched by the identifier alternative.
var jsonPathExprLegRe = regexp.MustCompile(`(\.\s*([a-zA-Z_][a-zA-Z0-9_]*|\*|"[^"\\]*(\\.[^"\\]*)*")|(\[\s*([0-9]+|\*)\s*\])|\*\*)`)
// pathLegType identifies which syntactic form a pathLeg was parsed from.
type pathLegType byte

const (
	// pathLegKey indicates the path leg with form '.key'; the wildcard
	// member '.*' is stored with this type as well.
	pathLegKey pathLegType = 0x01
	// pathLegIndex indicates the path leg with form '[number]'; the wildcard
	// index '[*]' is stored with this type as well.
	pathLegIndex pathLegType = 0x02
	// pathLegDoubleAsterisk indicates the path leg with form '**'.
	pathLegDoubleAsterisk pathLegType = 0x03
)
// pathLeg is a single parsed step of a path expression.
// It is only used by PathExpression.
type pathLeg struct {
	// typ selects which of the fields below is meaningful.
	typ pathLegType
	// arrayIndex holds the parsed index when typ is pathLegIndex;
	// arrayIndexAsterisk stands for '[*]'.
	arrayIndex int // if typ is pathLegIndex, the value should be parsed into here.
	// dotKey holds the (unquoted) key when typ is pathLegKey; "*" stands for '.*'.
	dotKey string // if typ is pathLegKey, the key should be parsed into here.
}
// arrayIndexAsterisk is the sentinel stored in pathLeg.arrayIndex when the
// array leg is '[*]'. Real indices are non-negative, so this number can
// represent "all elements".
const arrayIndexAsterisk = -1
// pathExpressionFlag is a bit set recording which wildcard forms appear in a
// PathExpression.
type pathExpressionFlag byte

const (
	// pathExpressionContainsAsterisk is set when some leg is '.*' or '[*]'.
	pathExpressionContainsAsterisk pathExpressionFlag = 0x01
	// pathExpressionContainsDoubleAsterisk is set when some leg is '**'.
	pathExpressionContainsDoubleAsterisk pathExpressionFlag = 0x02
)

// containsAnyAsterisk reports whether either wildcard bit is set in pef.
// Bits other than the two wildcard flags are ignored.
func (pef pathExpressionFlag) containsAnyAsterisk() bool {
	const anyAsterisk = pathExpressionContainsAsterisk | pathExpressionContainsDoubleAsterisk
	return pef&anyAsterisk != 0
}
// PathExpression is a parsed JSON path expression, as produced by
// ParseJSONPathExpr.
type PathExpression struct {
	// legs are the path legs in the order they appear after the '$' scope.
	legs []pathLeg
	// flags records which wildcard forms occur among legs.
	flags pathExpressionFlag
}
// popOneLeg detaches the first leg of pe. It returns that leg together with a
// child PathExpression made of the remaining legs, whose flags are recomputed
// from those remaining legs only. pe must have at least one leg; an empty
// expression makes the slice operation panic.
func (pe PathExpression) popOneLeg() (pathLeg, PathExpression) {
	child := PathExpression{legs: pe.legs[1:]}
	for _, l := range child.legs {
		switch l.typ {
		case pathLegIndex:
			// -1 is the arrayIndexAsterisk sentinel, i.e. the leg is '[*]'.
			if l.arrayIndex == -1 {
				child.flags |= pathExpressionContainsAsterisk
			}
		case pathLegKey:
			// A dotKey of "*" is how the '.*' leg is stored.
			if l.dotKey == "*" {
				child.flags |= pathExpressionContainsAsterisk
			}
		case pathLegDoubleAsterisk:
			child.flags |= pathExpressionContainsDoubleAsterisk
		}
	}
	return pe.legs[0], child
}
// popOneLastLeg detaches the last leg of pe. It returns the parent
// PathExpression (every leg but the last) and the last leg itself.
// pe must have at least one leg; an empty expression makes the index panic.
func (pe PathExpression) popOneLastLeg() (PathExpression, pathLeg) {
	legs := pe.legs
	tail := legs[len(legs)-1]
	// The parent's flags are left at zero: per the original author's note this
	// is used only for modification, where the path has already been checked
	// to contain no asterisks.
	return PathExpression{legs: legs[:len(legs)-1]}, tail
}
// ParseJSONPathExpr parses a JSON path expression. Returns a PathExpression
// object which can be used in JSON_EXTRACT, JSON_SET and so on.
//
// A valid expression is: optional blanks, the '$' scope, then zero or more
// legs recognised by jsonPathExprLegRe ('.key', '."quoted key"', '.*', '[N]',
// '[*]' or '**'), optionally separated and surrounded by blanks. The last leg
// must not be '**'.
//
// ErrInvalidJSONPath is returned when '$' is missing, when anything other than
// blanks appears before '$' or between legs, when text remains after the last
// recognised leg, when a quoted key cannot be unquoted, or when the last leg
// is '**'. A numeric index that strconv.Atoi rejects is reported with that
// error, traced. On any error the returned PathExpression is the zero value.
func ParseJSONPathExpr(pathExpr string) (pe PathExpression, err error) {
	// Find the position of the first '$'. Only blank characters may precede it.
	dollarIndex := strings.Index(pathExpr, "$")
	if dollarIndex < 0 {
		return PathExpression{}, ErrInvalidJSONPath.GenByArgs(pathExpr)
	}
	for i := 0; i < dollarIndex; i++ {
		if !isBlank(rune(pathExpr[i])) {
			return PathExpression{}, ErrInvalidJSONPath.GenByArgs(pathExpr)
		}
	}

	// Everything after '$', without leading and trailing blanks, must be a
	// sequence of legs.
	pathExprSuffix := strings.TrimFunc(pathExpr[dollarIndex+1:], isBlank)
	indices := jsonPathExprLegRe.FindAllStringIndex(pathExprSuffix, -1)

	legs := make([]pathLeg, 0, len(indices))
	var flags pathExpressionFlag
	lastEnd := 0
	for _, loc := range indices {
		start, end := loc[0], loc[1]

		// Check all characters between two legs are blank.
		for i := lastEnd; i < start; i++ {
			if !isBlank(rune(pathExprSuffix[i])) {
				return PathExpression{}, ErrInvalidJSONPath.GenByArgs(pathExpr)
			}
		}
		lastEnd = end

		switch pathExprSuffix[start] {
		case '[':
			// The leg is an index of a JSON array. The match always ends with
			// ']', so strip both brackets and the blanks around the content.
			indexStr := strings.TrimFunc(pathExprSuffix[start+1:end-1], isBlank)
			index := arrayIndexAsterisk
			if indexStr == "*" {
				flags |= pathExpressionContainsAsterisk
			} else {
				n, convErr := strconv.Atoi(indexStr)
				if convErr != nil {
					return PathExpression{}, errors.Trace(convErr)
				}
				index = n
			}
			legs = append(legs, pathLeg{typ: pathLegIndex, arrayIndex: index})
		case '.':
			// The leg is a key of a JSON object.
			key := strings.TrimFunc(pathExprSuffix[start+1:end], isBlank)
			if key == "*" {
				flags |= pathExpressionContainsAsterisk
			} else if key[0] == '"' {
				// We need to unquote the original string literal.
				// NOTE(review): a quoted "*" unquotes to the same dotKey as the
				// '.*' wildcard without setting the asterisk flag — confirm
				// that consumers treat this as intended.
				unquoted, unquoteErr := unquoteString(key[1 : len(key)-1])
				if unquoteErr != nil {
					return PathExpression{}, ErrInvalidJSONPath.GenByArgs(pathExpr)
				}
				key = unquoted
			}
			legs = append(legs, pathLeg{typ: pathLegKey, dotKey: key})
		default:
			// The leg is '**'.
			flags |= pathExpressionContainsDoubleAsterisk
			legs = append(legs, pathLeg{typ: pathLegDoubleAsterisk})
		}
	}

	// The suffix was trimmed, so anything left after the last recognised leg
	// is non-blank garbage (e.g. "$.a[", "$.a-b"). Reject it instead of
	// silently parsing only the prefix. This also covers a non-empty suffix
	// in which no leg matched at all.
	if lastEnd != len(pathExprSuffix) {
		return PathExpression{}, ErrInvalidJSONPath.GenByArgs(pathExpr)
	}

	// The last leg of a path expression cannot be '**'.
	if n := len(legs); n > 0 && legs[n-1].typ == pathLegDoubleAsterisk {
		return PathExpression{}, ErrInvalidJSONPath.GenByArgs(pathExpr)
	}
	return PathExpression{legs: legs, flags: flags}, nil
}
// isBlank reports whether c is a character the path parser treats as blank:
// space, horizontal tab, line feed or carriage return.
func isBlank(c rune) bool {
	switch c {
	case ' ', '\t', '\n', '\r':
		return true
	default:
		return false
	}
}