-
Notifications
You must be signed in to change notification settings - Fork 99
/
flatten.go
376 lines (324 loc) · 10.8 KB
/
flatten.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
// Package dataflatten contains tools to flatten complex data
// structures.
//
// On macOS, many plists use an array of maps, these can be tricky to
// filter. This package knows how to flatten that structure, as well
// as rewriting it as a nested array, or filtering it. It is akin to
// xpath, though simpler.
//
// This tool works primarily through string interfaces, so type
// information may be lost.
//
// Query Syntax
//
// The query syntax handles both filtering and basic rewriting. It is
// not perfect. The idea behind it is that we descend through a data
// structure, specifying what matches at each level.
//
// Each level of query can do:
// * specify a filter, this is a simple string match with wildcard support. (prefix and/or postfix, but not infix)
// * If the data is an array, specify an index
// * For array-of-maps, specify a key to rewrite as a nested map
//
// Each query term has 3 parts: [#]string[=>kvmatch]
// 1. An optional `#` This denotes a key to rewrite an array-of-maps with
// 2. A search term. If this is an integer, it is interpreted as an array index.
// 3. a key/value match string. For a map, this is to match the value of a key.
//
// Some examples:
// * data/users Return everything under { data: { users: { ... } } }
// * data/users/0 Return the first item in the users array
// * data/users/name=>A* Return users whose name starts with "A"
// * data/users/#id Return the users, and rewrite the users array to be a map with the id as the key
//
// See the test suite for extensive examples.
package dataflatten
import (
"strconv"
"strings"
"time"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
)
// Flattener holds the state used to flatten complex, nested data
// structures. It recurses through them, and returns a simplified
// form. At the simplest level, this rewrites:
//
//	{ foo: { bar: { baz: 1 } } }
//
// To:
//
//	[ { path: foo/bar/baz, value: 1 } ]
//
// It can optionally filter and rewrite the data along the way.
type Flattener struct {
	// includeNils controls whether nil values produce a row or are skipped.
	includeNils bool
	// rows accumulates the flattened output as descend walks the data.
	rows []Row
	// logger receives debug output while descending.
	logger log.Logger
	// query holds one query term per depth. A nil query matches everything.
	query []string
	// queryWildcard is the token that matches anything in a query term.
	queryWildcard string
	// queryKeyDenoter is the query term prefix that requests rewriting an
	// array-of-maps so that a map value is used as the path key.
	queryKeyDenoter string
}
// FlattenOpts is a functional option that configures a Flattener.
// Options are passed to Flatten, which applies them in order.
type FlattenOpts func(*Flattener)
// IncludeNulls returns an option telling Flatten to emit a row for
// null values rather than dropping them.
func IncludeNulls() FlattenOpts {
	var opt FlattenOpts = func(target *Flattener) {
		target.includeNils = true
	}
	return opt
}
// WithLogger returns an option that installs logger as the
// Flattener's logger.
func WithLogger(logger log.Logger) FlattenOpts {
	var opt FlattenOpts = func(target *Flattener) {
		target.logger = logger
	}
	return opt
}
// WithQuery returns an option that sets the query to flatten with.
// The query is used both for re-writing arrays into maps, and for
// filtering. See "Query Syntax" in the package documentation.
func WithQuery(q []string) FlattenOpts {
	var opt FlattenOpts = func(target *Flattener) {
		target.query = q
	}
	return opt
}
// Flatten is the entry point to the Flattener functionality. It builds
// a Flattener with the default wildcard (`*`) and key denoter (`#`),
// applies opts in order, and walks data from the root. It returns the
// collected rows, or a nil slice and the error if the walk fails.
func Flatten(data interface{}, opts ...FlattenOpts) ([]Row, error) {
	fl := new(Flattener)
	fl.rows = []Row{}
	fl.logger = log.NewNopLogger()
	fl.queryWildcard = `*`
	fl.queryKeyDenoter = `#`

	for i := range opts {
		opts[i](fl)
	}

	err := fl.descend([]string{}, data, 0)
	if err != nil {
		return nil, err
	}

	return fl.rows, nil
}
// descend recurses through data, appending one Row to fl.rows for each
// leaf value that passes the query term for its depth.
//
// path holds the keys and indexes walked so far, and depth selects which
// query term applies (see queryAtDepth). Arrays and maps are iterated and
// recursed into. A nil is emitted as an empty-valued row only when
// queryMatchNil allows it. Anything else is converted with stringify. An
// error is returned when stringify cannot convert a leaf value.
//
// Child paths are built with append, so sibling calls can share a backing
// array with path. Rows therefore store their own copy of the path;
// otherwise a later sibling would overwrite the path of rows that were
// already collected.
func (fl *Flattener) descend(path []string, data interface{}, depth int) error {
	queryTerm, isQueryMatched := fl.queryAtDepth(depth)

	logger := log.With(fl.logger,
		"caller", "descend",
		"depth", depth,
		"rows-so-far", len(fl.rows),
		"query", queryTerm,
		"path", strings.Join(path, "/"),
	)

	switch v := data.(type) {
	case []interface{}:
		for i, e := range v {
			pathKey := strconv.Itoa(i)
			level.Debug(logger).Log("msg", "checking an array", "indexStr", pathKey)

			// If the queryTerm starts with queryKeyDenoter, then we
			// want to rewrite the path based on it. Note that this
			// does no sanity checking. Multiple values will
			// re-write. If the value isn't there, you get nothing.
			//
			// keyName == "name"
			// keyValue == "alex" (need to test this against queryTerm)
			// pathKey == What we descend with
			if strings.HasPrefix(queryTerm, fl.queryKeyDenoter) {
				keyQuery := strings.SplitN(strings.TrimPrefix(queryTerm, fl.queryKeyDenoter), "=>", 2)
				keyName := keyQuery[0]

				innerlogger := log.With(logger, "arraykeyname", keyName)
				level.Debug(innerlogger).Log("msg", "attempting to coerce array into map")

				elemMap, ok := e.(map[string]interface{})
				if !ok {
					level.Debug(innerlogger).Log("msg", "can't coerce into map")
					continue
				}

				// Is keyName in this element?
				val, ok := elemMap[keyName]
				if !ok {
					level.Debug(innerlogger).Log("msg", "keyName not in map")
					continue
				}

				pathKey, ok = val.(string)
				if !ok {
					level.Debug(innerlogger).Log("msg", "can't coerce pathKey val into string")
					continue
				}

				// Looks good to descend: pathKey now holds the
				// value found under keyName.
			}

			if !(isQueryMatched || fl.queryMatchArrayElement(e, i, queryTerm)) {
				level.Debug(logger).Log("msg", "query not matched")
				continue
			}

			if err := fl.descend(append(path, pathKey), e, depth+1); err != nil {
				return errors.Wrap(err, "flattening array")
			}
		}
	case map[string]interface{}:
		level.Debug(logger).Log("msg", "checking a map", "path", strings.Join(path, "/"))
		for k, e := range v {
			// Check that the key name matches. If not, skip this
			// entire branch of the map.
			if !(isQueryMatched || fl.queryMatchString(k, queryTerm)) {
				continue
			}

			if err := fl.descend(append(path, k), e, depth+1); err != nil {
				return errors.Wrap(err, "flattening map")
			}
		}
	case nil:
		// Because we want to filter nils out, we do _not_ examine
		// isQueryMatched here.
		if !(fl.queryMatchNil(queryTerm)) {
			level.Debug(logger).Log("msg", "query not matched")
			return nil
		}
		fl.rows = append(fl.rows, Row{Path: clonePath(path), Value: ""})
	default:
		// non-iterable. stringify and be done
		stringValue, err := stringify(v)
		if err != nil {
			return errors.Wrapf(err, "flattening at path %v", path)
		}

		if !(isQueryMatched || fl.queryMatchString(stringValue, queryTerm)) {
			level.Debug(logger).Log("msg", "query not matched")
			return nil
		}
		fl.rows = append(fl.rows, Row{Path: clonePath(path), Value: stringValue})
	}

	return nil
}

// clonePath returns a copy of path backed by its own array, so the
// result is unaffected by later appends to slices that share path's
// backing array. The result is never nil, even for an empty path.
func clonePath(path []string) []string {
	cloned := make([]string, len(path))
	copy(cloned, path)
	return cloned
}
// queryMatchNil reports whether a nil value counts as a match.
// queryTerm is currently ignored: the answer depends only on whether
// the Flattener was configured to include nils.
func (fl *Flattener) queryMatchNil(queryTerm string) bool {
	// TODO: If needed, we could use queryTerm for optional nil filtering
	wantNils := fl.includeNils
	return wantNils
}
// queryMatchArrayElement reports whether an array element satisfies
// queryTerm. A leading key denoter (fl.queryKeyDenoter) is stripped
// from the term before matching.
//
// Supported forms:
//
//	*            -- the wildcard matches every element
//	i            -- an integer matches the element at index i, for example `0`
//	k            -- for a map element, matches when key k is present
//	k=>valueTerm -- for a map element, matches when some key matching k
//	                has a value matching valueTerm
//
// An element that is itself an array never matches a term that is not
// a wildcard or an index. Any other element is stringified and compared.
//
// We use `=>` as something that is reasonably intuitive, and not very
// likely to occur on its own. Unfortunately, `==` shows up in base64.
func (fl *Flattener) queryMatchArrayElement(data interface{}, arrIndex int, queryTerm string) bool {
	logger := log.With(fl.logger,
		"caller", "queryMatchArrayElement",
		"rows-so-far", len(fl.rows),
		"query", queryTerm,
		"arrIndex", arrIndex,
	)

	// Drop the key re-write denotation before trying to match.
	term := strings.TrimPrefix(queryTerm, fl.queryKeyDenoter)
	if term == fl.queryWildcard {
		return true
	}

	// An integer term is compared against the element's index.
	wantIndex, convErr := strconv.Atoi(term)
	if convErr == nil {
		level.Debug(logger).Log("msg", "using numeric index comparison")
		return wantIndex == arrIndex
	}

	level.Debug(logger).Log("msg", "checking data type")

	// We can't match an array that has arrays as elements. Use a wildcard.
	if _, isArray := data.([]interface{}); isArray {
		return false
	}

	elemMap, isMap := data.(map[string]interface{})
	if !isMap {
		// non-iterable. stringify and be done
		return fl.queryMatchStringify(data, term)
	}

	parts := strings.SplitN(term, "=>", 2)
	if len(parts) == 1 {
		// No value term: only test whether the key exists.
		_, present := elemMap[parts[0]]
		return present
	}

	// Otherwise any single key/value pair that matches is enough.
	keyTerm, valueTerm := parts[0], parts[1]
	for key, value := range elemMap {
		if !fl.queryMatchString(key, keyTerm) {
			continue
		}
		if fl.queryMatchStringify(value, valueTerm) {
			return true
		}
	}
	return false
}
// queryMatchStringify reports whether data, once converted to a string,
// matches queryTerm. A leading key denoter is stripped from the term
// first. The wildcard matches anything, nil data is delegated to
// queryMatchNil, and data that stringify cannot convert never matches.
func (fl *Flattener) queryMatchStringify(data interface{}, queryTerm string) bool {
	// Drop the key re-write denotation before trying to match.
	term := strings.TrimPrefix(queryTerm, fl.queryKeyDenoter)

	switch {
	case term == fl.queryWildcard:
		return true
	case data == nil:
		return fl.queryMatchNil(term)
	}

	asString, err := stringify(data)
	if err != nil {
		return false
	}

	return fl.queryMatchString(asString, term)
}
// queryMatchString reports whether v matches queryTerm. A term equal to
// the wildcard matches everything. A wildcard at the start and/or end
// of the term turns the comparison into a suffix, prefix, or substring
// test. Anything else, including a wildcard in the middle of the term,
// is compared for exact equality.
func (fl *Flattener) queryMatchString(v, queryTerm string) bool {
	wildcard := fl.queryWildcard
	if queryTerm == wildcard {
		return true
	}

	leading := strings.HasPrefix(queryTerm, wildcard)
	trailing := strings.HasSuffix(queryTerm, wildcard)

	if leading && trailing {
		// *term* -- substring match
		needle := strings.TrimSuffix(strings.TrimPrefix(queryTerm, wildcard), wildcard)
		return strings.Contains(v, needle)
	}
	if leading {
		// *term -- suffix match
		return strings.HasSuffix(v, strings.TrimPrefix(queryTerm, wildcard))
	}
	if trailing {
		// term* -- prefix match
		return strings.HasPrefix(v, strings.TrimSuffix(queryTerm, wildcard))
	}

	return v == queryTerm
}
// queryAtDepth returns the query term that applies at depth, along with
// a boolean that is true when that term matches everything. A nil
// query, or a depth past the end of the query, yields an implied
// wildcard. This is what lets a query act as a prefix.
func (fl *Flattener) queryAtDepth(depth int) (string, bool) {
	// A nil query and a query that is too short for this depth are both
	// treated as a wildcard. Note that a nil []string and an empty
	// []string{} both have length 0.
	if fl.query == nil || len(fl.query) < depth+1 {
		return fl.queryWildcard, true
	}

	term := fl.query[depth]
	matchesAll := term == fl.queryWildcard
	return term, matchesAll
}
// stringify takes an arbitrary piece of data, and attempts to coerce
// it into a string.
//
// nil becomes the empty string, strings and byte slices are returned
// as-is, booleans become "true" or "false", integers of every built-in
// width are formatted in base 10, floats use the shortest decimal
// representation that round-trips, and time.Time becomes its Unix
// timestamp in seconds. Any other type results in an error.
func stringify(data interface{}) (string, error) {
	switch v := data.(type) {
	case nil:
		return "", nil
	case string:
		return v, nil
	case []byte:
		return string(v), nil
	case bool:
		return strconv.FormatBool(v), nil
	case int:
		return strconv.Itoa(v), nil
	case int8:
		return strconv.FormatInt(int64(v), 10), nil
	case int16:
		return strconv.FormatInt(int64(v), 10), nil
	case int32:
		return strconv.FormatInt(int64(v), 10), nil
	case int64:
		return strconv.FormatInt(v, 10), nil
	case uint:
		return strconv.FormatUint(uint64(v), 10), nil
	case uint8:
		return strconv.FormatUint(uint64(v), 10), nil
	case uint16:
		return strconv.FormatUint(uint64(v), 10), nil
	case uint32:
		return strconv.FormatUint(uint64(v), 10), nil
	case uint64:
		return strconv.FormatUint(v, 10), nil
	case float32:
		// A bitSize of 32 keeps the output at float32 precision, so
		// float32(0.1) is rendered as "0.1".
		return strconv.FormatFloat(float64(v), 'f', -1, 32), nil
	case float64:
		return strconv.FormatFloat(v, 'f', -1, 64), nil
	case time.Time:
		return strconv.FormatInt(v.Unix(), 10), nil
	default:
		return "", errors.Errorf("unknown type on %v", data)
	}
}