/
flat.go
178 lines (147 loc) · 4.55 KB
/
flat.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
package storage
import (
"bytes"
"encoding/json"
"io"
"sort"
"strconv"
"strings"
"unicode"
"github.com/renbou/loggo/internal/storage/models"
)
// String forms used when flattening JSON literals that have no string
// representation of their own in the decoded token stream.
var (
// jsonBoolStrs maps a decoded JSON boolean to the text stored as its flattened value.
jsonBoolStrs = map[bool]string{false: "false", true: "true"}
// jsonNullStr is the text stored as the flattened value of a JSON null.
jsonNullStr = "null"
)
// flattenDecoder couples a token-level JSON decoder with the flat message
// that is built up while walking the decoded tokens.
type flattenDecoder struct {
// Embedded so that Token, More and UseNumber can be called directly on the flattenDecoder.
*json.Decoder
// message accumulates the flattened key/value fields in the order they are encountered.
message models.FlatMessage
}
// flatten flattens a JSON message by unnesting all of its levels.
// Keys of nested objects are joined to their parent's key with ".",
// and array elements are assigned their indices as their keys.
// Note that non-object/non-array messages are not mapped to any key,
// allowing only for non-scoped searches in the storage.
// If an error is encountered while iterating through the message, or if any
// input is left after the first JSON value, an empty FlatMessage is returned,
// just like for non-object/non-array messages.
//
// The returned fields are sorted by key. Fields which end up with equal keys
// (duplicate JSON keys, or keys which collide after sanitization) keep the
// order in which they appeared in the message.
func flatten(message []byte) *models.FlatMessage {
	decoder := flattenDecoder{Decoder: json.NewDecoder(bytes.NewReader(message))}
	// UseNumber is needed to avoid screwing with the number representations
	decoder.UseNumber()

	if ok := decoder.flatten("", false); !ok {
		return &models.FlatMessage{}
	}

	// Make sure there's no superfluous input left,
	// it's better to show an error somewhere later than to silently lose data
	if _, err := decoder.Token(); err != io.EOF {
		return &models.FlatMessage{}
	}

	// Sort fields by key to allow binary search for fast access later.
	// The sort has to be stable: keys are not guaranteed to be unique, and a
	// binary search resolves a repeated key to the first of the equal entries,
	// so a stable order makes the first occurrence in the message win
	// regardless of how many fields the message has.
	fields := decoder.message.Fields
	sort.SliceStable(fields, func(i, j int) bool {
		return fields[i].Key < fields[j].Key
	})

	return &models.FlatMessage{Fields: fields}
}
// flatten consumes the next JSON value from the decoder and records it under key.
// Objects and arrays are descended into recursively; scalar values are stored
// only when nested is true, i.e. when they are not the root value of the message.
// It reports false as soon as the decoder fails, in this call or in any of its
// descendants, which makes the caller produce an empty FlatMessage.
func (d *flattenDecoder) flatten(key string, nested bool) bool {
	tok, err := d.Token()
	if err != nil {
		return false
	}

	var value string
	switch v := tok.(type) {
	case json.Delim:
		// Delim here can never be } or ], because they are validated by decoder.Token first,
		// so anything other than { opens an array.
		if v != '{' {
			return d.flattenArray(key)
		}
		return d.flattenObject(key)
	case bool:
		value = jsonBoolStrs[v]
	case json.Number:
		value = v.String()
	case string:
		value = v
	case nil:
		value = jsonNullStr
	default:
		// Any other token type is accepted but not stored.
		return true
	}

	d.appendIfNested(key, value, nested)
	return true
}
// appendIfNested stores the key/value pair in the message being built, but only
// for values which live inside an object or an array. A scalar which is itself
// the root of the message has no key to be stored under, so it is skipped.
func (d *flattenDecoder) appendIfNested(key, value string, nested bool) {
	if nested {
		field := &models.FlatMessage_KV{Key: key, Value: value}
		d.message.Fields = append(d.message.Fields, field)
	}
}
// flattenObject flattens the members of an object whose opening { has already
// been consumed, storing every member under keyPrefix joined with its own key.
// It reports false if the decoder fails before the closing } is read.
func (d *flattenDecoder) flattenObject(keyPrefix string) bool {
	return d.flattenStructure(keyPrefix, func() (string, bool) {
		tok, err := d.Token()
		if err != nil {
			return "", false
		}
		// json.Decoder guarantees that keys will be strings
		return tok.(string), true
	}, '}')
}
// flattenArray flattens the elements of an array whose opening [ has already
// been consumed, using each element's zero-based index as its key.
// It reports false if the decoder fails before the closing ] is read.
func (d *flattenDecoder) flattenArray(keyPrefix string) bool {
	next := 0
	return d.flattenStructure(keyPrefix, func() (string, bool) {
		// Itoa is already optimized, so for small arrays (logs shouldn't be massive, duh) this is fine
		key := strconv.Itoa(next)
		next++
		return key, true
	}, ']')
}
// flattenStructure walks the elements of the object or array the decoder is
// currently positioned in. For every element keyFunc supplies the key (for an
// object it reads a string token, for an array it yields the next index), and
// the element's value is flattened under keyPrefix joined with that key.
// Once no elements are left, the next token must be endDelim.
// It reports false on any decoding failure.
func (d *flattenDecoder) flattenStructure(keyPrefix string, keyFunc func() (string, bool), endDelim rune) bool {
	for d.More() {
		name, ok := keyFunc()
		if !ok {
			return false
		}

		// The value can be any nested structure, so recurse through flatten
		if !d.flatten(d.formatKey(keyPrefix, name), true) {
			return false
		}
	}

	return d.assertDelim(endDelim)
}
// assertDelim reads the next token and reports whether it was read
// successfully and is exactly the delimiter delim.
func (d *flattenDecoder) assertDelim(delim rune) bool {
	tok, err := d.Token()
	if err != nil {
		return false
	}

	got, isDelim := tok.(json.Delim)
	return isDelim && got == json.Delim(delim)
}
// formatKey builds the flattened key of an element: every rune of key which is
// not a letter, a digit or an underscore is replaced with an underscore, and the
// result is appended to prefix after a "." unless prefix is empty.
func (d *flattenDecoder) formatKey(prefix, key string) string {
	sanitize := func(r rune) rune {
		switch {
		case r == '_', unicode.IsLetter(r), unicode.IsDigit(r):
			return r
		default:
			return '_'
		}
	}

	cleaned := strings.Map(sanitize, key)
	if prefix == "" {
		return cleaned
	}
	return prefix + "." + cleaned
}
// flatMessageToMapping wraps the FlatMessage in a FlatMapping which looks keys
// up with a binary search over the message fields, so the fields are expected
// to be sorted by key. The mapping yields the value stored under the key, or
// an empty string and false when the key is not present.
func flatMessageToMapping(message *models.FlatMessage) FlatMapping {
	return func(key string) (string, bool) {
		compare := func(i int) int {
			return strings.Compare(key, message.Fields[i].Key)
		}

		if idx, ok := sort.Find(len(message.Fields), compare); ok {
			return message.Fields[idx].Value, true
		}
		return "", false
	}
}