-
Notifications
You must be signed in to change notification settings - Fork 1
/
transform.go
329 lines (290 loc) · 10.9 KB
/
transform.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
//
// Copyright (c) 2021 Snowplow Analytics Ltd. All rights reserved.
//
// This program is licensed to you under the Apache License Version 2.0,
// and you may not use this file except in compliance with the Apache License Version 2.0.
// You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the Apache License Version 2.0 is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
//
package analytics
import (
"fmt"
"strconv"
"strings"
"time"
jsoniter "github.com/json-iterator/go"
"github.com/pkg/errors"
)
const (
// eventLength is the exact number of tab-separated fields an event must
// contain; ParseEvent and several ParsedEvent methods in this file return an
// error for any other count.
eventLength int = 131
// EmptyFieldErr is the message of the error getParsedValue returns when the
// requested field holds an empty string. GetContextValue compares error text
// against it so that an empty contexts field is not treated as a failure.
EmptyFieldErr string = `Field is empty`
)
// json is the jsoniter API, built from a zero-value jsoniter.Config, that this
// file uses for its Marshal and Get calls.
var json = jsoniter.Config{}.Froze()
// KeyVal is a single key/value pair produced by a ValueParser.
type KeyVal struct {
// Key is the name under which Value is stored when pairs are folded into a map.
Key string
// Value is the parsed value; its dynamic type depends on the parser that produced it.
Value interface{}
}
// ValueParser parses one raw field value. Within this file it is always called
// as ParseFunction(key, value): the first argument is the field's key and the
// second is the raw string taken from the event. It returns the resulting
// key/value pairs, or an error when the value cannot be parsed.
type ValueParser func(string, string) ([]KeyVal, error)
// KeyFunctionPair associates a field key with the ValueParser used to parse
// that field's raw value. mapifyGoodEvent receives an array of 131 of these and
// indexes it by the field's position in the event.
type KeyFunctionPair struct {
// Key is passed as the first argument to ParseFunction.
Key string
// ParseFunction converts the field's raw string into key/value pairs.
ParseFunction ValueParser
}
// ParsedEvent is an event split into its raw string fields, as returned by ParseEvent.
type ParsedEvent []string
// parseTime converts value to a time.Time using the layout
// "2006-01-02 15:04:05.999" and returns it as a single KeyVal stored under key.
// An empty value, or one that does not match the layout, produces an error.
func parseTime(key string, value string) ([]KeyVal, error) {
	const layout = "2006-01-02 15:04:05.999"

	if len(value) == 0 {
		context := fmt.Sprintf("Error parsing key %s", key)
		return nil, errors.Wrap(errors.New("Null string found"), context)
	}

	parsed, parseErr := time.Parse(layout, value)
	if parseErr != nil {
		context := fmt.Sprintf("Error parsing field '%s', with value '%s' to timestamp", key, value)
		return nil, errors.Wrap(parseErr, context)
	}
	return []KeyVal{{Key: key, Value: parsed}}, nil
}
// parseString returns value unchanged as a single KeyVal stored under key.
// An empty value produces an error.
func parseString(key string, value string) ([]KeyVal, error) {
	if len(value) > 0 {
		return []KeyVal{{Key: key, Value: value}}, nil
	}
	context := fmt.Sprintf("Error parsing key %s", key)
	return nil, errors.Wrap(errors.New("Null string found"), context)
}
// parseInt converts value to an int with strconv.Atoi and returns it as a
// single KeyVal stored under key. An empty or non-integer value produces an error.
func parseInt(key string, value string) ([]KeyVal, error) {
	if len(value) == 0 {
		context := fmt.Sprintf("Error parsing key %s", key)
		return nil, errors.Wrap(errors.New("Null string found"), context)
	}

	number, convErr := strconv.Atoi(value)
	if convErr == nil {
		return []KeyVal{{Key: key, Value: number}}, nil
	}
	return nil, errors.Wrap(convErr, fmt.Sprintf("Error parsing key '%s' to integer", key))
}
// parseBool converts value to a bool with strconv.ParseBool and returns it as
// a single KeyVal stored under key. An empty or unrecognised value produces an error.
func parseBool(key string, value string) ([]KeyVal, error) {
	if len(value) == 0 {
		context := fmt.Sprintf("Error parsing key %s", key)
		return nil, errors.Wrap(errors.New("Null string found"), context)
	}

	flag, convErr := strconv.ParseBool(value)
	if convErr == nil {
		return []KeyVal{{Key: key, Value: flag}}, nil
	}
	return nil, errors.Wrap(convErr, fmt.Sprintf("Error parsing key '%s' to boolean", key))
}
// parseDouble converts value to a float64 with strconv.ParseFloat and returns
// it as a single KeyVal stored under key. An empty or non-numeric value
// produces an error.
func parseDouble(key string, value string) ([]KeyVal, error) {
	if len(value) == 0 {
		context := fmt.Sprintf("Error parsing key %s", key)
		return nil, errors.Wrap(errors.New("Null string found"), context)
	}

	number, convErr := strconv.ParseFloat(value, 64)
	if convErr == nil {
		return []KeyVal{{Key: key, Value: number}}, nil
	}
	return nil, errors.Wrap(convErr, fmt.Sprintf("Error parsing key '%s' to double", key))
}
// parseContexts hands a non-empty value to shredContexts and returns its
// result unchanged. An empty value produces an error that names key.
func parseContexts(key string, value string) ([]KeyVal, error) {
	if len(value) > 0 {
		return shredContexts(value)
	}
	context := fmt.Sprintf("Error parsing key %s", key)
	return nil, errors.Wrap(errors.New("Null string found"), context)
}
// parseUnstruct hands a non-empty value to shredUnstruct and returns its
// result unchanged. An empty value produces an error that names key.
func parseUnstruct(key string, value string) ([]KeyVal, error) {
	if len(value) > 0 {
		return shredUnstruct(value)
	}
	context := fmt.Sprintf("Error parsing key %s", key)
	return nil, errors.Wrap(errors.New("Null string found"), context)
}
// ParseEvent splits a tab-separated event string into its fields and returns
// them as a ParsedEvent. The methods on ParsedEvent can then turn the event,
// or a subset of it, into a map or JSON.
//
// An error is returned when the string does not split into exactly
// eventLength fields.
func ParseEvent(event string) (ParsedEvent, error) {
	fields := strings.Split(event, "\t")
	if count := len(fields); count != eventLength {
		message := fmt.Sprintf("Cannot parse tsv event - wrong number of fields provided: %v", count)
		return nil, errors.New(message)
	}
	return fields, nil
}
// mapifyGoodEvent parses every non-empty field of the event with the parser at
// the same position in knownFields and collects the resulting key/value pairs
// into one map.
//
// When addGeolocationData is true and both the latitude and longitude fields
// are non-empty, a "geo_location" entry of the form "<latitude>,<longitude>"
// is written before the fields are processed.
//
// An error is returned when the event does not contain exactly eventLength
// fields, or as soon as any field's parser fails.
func (event ParsedEvent) mapifyGoodEvent(knownFields [131]KeyFunctionPair, addGeolocationData bool) (map[string]interface{}, error) {
	if count := len(event); count != eventLength {
		message := fmt.Sprintf("Cannot transform event - wrong number of fields provided: %v", count)
		return nil, errors.New(message)
	}

	result := make(map[string]interface{})

	if addGeolocationData {
		if latitude := event[latitudeIndex]; latitude != "" {
			if longitude := event[longitudeIndex]; longitude != "" {
				result["geo_location"] = latitude + "," + longitude
			}
		}
	}

	for position, raw := range event {
		// Empty fields contribute nothing to the output.
		if raw == "" {
			continue
		}

		spec := knownFields[position]
		pairs, err := spec.ParseFunction(spec.Key, raw)
		if err != nil {
			return nil, err
		}

		// A single field may expand to several pairs; keep all of them.
		for i := range pairs {
			result[pairs[i].Key] = pairs[i].Value
		}
	}

	return result, nil
}
// ToMap converts the ParsedEvent to a Go map by running mapifyGoodEvent over
// enrichedEventFieldTypes with geolocation output disabled.
func (event ParsedEvent) ToMap() (map[string]interface{}, error) {
	const withGeolocation = false
	return event.mapifyGoodEvent(enrichedEventFieldTypes, withGeolocation)
}
// ToMapWithGeo converts the ParsedEvent to a Go map by running mapifyGoodEvent
// over enrichedEventFieldTypes with geolocation output enabled, so the map also
// carries a "geo_location" entry when both latitude and longitude are non-empty.
func (event ParsedEvent) ToMapWithGeo() (map[string]interface{}, error) {
	const withGeolocation = true
	return event.mapifyGoodEvent(enrichedEventFieldTypes, withGeolocation)
}
// ToJson converts the ParsedEvent to a JSON object by marshaling the result of
// ToMap. Errors from ToMap are returned as-is; a marshaling failure is wrapped
// with "Error marshaling to JSON".
func (event ParsedEvent) ToJson() ([]byte, error) {
	eventMap, mapErr := event.ToMap()
	if mapErr != nil {
		return nil, mapErr
	}

	encoded, encodeErr := json.Marshal(eventMap)
	if encodeErr == nil {
		return encoded, nil
	}
	return nil, errors.Wrap(encodeErr, "Error marshaling to JSON")
}
// ToJsonWithGeo converts the ParsedEvent to a JSON object by marshaling the
// result of ToMapWithGeo, so the geo_location entry is included when that
// method produces one. Errors from ToMapWithGeo are returned as-is; a
// marshaling failure is wrapped with "Error marshaling to JSON".
func (event ParsedEvent) ToJsonWithGeo() ([]byte, error) {
	eventMap, mapErr := event.ToMapWithGeo()
	if mapErr != nil {
		return nil, mapErr
	}

	encoded, encodeErr := json.Marshal(eventMap)
	if encodeErr == nil {
		return encoded, nil
	}
	return nil, errors.Wrap(encodeErr, "Error marshaling to JSON")
}
// getParsedValue looks up field in indexMap and parses the raw value at that
// position with the matching entry of enrichedEventFieldTypes.
//
// It returns an error when the event does not contain exactly eventLength
// fields, when field is not present in indexMap, when the raw value is empty
// (the error message is EmptyFieldErr), or when the parser itself fails.
func (event ParsedEvent) getParsedValue(field string) ([]KeyVal, error) {
	if count := len(event); count != eventLength {
		message := fmt.Sprintf("Cannot get value - wrong number of fields provided: %v", count)
		return nil, errors.New(message)
	}

	position, known := indexMap[field]
	switch {
	case !known:
		return nil, errors.New(fmt.Sprintf("Key %s not a valid atomic field", field))
	case event[position] == "":
		return nil, errors.New(EmptyFieldErr)
	}

	spec := enrichedEventFieldTypes[position]
	pairs, err := spec.ParseFunction(spec.Key, event[position])
	if err != nil {
		return nil, err
	}
	return pairs, nil
}
// GetValue parses and returns a single field of the event without processing
// the other fields.
//
// For "contexts", "derived_contexts" and "unstruct_event" the parsed key/value
// pairs are returned together as a map[string]interface{}. For every other
// field the value of the first parsed pair is returned. Any error from
// getParsedValue (unknown field, empty field, parse failure) is passed through.
func (event ParsedEvent) GetValue(field string) (interface{}, error) {
	pairs, err := event.getParsedValue(field)
	if err != nil {
		return nil, err
	}

	switch field {
	case "contexts", "derived_contexts", "unstruct_event":
		// TODO: this pair-to-map folding also appears elsewhere in the file;
		// consider a shared helper.
		merged := make(map[string]interface{})
		for i := range pairs {
			merged[pairs[i].Key] = pairs[i].Value
		}
		return merged, nil
	}

	return pairs[0].Value, nil
}
// GetUnstructEventValue returns the value found at path inside the event's
// unstruct_event field. The lookup runs against the raw JSON of that field,
// with the two leading keys `data`, `data` prepended to path.
//
// It returns an error when the event does not contain exactly eventLength
// fields; otherwise it returns whatever error the JSON lookup reports, for
// example when the path cannot be resolved.
func (event ParsedEvent) GetUnstructEventValue(path ...interface{}) (interface{}, error) {
	// Guard the index below: without this check a short event panics with an
	// out-of-range index instead of returning an error the way the other
	// accessors on ParsedEvent do.
	if len(event) != eventLength {
		return nil, errors.New(fmt.Sprintf("Cannot get value - wrong number of fields provided: %v", len(event)))
	}

	fullPath := append([]interface{}{`data`, `data`}, path...)

	el := json.Get([]byte(event[indexMap["unstruct_event"]]), fullPath...)
	return el.GetInterface(), el.LastError()
}
// GetContextValue collects values for the context named contextName from the
// event's contexts field and then from its derived_contexts field.
//
// With no path, every occurrence of the context is appended to the result as
// a whole. With a path, each occurrence is marshaled to JSON and the value at
// path is appended; occurrences where the path lookup reports an error are
// skipped. The result is returned as a []interface{}.
//
// An empty contexts or derived_contexts field is not an error. Any other
// failure from getParsedValue, or a marshaling failure, is returned.
func (event ParsedEvent) GetContextValue(contextName string, path ...interface{}) (interface{}, error) {
	sources := [...]string{`contexts`, `derived_contexts`}

	// Parse both fields first so a parse failure in either one is reported
	// before any value is extracted.
	parsed := make([]map[string]interface{}, 0, len(sources))
	for _, source := range sources {
		pairs, err := event.getParsedValue(source)
		if err != nil && err.Error() != EmptyFieldErr {
			return nil, err
		}

		byName := make(map[string]interface{})
		for i := range pairs {
			byName[pairs[i].Key] = pairs[i].Value
		}
		parsed = append(parsed, byName)
	}

	var output []interface{}
	for _, byName := range parsed {
		occurrences, found := byName[contextName]
		if !found {
			continue
		}

		for _, occurrence := range occurrences.([]interface{}) {
			fields := occurrence.(map[string]interface{})

			// No path requested: hand back the whole context.
			if len(path) == 0 {
				output = append(output, fields)
				continue
			}

			raw, err := json.Marshal(fields)
			if err != nil {
				return nil, err
			}

			if located := json.Get(raw, path...); located.LastError() == nil {
				output = append(output, located.GetInterface())
			}
		}
	}

	return output, nil
}
// GetSubsetMap returns a map built from only the requested fields, without
// processing the rest of the event. Each field is parsed with its entry in
// enrichedEventFieldTypes and all resulting key/value pairs are merged into
// the map; a field such as "unstruct_event", "contexts" or "derived_contexts"
// therefore contributes whatever pairs its parser produces for the whole field.
//
// Fields whose raw value is empty are left out of the result. An error is
// returned when the event does not contain exactly eventLength fields, when a
// requested name is not present in indexMap, or when a parser fails.
func (event ParsedEvent) GetSubsetMap(fields ...string) (map[string]interface{}, error) {
	if count := len(event); count != eventLength {
		message := fmt.Sprintf("Cannot get values - wrong number of fields provided: %v", count)
		return nil, errors.New(message)
	}

	subset := make(map[string]interface{})
	for _, name := range fields {
		position, known := indexMap[name]
		if !known {
			return nil, errors.New(fmt.Sprintf("Key %s not a valid atomic field", name))
		}

		raw := event[position]
		if raw == "" {
			continue
		}

		spec := enrichedEventFieldTypes[position]
		pairs, err := spec.ParseFunction(spec.Key, raw)
		if err != nil {
			return nil, err
		}
		for i := range pairs {
			subset[pairs[i].Key] = pairs[i].Value
		}
	}

	return subset, nil
}
// GetSubsetJson returns a JSON object built from only the requested fields,
// without processing the rest of the event. It marshals the map produced by
// GetSubsetMap for the same fields, so the contents and the error conditions
// are the ones GetSubsetMap has, plus any marshaling failure.
func (event ParsedEvent) GetSubsetJson(fields ...string) ([]byte, error) {
	if count := len(event); count != eventLength {
		message := fmt.Sprintf("Cannot get values - wrong number of fields provided: %v", count)
		return nil, errors.New(message)
	}

	subset, mapErr := event.GetSubsetMap(fields...)
	if mapErr != nil {
		return nil, mapErr
	}

	encoded, encodeErr := json.Marshal(subset)
	if encodeErr == nil {
		return encoded, nil
	}
	return nil, encodeErr
}