/
struct_parsing.go
367 lines (314 loc) · 9.2 KB
/
struct_parsing.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
package codegen
import (
"fmt"
"go/types"
"strings"
"github.com/sirupsen/logrus"
)
// StructModel is the parsed representation of a struct declaration: its name
// and the models of the fields that were kept while parsing.
type StructModel struct {
// Name is the struct's type name, or the override supplied to ParseStruct.
Name string
// Fields holds one entry per parsed field, in declaration order.
Fields []*FieldModel
}
// FieldModel is the parsed representation of a single struct field.
type FieldModel struct {
// Name is the field's identifier.
Name string
// Type is the type name to emit for the field; ParseStructField stores
// "int64" here for fields it recognises as time fields.
Type string
// Tags is the struct tag literal, including the surrounding backticks, as
// written by PopulateFieldTags. ParseStructField leaves it empty.
Tags string
// IsTimeField records that the field's original type was recognised as a time type.
IsTimeField bool
ReferencedStructs []*StructModel // Parsed structs referenced by this field, could be multiple if it's a map
}
// ParseStruct builds a StructModel from a named type.
//
// The type's underlying type must be a struct, otherwise ErrNotAStruct is
// returned. Unexported fields and fields whose name appears in excludedFields
// are skipped, as are fields for which ParseStructField yields no model. When
// nameOverride is non-nil its value replaces the type's own name in the model.
// The first error reported while parsing a field aborts the whole parse.
func ParseStruct(object *types.Named, excludedFields []string, nameOverride *string) (*StructModel, error) {
	structType, isStruct := object.Underlying().(*types.Struct)
	if !isStruct {
		return nil, ErrNotAStruct
	}

	name := object.Obj().Name()
	if nameOverride != nil {
		name = *nameOverride
	}
	model := &StructModel{Name: name, Fields: []*FieldModel{}}

	for idx, total := 0, structType.NumFields(); idx < total; idx++ {
		member := structType.Field(idx)

		// Only exported fields that were not explicitly excluded are parsed.
		if !member.Exported() || stringInList(member.Name(), excludedFields) {
			continue
		}

		parsed, err := ParseStructField(member, excludedFields)
		if err != nil {
			return nil, err
		}
		// A nil model without an error means the field has no representable type.
		if parsed == nil {
			continue
		}
		model.Fields = append(model.Fields, parsed)
	}

	return model, nil
}
// ParseStructField builds a FieldModel from a single struct field.
//
// One level of pointer indirection on the field's type is removed before
// inspection. A field recognised by isTimeField is modelled as "int64" with
// IsTimeField set; every other field takes the name produced by getTypeName.
// When that name is empty (for example an interface) the result is (nil, nil)
// so the caller can drop the field. Structs referenced by the field's type are
// parsed with the same excludedFields and attached to the model. Tags is left
// empty.
func ParseStructField(field *types.Var, excludedFields []string) (*FieldModel, error) {
	resolved := field.Type()
	if ptr, isPtr := resolved.(*types.Pointer); isPtr {
		resolved = ptr.Elem()
	}

	model := &FieldModel{Name: field.Name()}

	if isTimeField(resolved.String()) {
		model.Type = "int64"
		model.IsTimeField = true
	} else {
		typeName, err := getTypeName(resolved)
		if err != nil {
			return nil, err
		}
		// No usable type name: signal "skip this field" rather than an error.
		if typeName == "" {
			return nil, nil
		}
		model.Type = typeName
	}

	referenced, err := parseReferencedStructs(resolved, excludedFields)
	if err != nil {
		return nil, err
	}
	model.ReferencedStructs = referenced

	return model, nil
}
// MarshalToSourceCode renders the struct through structSourceCodeTemplate and
// returns the resulting text. It panics if executing the template fails.
func (p *StructModel) MarshalToSourceCode() string {
	var rendered strings.Builder
	if execErr := structSourceCodeTemplate.Execute(&rendered, p); execErr != nil {
		panic(execErr)
	}
	return rendered.String()
}
// GetReferencedStructs returns every struct referenced by this struct's
// fields, directly or transitively. For each field, its direct references come
// first, followed by the transitive references of each of them in turn. The
// result may contain the same struct more than once and is never nil.
func (p *StructModel) GetReferencedStructs() []*StructModel {
	collected := make([]*StructModel, 0)
	for _, member := range p.Fields {
		direct := member.ReferencedStructs
		collected = append(collected, direct...)
		for idx := range direct {
			nested := direct[idx].GetReferencedStructs()
			collected = append(collected, nested...)
		}
	}
	return collected
}
// GetRequiredImports returns the import paths needed by the struct's fields.
//
// "time" is reported when isTimeField accepts the Type string of at least one
// field. It is reported once regardless of how many fields match, so the
// result can be emitted as an import list without producing duplicate entries.
// The result is never nil.
//
// NOTE(review): ParseStructField stores "int64" as the Type of the time fields
// it recognises, so only fields whose Type string itself is accepted by
// isTimeField contribute here — confirm that is the intent.
func (p *StructModel) GetRequiredImports() []string {
	imports := []string{}
	for _, field := range p.Fields {
		if isTimeField(field.Type) {
			imports = append(imports, "time")
			// A single entry is enough; further matches would only duplicate it.
			break
		}
	}
	return imports
}
// ConvertTagFields rewrites, in place, every field that isTagField recognises
// by name and whose type is a slice: the field is renamed to "Tags" and its
// type becomes "map[string]string". All other fields are left untouched.
func (p *StructModel) ConvertTagFields() {
	for idx := range p.Fields {
		candidate := p.Fields[idx]
		if !isTagField(candidate.Name) || !strings.HasPrefix(candidate.Type, "[]") {
			continue
		}
		candidate.Name = "Tags"
		candidate.Type = "map[string]string"
	}
}
// getParquetTags returns the `parquet:"..."` struct tag for a field.
//
// The tag always starts with name=<snake_case field name>. What follows
// depends on the field's Type string:
//   - "[]T": type=MAP,convertedtype=LIST, plus valuetype (and, only when a
//     valuetype exists, valueconvertedtype) derived from T;
//   - "map...": type=MAP, plus key/value types and converted types derived
//     from the two groups captured by mapRegex;
//   - time fields: type=INT64,convertedtype=TIMESTAMP_MILLIS;
//   - anything else: type and convertedtype derived from the Type itself.
//
// Options whose derived value is empty are omitted. The function panics when a
// map type is not matched by mapRegex with exactly two capture groups.
func getParquetTags(field *FieldModel) string {
	var tag strings.Builder
	tag.WriteString("parquet:\"")
	tag.WriteString("name=" + ToSnakeCase(field.Name))

	switch {
	case strings.HasPrefix(field.Type, "[]"):
		elem := strings.TrimPrefix(field.Type, "[]")
		elemType := typeToParquetType(elem)
		elemConverted := typeToParquetConvertedType(elem)

		tag.WriteString(",type=MAP,convertedtype=LIST")
		// The converted type is only meaningful alongside a value type.
		if elemType != "" {
			tag.WriteString(",valuetype=" + elemType)
			if elemConverted != "" {
				tag.WriteString(",valueconvertedtype=" + elemConverted)
			}
		}

	case strings.HasPrefix(field.Type, "map"):
		match := mapRegex.FindStringSubmatch(field.Type)
		if len(match) != 3 {
			fmt.Println(match)
			panic("map type not matched by regex: " + field.Type)
		}
		tag.WriteString(",type=MAP")

		keyType := typeToParquetType(match[1])
		keyConverted := typeToParquetConvertedType(match[1])
		valueType := typeToParquetType(match[2])
		valueConverted := typeToParquetConvertedType(match[2])

		// Emitted in this fixed order; empty values are skipped.
		options := [...]struct{ prefix, value string }{
			{",keytype=", keyType},
			{",valuetype=", valueType},
			{",keyconvertedtype=", keyConverted},
			{",valueconvertedtype=", valueConverted},
		}
		for _, opt := range options {
			if opt.value != "" {
				tag.WriteString(opt.prefix + opt.value)
			}
		}

	case field.IsTimeField:
		tag.WriteString(",type=INT64,convertedtype=TIMESTAMP_MILLIS")

	default:
		if plainType := typeToParquetType(field.Type); plainType != "" {
			tag.WriteString(",type=" + plainType)
		}
		if plainConverted := typeToParquetConvertedType(field.Type); plainConverted != "" {
			tag.WriteString(",convertedtype=" + plainConverted)
		}
	}

	tag.WriteString("\"")
	return tag.String()
}
// PopulateFieldTags computes and stores the struct tag literal of every field.
//
// Each field receives, in this order: bson, ion, dynamodbav, parquet,
// inventory_primary_key (only for the field named primaryObjectField), json
// and diff tags, wrapped in backticks. The key used in the tags is the
// snake_case field name, except for "InventoryUUID" which is keyed as "_id".
//
// Special cases:
//   - dynamodbav gets ",unixtime" for time.Time / *time.Time fields and
//     ",omitempty" for everything except bool / *bool fields;
//   - diff is "report_time,immutable" for ReportTime, "-" for InventoryUUID,
//     "<key>,identifier" for the primary field and "<key>" otherwise.
func (p *StructModel) PopulateFieldTags(primaryObjectField string) {
	for _, field := range p.Fields {
		isPrimary := field.Name == primaryObjectField

		// Key shared by the bson, ion, dynamodbav, json and diff tags.
		var key string
		switch field.Name {
		case "InventoryUUID":
			key = "_id"
		default:
			key = ToSnakeCase(field.Name)
		}

		bsonTag := "bson:\"" + key + ",omitempty\""
		ionTag := " ion:\"" + key + "\""

		dynamoTag := " dynamodbav:\"" + key
		if field.Type == "time.Time" || field.Type == "*time.Time" {
			dynamoTag += ",unixtime"
		}
		if !(field.Type == "bool" || field.Type == "*bool") {
			dynamoTag += ",omitempty"
		}
		dynamoTag += "\""

		parquetTag := " " + getParquetTags(field)

		primaryTag := ""
		if isPrimary {
			primaryTag = " inventory_primary_key:\"true\""
		}

		jsonTag := " json:\"" + key + ",omitempty\""

		// The first matching rule wins, so ReportTime and InventoryUUID take
		// precedence over the primary-field rule.
		var diffTag string
		switch {
		case field.Name == "ReportTime":
			diffTag = " diff:\"report_time,immutable\""
		case field.Name == "InventoryUUID":
			diffTag = " diff:\"-\""
		case isPrimary:
			diffTag = " diff:\"" + key + ",identifier\""
		default:
			diffTag = " diff:\"" + key + "\""
		}

		field.Tags = "`" + bsonTag + ionTag + dynamoTag + parquetTag + primaryTag + jsonTag + diffTag + "`"
	}
}
// getTypeName returns the name of t as it should be written in the generated code.
//
// Rules:
//   - pointers are looked through, so the pointer itself is dropped;
//   - basic types yield their name;
//   - a named type whose underlying type is a struct yields "*" followed by
//     its unqualified name, making any reference to a struct a pointer;
//   - any other named type yields the name of its underlying type;
//   - arrays yield "[N]elem", slices "[]elem" and maps "map[key]elem";
//   - interfaces yield "" with a nil error, meaning the type cannot be
//     represented. The same empty result is returned for an array, slice or
//     map whose element (or key) type resolves to "", so that callers never
//     receive a truncated name such as "[]" or "map[string]".
//
// Any other kind of type is logged and reported as ErrUnsupportedFieldType.
func getTypeName(t types.Type) (string, error) {
	switch v := t.(type) {
	case *types.Pointer:
		return getTypeName(v.Elem())
	case *types.Interface:
		return "", nil
	case *types.Basic:
		return v.Name(), nil
	case *types.Named:
		if _, isStruct := v.Underlying().(*types.Struct); isStruct {
			// always use pointers to structs since they may be optional in some cases
			return "*" + v.Obj().Name(), nil
		}
		return getTypeName(v.Underlying())
	case *types.Array:
		elemTypeName, err := getTypeName(v.Elem())
		if err != nil {
			return "", err
		}
		if elemTypeName == "" {
			return "", nil
		}
		// The element type is part of an array type; "[N]" alone is not a type.
		return fmt.Sprintf("[%d]%s", v.Len(), elemTypeName), nil
	case *types.Slice:
		elemTypeName, err := getTypeName(v.Elem())
		if err != nil {
			return "", err
		}
		if elemTypeName == "" {
			return "", nil
		}
		return "[]" + elemTypeName, nil
	case *types.Map:
		keyTypeName, err := getTypeName(v.Key())
		if err != nil {
			return "", err
		}
		elemTypeName, err := getTypeName(v.Elem())
		if err != nil {
			return "", err
		}
		if keyTypeName == "" || elemTypeName == "" {
			return "", nil
		}
		return "map[" + keyTypeName + "]" + elemTypeName, nil
	default:
		logrus.Errorf("unexpected type %T", v)
		return "", ErrUnsupportedFieldType
	}
}
// parseReferencedStructs parses the structs referenced by a type, if any exist.
//
// Pointers, slices and arrays are unwrapped to their element type. A map
// contributes the structs found in its key type followed by those found in
// its value type, which is why a list is returned. A named type whose
// underlying type is a struct is parsed with ParseStruct, unless isTimeField
// recognises it, in which case nothing is returned. Every other type yields
// (nil, nil). Errors from nested parsing are returned unchanged.
//
// NOTE(review): ParseStruct parses each field through ParseStructField, which
// calls back into this function, so a struct that refers to itself (directly
// or through other structs) via an exported, non-excluded field would recurse
// without bound — confirm such types never reach this code.
func parseReferencedStructs(t types.Type, excludedFields []string) ([]*StructModel, error) {
	switch v := t.(type) {
	case *types.Pointer:
		return parseReferencedStructs(v.Elem(), excludedFields)
	case *types.Slice:
		return parseReferencedStructs(v.Elem(), excludedFields)
	case *types.Array:
		return parseReferencedStructs(v.Elem(), excludedFields)
	case *types.Map:
		keyModels, err := parseReferencedStructs(v.Key(), excludedFields)
		if err != nil {
			return nil, err
		}
		valueModels, err := parseReferencedStructs(v.Elem(), excludedFields)
		if err != nil {
			return nil, err
		}
		return append(keyModels, valueModels...), nil
	case *types.Named:
		if _, isStruct := v.Underlying().(*types.Struct); !isStruct {
			return nil, nil
		}
		// Time types are modelled as plain values, not as generated structs.
		if isTimeField(v.Obj().Type().String()) {
			return nil, nil
		}
		model, err := ParseStruct(v, excludedFields, nil)
		if err != nil {
			return nil, err
		}
		return []*StructModel{model}, nil
	default:
		return nil, nil
	}
}
// DeduplicateStructs returns the input structs with duplicates removed, where
// two structs are duplicates when they share the same Name. The first
// occurrence of each name is kept and the input order is preserved. The result
// is never nil.
func DeduplicateStructs(structs []*StructModel) []*StructModel {
	seen := make(map[string]struct{})
	unique := make([]*StructModel, 0)
	for _, candidate := range structs {
		if _, duplicate := seen[candidate.Name]; duplicate {
			continue
		}
		seen[candidate.Name] = struct{}{}
		unique = append(unique, candidate)
	}
	return unique
}