/
tabular.go
216 lines (188 loc) · 6 KB
/
tabular.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
// Package tabular defines functions for working with rectangular datasets.
// qri positions tabular data as a special shape that comes with additional
// constraints. This package defines the methods necessary to enforce and
// interpret those constraints.
package tabular
import (
"encoding/json"
"errors"
"fmt"
"regexp"
"strings"
)
// Package-level values shared by the tabular schema helpers.
var (
	// ErrInvalidTabularSchema is the sentinel error for schemas that don't work
	// as tables. Errors that wrap it with %w can be matched using errors.Is.
	ErrInvalidTabularSchema = errors.New("invalid tabular schema")

	// BaseTabularSchema is the base schema for tabular data: an array whose
	// items are arrays, with an empty list of per-column item schemas.
	BaseTabularSchema = map[string]interface{}{
		"type": "array",
		"items": map[string]interface{}{
			"type":  "array",
			"items": []interface{}{},
		},
	}
)
// Columns is an ordered list of column information.
type Columns []Column

// Titles returns the title of every column, in column order. The result is
// always a non-nil slice holding one entry per column.
func (cols Columns) Titles() []string {
	names := make([]string, 0, len(cols))
	for idx := range cols {
		names = append(names, cols[idx].Title)
	}
	return names
}
// validMachineTitle matches titles shaped like variable names: a letter,
// underscore or dollar sign, followed by any number of letters, digits,
// underscores or dollar signs.
var validMachineTitle = regexp.MustCompile(`^[a-zA-Z_$][a-zA-Z_$0-9]*$`)

// ValidMachineTitles confirms column titles are usable by machines: each title
// must parse as a proper variable name and must be unique across the column
// set. Every violation is collected and reported in a single error that wraps
// ErrInvalidTabularSchema; nil is returned when there are no violations.
func (cols Columns) ValidMachineTitles() error {
	seen := make(map[string]struct{}, len(cols))
	var issues []string

	for idx := range cols {
		name := cols[idx].Title
		if !validMachineTitle.MatchString(name) {
			issues = append(issues, fmt.Sprintf("col. %d name '%s' is not a valid column name", idx, name))
		}
		if _, dup := seen[name]; dup {
			issues = append(issues, fmt.Sprintf("col. %d name '%s' is not unique", idx, name))
		} else {
			seen[name] = struct{}{}
		}
	}

	if len(issues) == 0 {
		return nil
	}
	return fmt.Errorf("%w: column names have problems:\n%s", ErrInvalidTabularSchema, strings.Join(issues, "\n"))
}
// Column defines values associated with an index of each row of data
type Column struct {
	// Title is the name of the column. Columns.ValidMachineTitles checks it
	// against variable-name rules and uniqueness.
	Title string `json:"title"`
	// Type lists the type or types accepted for values in this column.
	Type *ColType `json:"type"`
	// Description is optional free text; it is omitted from JSON when empty.
	Description string `json:"description,omitempty"`
	// Validation holds additional per-column schema keywords; it is omitted
	// from JSON when empty. arrayWrapperColumns stores here every column
	// schema key other than "title", "type" and "description".
	Validation map[string]interface{} `json:"validation,omitempty"`
}
// ColType implements type information for a tabular column. Column Types can
// be one or more strings enumerating accepted types.
type ColType []string

// HasType reports whether t is one of the types listed in ct.
func (ct ColType) HasType(t string) bool {
	for _, x := range ct {
		if x == t {
			return true
		}
	}
	return false
}

// MarshalJSON encodes to a plain string in the common case of a single type,
// and to an array of strings otherwise. A ColType with no entries is encoded
// like its underlying []string ("null" when nil, "[]" when empty) so the
// output is always valid JSON; returning no bytes at all would make
// encoding/json fail with "unexpected end of JSON input".
func (ct ColType) MarshalJSON() ([]byte, error) {
	if len(ct) == 1 {
		return json.Marshal(ct[0])
	}
	return json.Marshal([]string(ct))
}

// UnmarshalJSON decodes either a JSON string (a single type) or a JSON array
// of strings (a type enumeration). JSON null leaves ct unchanged, following
// the encoding/json convention for Unmarshalers. Any other input returns an
// error.
func (ct *ColType) UnmarshalJSON(p []byte) error {
	// json.Unmarshal treats null as a successful no-op for a string target, so
	// without this guard null would decode to ColType{""}.
	if string(p) == "null" {
		return nil
	}

	var str string
	if err := json.Unmarshal(p, &str); err == nil {
		*ct = ColType{str}
		return nil
	}

	var strs []string
	if err := json.Unmarshal(p, &strs); err == nil {
		*ct = ColType(strs)
		return nil
	}

	return fmt.Errorf("invalid data for ColType")
}
// ColumnsFromJSONSchema extracts column data from a jsonSchema object, erroring
// if the provided schema cannot be used to describe a table. The returned
// slice of problem strings describes non-breaking issues with the schema that
// should be addressed, like missing column titles or column types.
//
// The passed-in schema must be a decoding of a JSON schema into the default
// type mappings of the encoding/json package. Its top-level "type" must be the
// string "array" or "object"; a missing, non-string or unrecognized type
// yields an error wrapping ErrInvalidTabularSchema.
func ColumnsFromJSONSchema(sch map[string]interface{}) (Columns, []string, error) {
	topLevelType, ok := sch["type"].(string)
	if !ok {
		msg := "top-level 'type' field is required"
		return nil, nil, fmt.Errorf("%w: %s", ErrInvalidTabularSchema, msg)
	}

	switch topLevelType {
	case "array":
		return arrayWrapperColumns(sch)
	case "object":
		return objectWrapperColumns(sch)
	default:
		msg := fmt.Sprintf("'%s' is not a valid type to describe the top level of a tabular schema", topLevelType)
		return nil, nil, fmt.Errorf("%w: %s", ErrInvalidTabularSchema, msg)
	}
}
// arrayWrapperColumns extracts columns from a schema whose rows are arrays:
// sch["items"] must be an object and its own "items" entry must be an array
// holding one schema per column. If either is missing or has the wrong shape,
// an error wrapping ErrInvalidTabularSchema is returned.
//
// Every column starts with the title "col_<index>" and the type "string".
// A column schema's "title", "type" and "description" keys override those
// defaults when they hold usable values; every other key is copied into the
// column's Validation map. Non-breaking issues (a column schema that is not an
// object, a missing or unusable title or type) are returned as problem strings
// alongside the columns.
func arrayWrapperColumns(sch map[string]interface{}) (Columns, []string, error) {
	itemObj, ok := sch["items"].(map[string]interface{})
	if !ok {
		msg := "top level 'items' property must be an object"
		return nil, nil, fmt.Errorf("%w: %s", ErrInvalidTabularSchema, msg)
	}

	itemArr, ok := itemObj["items"].([]interface{})
	if !ok {
		msg := "items.items must be an array"
		return nil, nil, fmt.Errorf("%w: %s", ErrInvalidTabularSchema, msg)
	}

	var problems []string
	cols := make([]Column, len(itemArr))

	for i, f := range itemArr {
		// defaults, kept whenever the column schema doesn't supply usable values
		cols[i].Title = fmt.Sprintf("col_%d", i)
		cols[i].Type = &ColType{"string"}

		colSchema, ok := f.(map[string]interface{})
		if !ok {
			problems = append(problems, fmt.Sprintf("col. %d schema should be an object", i))
			continue
		}

		setTitle, setType := false, false
		for key, val := range colSchema {
			switch key {
			case "title":
				if title, ok := val.(string); ok {
					setTitle = true
					cols[i].Title = title
				}
			case "type":
				// only count the type as set when it is usable, mirroring the
				// title handling; otherwise the default stays and is reported
				if ct := colTypeFromSchemaValue(val); ct != nil {
					setType = true
					cols[i].Type = ct
				}
			case "description":
				if d, ok := val.(string); ok {
					cols[i].Description = d
				}
			default:
				if cols[i].Validation == nil {
					cols[i].Validation = map[string]interface{}{}
				}
				cols[i].Validation[key] = val
			}
		}

		if !setTitle {
			problems = append(problems, fmt.Sprintf("col. %d title is not set", i))
		}
		if !setType {
			problems = append(problems, fmt.Sprintf("col. %d type is not set, defaulting to string", i))
		}
	}

	return cols, problems, nil
}

// colTypeFromSchemaValue converts the decoded value of a column schema's
// "type" key into a ColType. It accepts a string or an array; non-string array
// entries are skipped. It returns nil when val has any other type or when the
// array contains no strings.
func colTypeFromSchemaValue(val interface{}) *ColType {
	switch x := val.(type) {
	case string:
		return &ColType{x}
	case []interface{}:
		types := make(ColType, 0, len(x))
		for _, v := range x {
			if t, ok := v.(string); ok {
				types = append(types, t)
			}
		}
		if len(types) > 0 {
			return &types
		}
	}
	return nil
}
// objectWrapperColumns is the counterpart of arrayWrapperColumns for schemas
// whose top-level type is "object". It is not implemented yet: the schema is
// ignored and an "unfinished" error is always returned.
func objectWrapperColumns(sch map[string]interface{}) (Columns, []string, error) {
	errUnfinished := fmt.Errorf("unfinished")
	return nil, nil, errUnfinished
}