diff --git a/json.go b/json.go new file mode 100644 index 0000000..764e2a8 --- /dev/null +++ b/json.go @@ -0,0 +1,139 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package json + +import ( + gojson "encoding/json" + "fmt" + "io" + + internaljson "sigs.k8s.io/json/internal/golang/encoding/json" +) + +// Decoder describes the decoding API exposed by `encoding/json#Decoder` +type Decoder interface { + Decode(v interface{}) error + Buffered() io.Reader + Token() (gojson.Token, error) + More() bool + InputOffset() int64 +} + +// NewDecoderCaseSensitivePreserveInts returns a decoder that matches the behavior of encoding/json#NewDecoder, with the following changes: +// - When unmarshaling into a struct, JSON keys must case-sensitively match `json` tag names (for tagged struct fields) +// or struct field names (for untagged struct fields), or they are treated as unknown fields and discarded. +// - When unmarshaling a number into an interface value, it is unmarshaled as an int64 if +// the JSON data does not contain a "." character and parses as an integer successfully and +// does not overflow int64. Otherwise, the number is unmarshaled as a float64. +// - If a syntax error is returned, it will not be of type encoding/json#SyntaxError, +// but will be recognizable by this package's SyntaxErrorOffset() function. 
+func NewDecoderCaseSensitivePreserveInts(r io.Reader) Decoder { + d := internaljson.NewDecoder(r) + d.CaseSensitive() + d.PreserveInts() + return d +} + +// UnmarshalCaseSensitivePreserveInts parses the JSON-encoded data and stores the result in the value pointed to by v. +// +// UnmarshalCaseSensitivePreserveInts matches the behavior of encoding/json#Unmarshal, with the following changes: +// - When unmarshaling into a struct, JSON keys must case-sensitively match `json` tag names (for tagged struct fields) +// or struct field names (for untagged struct fields), or they are treated as unknown fields and discarded. +// - When unmarshaling a number into an interface value, it is unmarshaled as an int64 if +// the JSON data does not contain a "." character and parses as an integer successfully and +// does not overflow int64. Otherwise, the number is unmarshaled as a float64. +// - If a syntax error is returned, it will not be of type encoding/json#SyntaxError, +// but will be recognizeable by this package's IsSyntaxError() function. +func UnmarshalCaseSensitivePreserveInts(data []byte, v interface{}) error { + return internaljson.Unmarshal( + data, + v, + internaljson.CaseSensitive, + internaljson.PreserveInts, + ) +} + +type StrictOption int + +const ( + // DisallowDuplicateFields returns strict errors if data contains duplicate fields + DisallowDuplicateFields StrictOption = 1 + + // DisallowUnknownFields returns strict errors if data contains unknown fields when decoding into typed structs + DisallowUnknownFields StrictOption = 2 +) + +// UnmarshalStrict parses the JSON-encoded data and stores the result in the value pointed to by v. +// Unmarshaling is performed identically to UnmarshalCaseSensitivePreserveInts(), returning an error on failure. +// +// If parsing succeeds, additional strict checks as selected by `strictOptions` are performed +// and a list of the strict failures (if any) are returned. 
If no `strictOptions` are selected, +// all supported strict checks are performed. +// +// Currently supported strict checks are: +// - DisallowDuplicateFields: ensure the data contains no duplicate fields +// - DisallowUnknownFields: ensure the data contains no unknown fields (when decoding into typed structs) +// +// Additional strict checks may be added in the future. +// +// Note that the strict checks do not change what is stored in v. +// For example, if duplicate fields are present, they will be parsed and stored in v, +// and errors about the duplicate fields will be returned in the strict error list. +func UnmarshalStrict(data []byte, v interface{}, strictOptions ...StrictOption) (strictErrors []error, err error) { + if len(strictOptions) == 0 { + err = internaljson.Unmarshal(data, v, + // options matching UnmarshalCaseSensitivePreserveInts + internaljson.CaseSensitive, + internaljson.PreserveInts, + // all strict options + internaljson.DisallowDuplicateFields, + internaljson.DisallowUnknownFields, + ) + } else { + opts := make([]internaljson.UnmarshalOpt, 0, 2+len(strictOptions)) + // options matching UnmarshalCaseSensitivePreserveInts + opts = append(opts, internaljson.CaseSensitive, internaljson.PreserveInts) + for _, strictOpt := range strictOptions { + switch strictOpt { + case DisallowDuplicateFields: + opts = append(opts, internaljson.DisallowDuplicateFields) + case DisallowUnknownFields: + opts = append(opts, internaljson.DisallowUnknownFields) + default: + return nil, fmt.Errorf("unknown strict option %d", strictOpt) + } + } + err = internaljson.Unmarshal(data, v, opts...) + } + + if strictErr, ok := err.(*internaljson.UnmarshalStrictError); ok { + return strictErr.Errors, nil + } + return nil, err +} + +// SyntaxErrorOffset returns if the specified error is a syntax error produced by encoding/json or this package. 
+func SyntaxErrorOffset(err error) (isSyntaxError bool, offset int64) { + switch err := err.(type) { + case *gojson.SyntaxError: + return true, err.Offset + case *internaljson.SyntaxError: + return true, err.Offset + default: + return false, 0 + } +} diff --git a/json_test.go b/json_test.go new file mode 100644 index 0000000..1557b0e --- /dev/null +++ b/json_test.go @@ -0,0 +1,400 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package json + +import ( + "bytes" + gojson "encoding/json" + "io/ioutil" + "reflect" + "strings" + "testing" +) + +func TestSyntaxErrorOffset(t *testing.T) { + malformedJSON := + []byte(`{ + "test1":true, + "test2":true + "test3":true +}`) + + err1 := UnmarshalCaseSensitivePreserveInts(malformedJSON, &map[string]interface{}{}) + if err1 == nil { + t.Fatal("expected err, got none") + } + ok1, offset1 := SyntaxErrorOffset(err1) + if !ok1 { + t.Fatal("expected ok, got false") + } + + err2 := gojson.Unmarshal(malformedJSON, &map[string]interface{}{}) + if err2 == nil { + t.Fatal("expected err, got none") + } + ok2, offset2 := SyntaxErrorOffset(err2) + if !ok2 { + t.Fatal("expected ok, got false") + } + if offset1 != offset2 { + t.Fatalf("offset mismatch from stdlib and custom: %d != %d", offset1, offset2) + } +} + +func TestUnmarshal(t *testing.T) { + type Obj struct { + A int `json:"a"` + B int `json:"b"` + C map[string]string `json:"c"` + D int + E int + } + + testcases := []struct { + name string + in string + 
to func() interface{} + expect interface{} + expectErr string + expectStrictErrs []string + }{ + { + name: "simple", + in: `{"a":1}`, + to: func() interface{} { return map[string]interface{}{} }, + expect: map[string]interface{}{"a": int64(1)}, + }, + { + name: "case-sensitive", + in: `{"a":1,"A":2,"B":3}`, + to: func() interface{} { return &Obj{} }, + expect: &Obj{A: 1}, // case-mismatches don't decode + expectStrictErrs: []string{`unknown field "A"`, `unknown field "B"`}, // multiple strict errors are returned + }, + { + name: "duplicate untyped", + in: `{"a":1,"a":2,"b":1,"b":2}`, + to: func() interface{} { return map[string]interface{}{} }, + expect: map[string]interface{}{"a": int64(2), "b": int64(2)}, // last duplicates win + expectStrictErrs: []string{`duplicate field "a"`, `duplicate field "b"`}, // multiple strict errors are returned + }, + { + name: "duplicate typed", + in: `{"a":1,"a":2,"b":1,"b":2}`, + to: func() interface{} { return &Obj{} }, + expect: &Obj{A: 2, B: 2}, // last duplicates win + expectStrictErrs: []string{`duplicate field "a"`, `duplicate field "b"`}, // multiple strict errors are returned + }, + { + name: "duplicate map field", + in: `{"c":{"a":"1","a":"2","b":"1","b":"2"}}`, + to: func() interface{} { return &Obj{} }, + expect: &Obj{C: map[string]string{"a": "2", "b": "2"}}, // last duplicates win + expectStrictErrs: []string{`duplicate field "a"`, `duplicate field "b"`}, // multiple strict errors are returned + }, + { + name: "unknown fields", + in: `{"a":1,"unknown":true,"unknown2":false,"b":2}`, + to: func() interface{} { return &Obj{} }, + expect: &Obj{A: 1, B: 2}, // data is populated + expectStrictErrs: []string{`unknown field "unknown"`, `unknown field "unknown2"`}, // multiple strict errors are returned + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + unmarshalTo := tc.to() + err := UnmarshalCaseSensitivePreserveInts([]byte(tc.in), &unmarshalTo) + + strictUnmarshalTo := tc.to() + 
strictErrors, strictErr := UnmarshalStrict([]byte(tc.in), &strictUnmarshalTo) + + decodeTo := tc.to() + decodeErr := NewDecoderCaseSensitivePreserveInts(bytes.NewBuffer([]byte(tc.in))).Decode(&decodeTo) + + // ensure expected errors are returned + if (len(tc.expectErr) > 0) != (err != nil) { + t.Fatalf("expected err=%v, got %v", len(tc.expectErr) > 0, err) + } + if len(tc.expectErr) > 0 && !strings.Contains(err.Error(), tc.expectErr) { + t.Fatalf("expected error containing '%s', got %v", tc.expectErr, err) + } + + // ensure expected strict errors are returned + if len(tc.expectStrictErrs) != len(strictErrors) { + t.Fatalf("expected %d strict errors, got %v", len(tc.expectStrictErrs), strictErrors) + } + for i := range tc.expectStrictErrs { + if !strings.Contains(strictErrors[i].Error(), tc.expectStrictErrs[i]) { + t.Fatalf("expected strict errors:\n %s\ngot:\n %v", strings.Join(tc.expectStrictErrs, "\n "), strictErrors) + } + } + + // ensure expected decode errors are returned + if (len(tc.expectErr) > 0) != (decodeErr != nil) { + t.Fatalf("expected err=%v, got %v", len(tc.expectErr) > 0, decodeErr) + } + if len(tc.expectErr) > 0 && !strings.Contains(decodeErr.Error(), tc.expectErr) { + t.Fatalf("expected error containing '%s', got %v", tc.expectErr, decodeErr) + } + + // ensure we got the expected object back + if !reflect.DeepEqual(tc.expect, unmarshalTo) { + t.Fatalf("expected\n%#v\ngot\n%#v", tc.expect, unmarshalTo) + } + if !reflect.DeepEqual(tc.expect, decodeTo) { + t.Fatalf("expected\n%#v\ngot\n%#v", tc.expect, decodeTo) + } + + // ensure Unmarshal and UnmarshalStrict return identical errors and objects + if !reflect.DeepEqual(err, strictErr) { + t.Fatalf("unmarshal/strictunmarshal returned different errors:\n%v\n%v", err, strictErr) + } + if !reflect.DeepEqual(unmarshalTo, strictUnmarshalTo) { + t.Fatalf("unmarshal/strictunmarshal returned different objects:\n%#v\n%#v", unmarshalTo, strictUnmarshalTo) + } + + // ensure Unmarshal and Decode return identical 
errors and objects + if !reflect.DeepEqual(err, decodeErr) { + t.Fatalf("unmarshal/decode returned different errors:\n%v\n%v", err, decodeErr) + } + if !reflect.DeepEqual(unmarshalTo, decodeTo) { + t.Fatalf("unmarshal/decode returned different objects:\n%#v\n%#v", unmarshalTo, decodeTo) + } + }) + } +} + +func BenchmarkUnmarshal(b *testing.B) { + testcases := []struct { + name string + unmarshal func(b *testing.B, data []byte, v interface{}) + }{ + { + name: "stdlib", + unmarshal: func(b *testing.B, data []byte, v interface{}) { + if err := gojson.Unmarshal(data, v); err != nil { + b.Fatal(err) + } + }, + }, + { + name: "unmarshal", + unmarshal: func(b *testing.B, data []byte, v interface{}) { + if err := UnmarshalCaseSensitivePreserveInts(data, v); err != nil { + b.Fatal(err) + } + }, + }, + { + name: "strict", + unmarshal: func(b *testing.B, data []byte, v interface{}) { + if strict, err := UnmarshalStrict(data, v); err != nil { + b.Fatal(err) + } else if len(strict) > 0 { + b.Fatal(strict) + } + }, + }, + { + name: "strict-custom", + unmarshal: func(b *testing.B, data []byte, v interface{}) { + if strict, err := UnmarshalStrict(data, v, DisallowDuplicateFields, DisallowUnknownFields); err != nil { + b.Fatal(err) + } else if len(strict) > 0 { + b.Fatal(strict) + } + }, + }, + } + + data, err := ioutil.ReadFile("testdata/bench.json") + if err != nil { + b.Fatal(err) + } + b.ResetTimer() + + for _, tc := range testcases { + b.Run("typed_"+tc.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + tc.unmarshal(b, data, &A{}) + } + }) + } + for _, tc := range testcases { + b.Run("untyped_"+tc.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + tc.unmarshal(b, data, &map[string]interface{}{}) + } + }) + } +} + +type A struct { + Int int `json:"int"` + Bool bool `json:"bool"` + String string `json:"string"` + + StringMap map[string]string `json:"map"` + ObjectArray []A `json:"array"` + + Small Small `json:"small"` + Big Big `json:"big"` + + Custom Custom 
`json:"custom"` +} + +type Small struct { + F01 string `json:"f01"` + F02 string `json:"f02"` + F03 string `json:"f03"` + F04 string `json:"f04"` + F05 string `json:"f05"` + F06 string `json:"f06"` + F07 string `json:"f07"` + F08 string `json:"f08"` + F09 string `json:"f09"` + F10 string `json:"f10"` + F11 string `json:"f11"` + F12 string `json:"f12"` + F13 string `json:"f13"` + F14 string `json:"f14"` + F15 string `json:"f15"` + F16 string `json:"f16"` + F17 string `json:"f17"` + F18 string `json:"f18"` + F19 string `json:"f19"` + F20 string `json:"f20"` + F21 string `json:"f21"` + F22 string `json:"f22"` + F23 string `json:"f23"` + F24 string `json:"f24"` + F25 string `json:"f25"` + F26 string `json:"f26"` + F27 string `json:"f27"` + F28 string `json:"f28"` + F29 string `json:"f29"` + F30 string `json:"f30"` + F31 string `json:"f31"` + F32 string `json:"f32"` + F33 string `json:"f33"` + F34 string `json:"f34"` + F35 string `json:"f35"` + F36 string `json:"f36"` + F37 string `json:"f37"` + F38 string `json:"f38"` + F39 string `json:"f39"` + F40 string `json:"f40"` + F41 string `json:"f41"` + F42 string `json:"f42"` + F43 string `json:"f43"` + F44 string `json:"f44"` + F45 string `json:"f45"` + F46 string `json:"f46"` + F47 string `json:"f47"` + F48 string `json:"f48"` + F49 string `json:"f49"` + F50 string `json:"f50"` + F51 string `json:"f51"` + F52 string `json:"f52"` + F53 string `json:"f53"` + F54 string `json:"f54"` + F55 string `json:"f55"` + F56 string `json:"f56"` + F57 string `json:"f57"` + F58 string `json:"f58"` + F59 string `json:"f59"` + F60 string `json:"f60"` + F61 string `json:"f61"` + F62 string `json:"f62"` + F63 string `json:"f63"` + F64 string `json:"f64"` +} + +type Big struct { + F01 string `json:"f01"` + F02 string `json:"f02"` + F03 string `json:"f03"` + F04 string `json:"f04"` + F05 string `json:"f05"` + F06 string `json:"f06"` + F07 string `json:"f07"` + F08 string `json:"f08"` + F09 string `json:"f09"` + F10 string `json:"f10"` + F11 
string `json:"f11"` + F12 string `json:"f12"` + F13 string `json:"f13"` + F14 string `json:"f14"` + F15 string `json:"f15"` + F16 string `json:"f16"` + F17 string `json:"f17"` + F18 string `json:"f18"` + F19 string `json:"f19"` + F20 string `json:"f20"` + F21 string `json:"f21"` + F22 string `json:"f22"` + F23 string `json:"f23"` + F24 string `json:"f24"` + F25 string `json:"f25"` + F26 string `json:"f26"` + F27 string `json:"f27"` + F28 string `json:"f28"` + F29 string `json:"f29"` + F30 string `json:"f30"` + F31 string `json:"f31"` + F32 string `json:"f32"` + F33 string `json:"f33"` + F34 string `json:"f34"` + F35 string `json:"f35"` + F36 string `json:"f36"` + F37 string `json:"f37"` + F38 string `json:"f38"` + F39 string `json:"f39"` + F40 string `json:"f40"` + F41 string `json:"f41"` + F42 string `json:"f42"` + F43 string `json:"f43"` + F44 string `json:"f44"` + F45 string `json:"f45"` + F46 string `json:"f46"` + F47 string `json:"f47"` + F48 string `json:"f48"` + F49 string `json:"f49"` + F50 string `json:"f50"` + F51 string `json:"f51"` + F52 string `json:"f52"` + F53 string `json:"f53"` + F54 string `json:"f54"` + F55 string `json:"f55"` + F56 string `json:"f56"` + F57 string `json:"f57"` + F58 string `json:"f58"` + F59 string `json:"f59"` + F60 string `json:"f60"` + F61 string `json:"f61"` + F62 string `json:"f62"` + F63 string `json:"f63"` + F64 string `json:"f64"` + F65 string `json:"f65"` +} + +type Custom struct{} + +func (c *Custom) UnmarshalJSON(data []byte) error { + return nil +} diff --git a/testdata/bench.json b/testdata/bench.json new file mode 100644 index 0000000..6c5be12 --- /dev/null +++ b/testdata/bench.json @@ -0,0 +1,180 @@ +{ + "int": 1, + "bool": true, + "string": "This is a test of a string 1", + "map": { + "a": "1", + "b": "2", + "c": "3", + "d": "4" + }, + "custom": {"some":"custom","unmarshaling":true}, + "array": [ + { + "int": 1, + "bool": true, + "string": "This is a test of a string 2", + "small": { + "f01": "ABC" + }, + "big": 
{ + "f01": "ABC" + } + }, + { + "int": 1, + "bool": true, + "string": "This is a test of a string 3", + "small": { + "f01": "ABC" + }, + "big": { + "f01": "ABC" + } + }, + { + "int": 1, + "bool": true, + "string": "This is a test of a string 4", + "small": { + "f01": "ABC" + }, + "big": { + "f01": "ABC" + } + } + ], + "small": { + "f01": "ABC", + "f02": "ABC", + "f03": "ABC", + "f04": "ABC", + "f05": "ABC", + "f06": "ABC", + "f07": "ABC", + "f08": "ABC", + "f09": "ABC", + "f10": "ABC", + "f11": "ABC", + "f12": "ABC", + "f13": "ABC", + "f14": "ABC", + "f15": "ABC", + "f16": "ABC", + "f17": "ABC", + "f18": "ABC", + "f19": "ABC", + "f20": "ABC", + "f21": "ABC", + "f22": "ABC", + "f23": "ABC", + "f24": "ABC", + "f25": "ABC", + "f26": "ABC", + "f27": "ABC", + "f28": "ABC", + "f29": "ABC", + "f30": "ABC", + "f31": "ABC", + "f32": "ABC", + "f33": "ABC", + "f34": "ABC", + "f35": "ABC", + "f36": "ABC", + "f37": "ABC", + "f38": "ABC", + "f39": "ABC", + "f40": "ABC", + "f41": "ABC", + "f42": "ABC", + "f43": "ABC", + "f44": "ABC", + "f45": "ABC", + "f46": "ABC", + "f47": "ABC", + "f48": "ABC", + "f49": "ABC", + "f50": "ABC", + "f51": "ABC", + "f52": "ABC", + "f53": "ABC", + "f54": "ABC", + "f55": "ABC", + "f56": "ABC", + "f57": "ABC", + "f58": "ABC", + "f59": "ABC", + "f60": "ABC", + "f61": "ABC", + "f62": "ABC", + "f63": "ABC", + "f64": "ABC" + }, + "big": { + "f01": "ABC", + "f02": "ABC", + "f03": "ABC", + "f04": "ABC", + "f05": "ABC", + "f06": "ABC", + "f07": "ABC", + "f08": "ABC", + "f09": "ABC", + "f10": "ABC", + "f11": "ABC", + "f12": "ABC", + "f13": "ABC", + "f14": "ABC", + "f15": "ABC", + "f16": "ABC", + "f17": "ABC", + "f18": "ABC", + "f19": "ABC", + "f20": "ABC", + "f21": "ABC", + "f22": "ABC", + "f23": "ABC", + "f24": "ABC", + "f25": "ABC", + "f26": "ABC", + "f27": "ABC", + "f28": "ABC", + "f29": "ABC", + "f30": "ABC", + "f31": "ABC", + "f32": "ABC", + "f33": "ABC", + "f34": "ABC", + "f35": "ABC", + "f36": "ABC", + "f37": "ABC", + "f38": "ABC", + "f39": "ABC", + 
"f40": "ABC", + "f41": "ABC", + "f42": "ABC", + "f43": "ABC", + "f44": "ABC", + "f45": "ABC", + "f46": "ABC", + "f47": "ABC", + "f48": "ABC", + "f49": "ABC", + "f50": "ABC", + "f51": "ABC", + "f52": "ABC", + "f53": "ABC", + "f54": "ABC", + "f55": "ABC", + "f56": "ABC", + "f57": "ABC", + "f58": "ABC", + "f59": "ABC", + "f60": "ABC", + "f61": "ABC", + "f62": "ABC", + "f63": "ABC", + "f64": "ABC", + "f65": "ABC" + } +} \ No newline at end of file