Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

apimachinery: Add a strict YAML and JSON deserializer option #71589

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
74 changes: 74 additions & 0 deletions staging/src/k8s.io/apimachinery/pkg/runtime/error.go
Expand Up @@ -120,3 +120,77 @@ func IsMissingVersion(err error) bool {
_, ok := err.(*missingVersionErr)
return ok
}

// StrictDecoderError is a base error type that is returned by a strict Decoder such
// as UniversalStrictDecoder.
// It carries a human-readable message together with the GroupVersionKind and the
// raw input that were being decoded; the latter two are exposed through the GVK
// and OriginalData accessors.
type StrictDecoderError struct {
// message is the failure description appended to the text returned by Error.
message string
// gvk is the GroupVersionKind reported by GVK and included in the Error text.
gvk schema.GroupVersionKind
// originalData is the byte slice handed to NewStrictDecoderError; it is stored
// as-is (not copied) and returned by OriginalData.
originalData []byte
}

// UnknownFieldError is an error type that is returned in cases where the input
// JSON or YAML contains unknown fields.
// UnknownFieldError embeds StrictDecoderError.
// The embedding is by value, so *UnknownFieldError is its own dynamic type: it
// gains the Error, GVK and OriginalData methods through promotion, and is
// detected with IsUnknownFieldError.
type UnknownFieldError struct {
StrictDecoderError
}

// DuplicateFieldError is an error type that is returned in cases where the input
// JSON or YAML contains duplicate fields under the same parent field.
// DuplicateFieldError embeds StrictDecoderError.
// The embedding is by value, so *DuplicateFieldError is its own dynamic type: it
// gains the Error, GVK and OriginalData methods through promotion, and is
// detected with IsDuplicateFieldError.
type DuplicateFieldError struct {
StrictDecoderError
}

// NewStrictDecoderError builds a *StrictDecoderError from its three parts:
// message is the failure description, gvk is the GroupVersionKind the error
// relates to, and originalData is the raw input that was being decoded.
// originalData is stored without copying, so the returned error aliases the
// caller's slice.
func NewStrictDecoderError(message string, gvk schema.GroupVersionKind, originalData []byte) *StrictDecoderError {
	decodeErr := new(StrictDecoderError)
	decodeErr.message = message
	decodeErr.gvk = gvk
	decodeErr.originalData = originalData
	return decodeErr
}

// Error implements the error interface. The text consists of a fixed prefix,
// the Go-syntax (%#v) rendering of the stored GroupVersionKind, and the
// message the error was created with.
func (e *StrictDecoderError) Error() string {
	const format = "strict decoder error for %#v: %s"
	return fmt.Sprintf(format, e.gvk, e.message)
}

// GVK returns the GroupVersionKind recorded on the error, i.e. the GVK that
// was extracted while decoding with a strict Decoder.
func (e *StrictDecoderError) GVK() schema.GroupVersionKind {
	return e.gvk
}

// OriginalData returns the byte slice that was originally handed to the strict
// Decoder. The slice is returned as stored, not copied, so callers should
// treat it as read-only.
func (e *StrictDecoderError) OriginalData() []byte {
	return e.originalData
}

// IsStrictDecoderError returns true if the error is a result of a strict Decoder.
//
// This covers the base *StrictDecoderError as well as the more specific
// *UnknownFieldError and *DuplicateFieldError. Those two embed
// StrictDecoderError by value and are therefore distinct dynamic types; a
// plain type assertion to *StrictDecoderError does not match them, so they
// are listed explicitly. A nil error (or any other error type) yields false.
func IsStrictDecoderError(err error) bool {
	switch err.(type) {
	case *StrictDecoderError, *UnknownFieldError, *DuplicateFieldError:
		return true
	default:
		return false
	}
}

// IsUnknownFieldError reports whether err is a *UnknownFieldError, i.e. a
// strict Decoder failure caused by an unknown field. A nil error yields false.
func IsUnknownFieldError(err error) bool {
	switch err.(type) {
	case *UnknownFieldError:
		return true
	}
	return false
}

// IsDuplicateFieldError reports whether err is a *DuplicateFieldError, i.e. a
// strict Decoder failure caused by a duplicate field. A nil error yields false.
func IsDuplicateFieldError(err error) bool {
	switch err.(type) {
	case *DuplicateFieldError:
		return true
	}
	return false
}
Expand Up @@ -35,6 +35,7 @@ type serializerType struct {
EncodesAsText bool

Serializer runtime.Serializer
StrictSerializer runtime.Serializer
PrettySerializer runtime.Serializer

AcceptStreamContentTypes []string
Expand All @@ -46,8 +47,10 @@ type serializerType struct {

func newSerializersForScheme(scheme *runtime.Scheme, mf json.MetaFactory) []serializerType {
jsonSerializer := json.NewSerializer(mf, scheme, scheme, false)
jsonStrictSerializer := json.NewStrictSerializer(mf, scheme, scheme)
jsonPrettySerializer := json.NewSerializer(mf, scheme, scheme, true)
yamlSerializer := json.NewYAMLSerializer(mf, scheme, scheme)
yamlStrictSerializer := json.NewYAMLStrictSerializer(mf, scheme, scheme)

serializers := []serializerType{
{
Expand All @@ -56,6 +59,7 @@ func newSerializersForScheme(scheme *runtime.Scheme, mf json.MetaFactory) []seri
FileExtensions: []string{"json"},
EncodesAsText: true,
Serializer: jsonSerializer,
StrictSerializer: jsonStrictSerializer,
PrettySerializer: jsonPrettySerializer,

Framer: json.Framer,
Expand All @@ -67,6 +71,7 @@ func newSerializersForScheme(scheme *runtime.Scheme, mf json.MetaFactory) []seri
FileExtensions: []string{"yaml"},
EncodesAsText: true,
Serializer: yamlSerializer,
StrictSerializer: yamlStrictSerializer,
},
}

Expand All @@ -81,10 +86,11 @@ func newSerializersForScheme(scheme *runtime.Scheme, mf json.MetaFactory) []seri
// CodecFactory provides methods for retrieving codecs and serializers for specific
// versions and content types.
type CodecFactory struct {
scheme *runtime.Scheme
serializers []serializerType
universal runtime.Decoder
accepts []runtime.SerializerInfo
scheme *runtime.Scheme
serializers []serializerType
universal runtime.Decoder
universalStrict runtime.Decoder
accepts []runtime.SerializerInfo

legacySerializer runtime.Serializer
}
Expand All @@ -103,12 +109,20 @@ func NewCodecFactory(scheme *runtime.Scheme) CodecFactory {
// newCodecFactory is a helper for testing that allows a different metafactory to be specified.
func newCodecFactory(scheme *runtime.Scheme, serializers []serializerType) CodecFactory {
decoders := make([]runtime.Decoder, 0, len(serializers))
decodersStrict := make([]runtime.Decoder, 0, len(serializers))
var accepts []runtime.SerializerInfo
alreadyAccepted := make(map[string]struct{})

var legacySerializer runtime.Serializer
for _, d := range serializers {
decoders = append(decoders, d.Serializer)
// If a strict serializer is missing, fall back to a non-strict serializer
// so that the 'decoders' and 'decodersStrict' slices are symmetric.
if d.StrictSerializer == nil {
decodersStrict = append(decodersStrict, d.Serializer)
} else {
decodersStrict = append(decodersStrict, d.StrictSerializer)
luxas marked this conversation as resolved.
Show resolved Hide resolved
}
for _, mediaType := range d.AcceptContentTypes {
if _, ok := alreadyAccepted[mediaType]; ok {
continue
Expand Down Expand Up @@ -138,9 +152,10 @@ func newCodecFactory(scheme *runtime.Scheme, serializers []serializerType) Codec
}

return CodecFactory{
scheme: scheme,
serializers: serializers,
universal: recognizer.NewDecoder(decoders...),
scheme: scheme,
serializers: serializers,
universal: recognizer.NewDecoder(decoders...),
universalStrict: recognizer.NewDecoder(decodersStrict...),

accepts: accepts,

Expand Down Expand Up @@ -172,6 +187,14 @@ func (f CodecFactory) UniversalDeserializer() runtime.Decoder {
return f.universal
}

// UniversalStrictDeserializer is the strict counterpart of UniversalDeserializer:
// it returns the factory's strict universal Decoder, which additionally rejects
// unknown and duplicate fields. Such failures are reported as UnknownFieldError
// and DuplicateFieldError. Decoding stops at the first error, and the output
// object is not expected to be valid in that case.
func (f CodecFactory) UniversalStrictDeserializer() runtime.Decoder {
	return f.universalStrict
}

// UniversalDecoder returns a runtime.Decoder capable of decoding all known API objects in all known formats. Used
// by clients that do not need to encode objects but want to deserialize API objects stored on disk. Only decodes
// objects in groups registered with the scheme. The GroupVersions passed may be used to select alternate
Expand All @@ -191,6 +214,20 @@ func (f CodecFactory) UniversalDecoder(versions ...schema.GroupVersion) runtime.
return f.CodecForVersions(nil, f.universal, nil, versioner)
}

// UniversalStrictDecoder is the strict counterpart of UniversalDecoder: it wraps
// the factory's strict universal Decoder, which additionally rejects unknown and
// duplicate fields. Such failures are reported as UnknownFieldError and
// DuplicateFieldError. Decoding stops at the first error, and the output object
// is not expected to be valid in that case.
//
// When no versions are given the internal group versioner is used; otherwise
// the supplied GroupVersions select the target versions.
func (f CodecFactory) UniversalStrictDecoder(versions ...schema.GroupVersion) runtime.Decoder {
	// Default to the internal versioner and override only when the caller
	// asked for specific versions.
	var versioner runtime.GroupVersioner = runtime.InternalGroupVersioner
	if len(versions) != 0 {
		versioner = schema.GroupVersions(versions)
	}
	return f.CodecForVersions(nil, f.universalStrict, nil, versioner)
}

// CodecForVersions creates a codec with the provided serializer. If an object is decoded and its group is not in the list,
// it will default to runtime.APIVersionInternal. If encode is not specified for an object's group, the object is not
// converted. If encode or decode are nil, no conversion is performed.
Expand Down
Expand Up @@ -20,6 +20,7 @@ import (
"encoding/json"
"io"
"strconv"
"strings"
"unsafe"

jsoniter "github.com/json-iterator/go"
Expand All @@ -45,6 +46,17 @@ func NewSerializer(meta MetaFactory, creater runtime.ObjectCreater, typer runtim
}
}

// NewStrictSerializer is the same as NewSerializer except that it creates a strict
// JSON serializer, whose Decode can also return errors of type StrictDecoderError,
// UnknownFieldError and DuplicateFieldError.
func NewStrictSerializer(meta MetaFactory, creater runtime.ObjectCreater, typer runtime.ObjectTyper) *Serializer {
	strictJSON := &Serializer{
		meta:    meta,
		creater: creater,
		typer:   typer,
		strict:  true,
	}
	return strictJSON
}

// NewYAMLSerializer creates a YAML serializer that handles encoding versioned objects into the proper YAML form. If typer
// is not nil, the object has the group, version, and kind fields set. This serializer supports only the subset of YAML that
// matches JSON, and will error if constructs are used that do not serialize to JSON.
Expand All @@ -57,12 +69,25 @@ func NewYAMLSerializer(meta MetaFactory, creater runtime.ObjectCreater, typer ru
}
}

// NewYAMLStrictSerializer is the same as NewYAMLSerializer except that it creates a
// strict YAML serializer, whose Decode can also return errors of type
// StrictDecoderError, UnknownFieldError and DuplicateFieldError.
func NewYAMLStrictSerializer(meta MetaFactory, creater runtime.ObjectCreater, typer runtime.ObjectTyper) *Serializer {
	strictYAML := &Serializer{
		meta:    meta,
		creater: creater,
		typer:   typer,
		strict:  true,
		yaml:    true,
	}
	return strictYAML
}

// Serializer is the JSON/YAML serializer built by NewSerializer, NewStrictSerializer,
// NewYAMLSerializer and NewYAMLStrictSerializer. The boolean fields select its mode.
type Serializer struct {
// meta, creater and typer are supplied by the constructors.
// NOTE(review): presumably used to interpret the GVK of the input, instantiate
// objects and look up object kinds respectively; confirm against Decode/Encode.
meta MetaFactory
creater runtime.ObjectCreater
typer runtime.ObjectTyper
// yaml makes Decode convert the YAML input to JSON before unmarshalling.
yaml bool
// pretty is not set by the strict constructors.
// NOTE(review): presumably enables indented output when encoding; confirm against Encode.
pretty bool
// strict makes Decode run the input through the strict YAML-to-JSON converter and
// the strict json-iterator configuration, so that duplicate and unknown fields
// are reported as errors.
strict bool
}

// Serializer implements Serializer
Expand Down Expand Up @@ -119,11 +144,28 @@ func CaseSensitiveJsonIterator() jsoniter.API {
return config
}

// Private copy of jsoniter to try to shield against possible mutations
// CaseSensitiveStrictJsonIterator returns a jsoniterator API that's configured to be
// case-sensitive and, in addition, to disallow unknown fields when unmarshalling.
// It is compatible with the encoding/json standard library.
func CaseSensitiveStrictJsonIterator() jsoniter.API {
	strictConfig := jsoniter.Config{
		EscapeHTML:             true,
		SortMapKeys:            true,
		ValidateJsonRawMessage: true,
		CaseSensitive:          true,
		DisallowUnknownFields:  true,
	}
	api := strictConfig.Froze()
	// Force jsoniter to decode number to interface{} via int64/float64, if possible.
	api.RegisterExtension(&customNumberExtension{})
	return api
}

// Private copies of jsoniter to try to shield against possible mutations
// from outside. Still does not protect from package level jsoniter.Register*() functions - someone calling them
// in some other library will mess with every usage of the jsoniter library in the whole program.
// See https://github.com/json-iterator/go/issues/265
var (
	caseSensitiveJsonIterator       = CaseSensitiveJsonIterator()
	caseSensitiveStrictJsonIterator = CaseSensitiveStrictJsonIterator()
)

// gvkWithDefaults returns group kind and version defaulting from provided default
func gvkWithDefaults(actual, defaultGVK schema.GroupVersionKind) schema.GroupVersionKind {
Expand Down Expand Up @@ -160,7 +202,22 @@ func (s *Serializer) Decode(originalData []byte, gvk *schema.GroupVersionKind, i
}

data := originalData
if s.yaml {
var yamlToJSONError error
if s.strict {
// In strict mode always pass the data through the YAMLToJSONStrict converter.
// This is done to catch duplicate fields for both JSON and YAML. For JSON data,
// the output would equal the input, unless there is a parsing error such as duplicate fields.
altered, err := yaml.YAMLToJSONStrict(data)
if err == nil {
// Update data with the sanitized, strict JSON. We update the data variable here, because the original input might have been YAML. For good JSON this is a no-op.
data = altered
} else {
// Store the error for later use, once a GVK for the input is detected so we can throw a good, structured error.
yamlToJSONError = err
}
}
// If this is the YAML decoder, and the codec is either non-strict or has failed to decode in strict mode, convert the provided YAML to JSON in non-strict mode.
if s.yaml && (!s.strict || yamlToJSONError != nil) {
altered, err := yaml.YAMLToJSON(data)
if err != nil {
return nil, nil, err
Expand Down Expand Up @@ -213,8 +270,39 @@ func (s *Serializer) Decode(originalData []byte, gvk *schema.GroupVersionKind, i
return nil, actual, err
}

if err := caseSensitiveJsonIterator.Unmarshal(data, obj); err != nil {
return nil, actual, err
// If the provided decoding mode is strict, first check if there was a duplicate error earlier in the original JSON
// or YAML data, and in that case, return it as a DuplicateFieldError. If that's not a problem, decode using the strict
// json-iter configuration, and return a possible error as UnknownFieldError.
// If non-strict mode just use the "normal" non-strict json-iter decoder.
if s.strict {
if yamlToJSONError != nil {
str := yamlToJSONError.Error()
// Detect DuplicateFieldError, otherwise return the generic StrictDecoderError.
// In the future we might want to handle this in a better way. Currently the used libraries
// do not return typed errors.
if strings.Contains(str, `already set in map`) {
neolit123 marked this conversation as resolved.
Show resolved Hide resolved
yamlToJSONError = &runtime.DuplicateFieldError{
StrictDecoderError: *runtime.NewStrictDecoderError(str, *actual, originalData),
}
return nil, actual, yamlToJSONError
}
return nil, actual, runtime.NewStrictDecoderError(str, *actual, originalData)
}
if err := caseSensitiveStrictJsonIterator.Unmarshal(data, obj); err != nil {
str := err.Error()
// Detect UnknownFieldError, otherwise return the generic StrictDecoderError.
if strings.Contains(str, `unknown field`) {
err = &runtime.UnknownFieldError{
StrictDecoderError: *runtime.NewStrictDecoderError(str, *actual, originalData),
}
return nil, actual, err
}
return nil, actual, runtime.NewStrictDecoderError(str, *actual, originalData)
}
} else {
if err := caseSensitiveJsonIterator.Unmarshal(data, obj); err != nil {
return nil, actual, err
}
}
return obj, actual, nil
}
Expand Down