Skip to content

[]byte parsing rejects JSON arrays of integers, unlike encoding/json #434

@duckbrain

Description

@duckbrain

encoding/json accepts two JSON forms when decoding into a Go []byte field: a base64-encoded string, or a JSON array of integers (the latter through its generic slice-decoding path, which decodes each element as a uint8). easyjson's hand-written jlexer.Lexer.Bytes() handles only the base64 string form, so a payload that encodes []byte as a JSON array, which encoding/json decodes without error, fails with a parse error when the same type is decoded by easyjson-generated code.

Reproduction

package main

import (
	json "encoding/json"
	"fmt"
	jlexer "github.com/mailru/easyjson/jlexer"
	jwriter "github.com/mailru/easyjson/jwriter"
)

// Req is the payload type used by the reproduction. It is decoded once by
// encoding/json and once by the easyjson-generated decoder so the two results
// can be compared; Data is populated from the "data" JSON key.
type Req struct {
	Data []byte `json:"data"`
}

// main decodes each sample payload twice, first with encoding/json and then
// with the easyjson-generated decoder, and prints the decoded bytes and the
// error reported by each so the two can be compared side by side.
func main() {
	payloads := []string{
		`{"data": "AQID"}`,    // base64 string
		`{"data": [1, 2, 3]}`, // array of ints
		`{"data": []}`,        // empty array
	}

	for _, payload := range payloads {
		var (
			viaStd  Req
			viaEasy Req
		)

		// Reference behavior: the standard library decoder.
		stdErr := json.Unmarshal([]byte(payload), &viaStd)

		// Behavior under test: the generated decoder driven by its own lexer,
		// fed a separate copy of the same input.
		lex := jlexer.Lexer{Data: []byte(payload)}
		easyjson238a128DecodeRepro(&lex, &viaEasy)

		fmt.Println("\ninput:    ", payload)
		fmt.Println("std data: ", viaStd.Data)
		fmt.Println("std err:  ", stdErr)       // nil for every payload
		fmt.Println("ez data:  ", viaEasy.Data) // stays empty for the array payloads
		fmt.Println("ez err:   ", lex.Error())
	}
}

// Below is taken from the easyjson generated code.

// easyjson238a128DecodeRepro reads one JSON value from in into out.
//
// A JSON null is skipped and leaves out untouched. Otherwise the value is read
// as an object: the "data" member is stored in out.Data (nil when the member
// is null, in.Bytes() otherwise) and every other member is skipped. When the
// call started at the beginning of the input, in.Consumed() is invoked once
// the value has been handled.
func easyjson238a128DecodeRepro(in *jlexer.Lexer, out *Req) {
	atStart := in.IsStart()

	if in.IsNull() {
		if atStart {
			in.Consumed()
		}
		in.Skip()
		return
	}

	in.Delim('{')
	for !in.IsDelim('}') {
		field := in.UnsafeFieldName(false)
		in.WantColon()

		// Cases are evaluated top to bottom, so in.IsNull() is only consulted
		// when the current member is "data".
		switch {
		case field != "data":
			in.SkipRecursive()
		case in.IsNull():
			in.Skip()
			out.Data = nil
		default:
			out.Data = in.Bytes()
		}

		in.WantComma()
	}
	in.Delim('}')

	if atStart {
		in.Consumed()
	}
}
// easyjson238a128EncodeRepro writes in to out as a JSON object with a single
// "data" member, whose value is produced by out.Base64Bytes(in.Data).
func easyjson238a128EncodeRepro(out *jwriter.Writer, in Req) {
	out.RawByte('{')
	// "data" is the first and only member, so the key is written directly
	// without a leading comma and no "first member" bookkeeping is needed.
	out.RawString("\"data\":")
	out.Base64Bytes(in.Data)
	out.RawByte('}')
}

Output (easyjson v0.9.2):

input:     {"data": "AQID"}
std data:  [1 2 3]
std err:   <nil>
ez data:   [1 2 3]
ez err:    <nil>

input:     {"data": [1, 2, 3]}
std data:  [1 2 3]
std err:   <nil>
ez data:   []
ez err:    parse error: expected string near offset 10 of 'data'

input:     {"data": []}
std data:  []
std err:   <nil>
ez data:   []
ez err:    parse error: expected string near offset 10 of 'data'

Proposed fix

Extend jlexer.Lexer.Bytes() to also accept a [ delimiter and decode an array of uint8s, matching encoding/json's reflective behavior. The change is local to one function; no codegen change required, since generated unmarshalers already call in.Bytes().

// Bytes reads a []byte value from the input. Two JSON forms are accepted:
//
//   - a string literal, which is unescaped and then decoded as standard
//     base64;
//   - an array, whose elements are each read with Uint8 and appended to the
//     result in order. An empty array yields an empty, non-nil slice.
//
// Any other token is reported through errInvalidToken("string") and nil is
// returned. A base64 decoding failure is stored as the lexer's fatal error and
// nil is returned. For the array form, nil is returned if the lexer is no
// longer Ok() after the closing bracket has been read.
func (r *Lexer) Bytes() []byte {
	if r.token.kind == TokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() {
		r.errInvalidToken("string")
		return nil
	}

	// Array form. The decoding is written out here rather than delegated to a
	// separate helper so that this function is self-contained. It follows the
	// same Delim / IsDelim / WantComma loop that generated decoders use for
	// JSON containers.
	if r.token.kind == TokenDelim && r.token.delimValue == '[' {
		r.Delim('[')
		ret := []byte{}
		for !r.IsDelim(']') {
			ret = append(ret, r.Uint8())
			r.WantComma()
		}
		r.Delim(']')
		if !r.Ok() {
			return nil
		}
		return ret
	}

	if r.token.kind != TokenString {
		r.errInvalidToken("string")
		return nil
	}
	if err := r.unescapeStringToken(); err != nil {
		r.errInvalidToken("string")
		return nil
	}

	// DecodedLen is an upper bound; Decode reports how many bytes were
	// actually written, and the result is trimmed to that length below.
	ret := make([]byte, base64.StdEncoding.DecodedLen(len(r.token.byteValue)))
	n, err := base64.StdEncoding.Decode(ret, r.token.byteValue)
	if err != nil {
		r.fatalError = &LexerError{
			Reason: err.Error(),
		}
		return nil
	}

	r.consume()
	return ret[:n]
}

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions