encoding/json accepts two JSON forms when decoding into a Go []byte field: a base64-encoded string or a JSON array of integers (the latter via the reflective slice fallback path). easyjson's hand-rolled jlexer.Lexer.Bytes() only handles the base64 string form, so any caller that previously sent []byte as a JSON array now sees a parse error.
Reproduction
package main
import (
json "encoding/json"
"fmt"
jlexer "github.com/mailru/easyjson/jlexer"
jwriter "github.com/mailru/easyjson/jwriter"
)
// Req is the minimal payload used by the reproduction: a single []byte field
// mapped to the JSON key "data".
type Req struct {
Data []byte `json:"data"`
}
// main decodes every sample payload twice, once with encoding/json and once
// with the easyjson-style decoder defined in this file, and prints the decoded
// data and the error from each so the two can be compared side by side.
func main() {
	samples := []string{
		`{"data": "AQID"}`,    // base64 string
		`{"data": [1, 2, 3]}`, // array of ints
		`{"data": []}`,        // empty array
	}
	for i := 0; i < len(samples); i++ {
		payload := samples[i]

		// Reference result from the standard library.
		var viaStd Req
		stdErr := json.Unmarshal([]byte(payload), &viaStd)

		// Same payload through the easyjson lexer; it gets its own copy of
		// the bytes, exactly as the standard-library call does.
		var viaEasy Req
		lex := jlexer.Lexer{Data: []byte(payload)}
		easyjson238a128DecodeRepro(&lex, &viaEasy)

		fmt.Println("\ninput: ", payload)
		fmt.Println("std data: ", viaStd.Data)
		fmt.Println("std err: ", stdErr) // <nil> for all three
		fmt.Println("ez data: ", viaEasy.Data) // err on array forms
		fmt.Println("ez err: ", lex.Error())
	}
}
// Below is taken from the easyjson generated code.
// easyjson238a128DecodeRepro reads one value from in into out.
//
// When in.IsNull() reports true the value is skipped and out is left
// untouched. Otherwise an object is expected: the "data" member is stored in
// out.Data (nil when the member is null, in.Bytes() otherwise) and every other
// member is skipped with SkipRecursive. If the lexer was at its start when the
// call began, in.Consumed() is called once the value has been read.
func easyjson238a128DecodeRepro(in *jlexer.Lexer, out *Req) {
	atTop := in.IsStart()

	if in.IsNull() {
		if atTop {
			in.Consumed()
		}
		in.Skip()
		return
	}

	in.Delim('{')
	for !in.IsDelim('}') {
		name := in.UnsafeFieldName(false)
		in.WantColon()
		if name == "data" {
			if !in.IsNull() {
				out.Data = in.Bytes()
			} else {
				in.Skip()
				out.Data = nil
			}
		} else {
			// Unknown member: discard its value, whatever shape it has.
			in.SkipRecursive()
		}
		in.WantComma()
	}
	in.Delim('}')

	if atTop {
		in.Consumed()
	}
}
// easyjson238a128EncodeRepro writes in to out as a JSON object with a single
// "data" member whose value is produced by out.Base64Bytes(in.Data).
func easyjson238a128EncodeRepro(out *jwriter.Writer, in Req) {
	// First (and only) member, so the key is written without a leading comma.
	const dataKey = "\"data\":"

	out.RawByte('{')
	out.RawString(dataKey)
	out.Base64Bytes(in.Data)
	out.RawByte('}')
}
Output (easyjson v0.9.2):
input: {"data": "AQID"}
std data: [1 2 3]
std err: <nil>
ez data: [1 2 3]
ez err: <nil>
input: {"data": [1, 2, 3]}
std data: [1 2 3]
std err: <nil>
ez data: []
ez err: parse error: expected string near offset 10 of 'data'
input: {"data": []}
std data: []
std err: <nil>
ez data: []
ez err: parse error: expected string near offset 10 of 'data'
Proposed fix
Extend jlexer.Lexer.Bytes() to also accept a [ delimiter and decode an array of uint8s, matching encoding/json's reflective behavior. The change is confined to Bytes() and one new private helper, bytesFromArray (called below but not shown here); no codegen change is required, since generated unmarshalers already call in.Bytes().
// Bytes reads the current token as a []byte.
//
// A '[' delimiter is handed off to bytesFromArray. A string token is unescaped
// and then decoded as standard base64. Any other token, or a lexer that is no
// longer Ok, is reported through errInvalidToken("string") and yields nil. A
// base64 decoding failure is stored as the lexer's fatal error and also yields
// nil. The token is consumed only after a successful base64 decode.
func (r *Lexer) Bytes() []byte {
	if r.token.kind == TokenUndef && r.Ok() {
		r.FetchToken()
	}

	switch {
	case !r.Ok():
		r.errInvalidToken("string")
		return nil
	case r.token.kind == TokenDelim && r.token.delimValue == '[':
		// Array form: delegate to the array decoder.
		return r.bytesFromArray()
	case r.token.kind != TokenString:
		r.errInvalidToken("string")
		return nil
	}

	if r.unescapeStringToken() != nil {
		r.errInvalidToken("string")
		return nil
	}

	// DecodedLen is an upper bound; Decode reports the exact byte count.
	decoded := make([]byte, base64.StdEncoding.DecodedLen(len(r.token.byteValue)))
	written, decodeErr := base64.StdEncoding.Decode(decoded, r.token.byteValue)
	if decodeErr != nil {
		r.fatalError = &LexerError{
			Reason: decodeErr.Error(),
		}
		return nil
	}

	r.consume()
	return decoded[:written]
}
encoding/json accepts two JSON forms when decoding into a Go []byte field: a base64-encoded string or a JSON array of integers (the latter via the reflective slice fallback path). easyjson's hand-rolled jlexer.Lexer.Bytes() only handles the base64 string form, so any caller that previously sent []byte as a JSON array now sees a parse error.
Reproduction
Output (easyjson v0.9.2):
Proposed fix
Extend jlexer.Lexer.Bytes() to also accept a [ delimiter and decode an array of uint8s, matching encoding/json's reflective behavior. The change is confined to Bytes() and one new private helper, bytesFromArray (called in the patch but not shown here); no codegen change is required, since generated unmarshalers already call in.Bytes().