diff --git a/benchmarks/jsoniter_large_file_test.go b/benchmarks/jsoniter_large_file_test.go
index 465eaec9..3ce2716f 100644
--- a/benchmarks/jsoniter_large_file_test.go
+++ b/benchmarks/jsoniter_large_file_test.go
@@ -1,11 +1,13 @@
 package test
 
 import (
+	"bytes"
 	"encoding/json"
-	"github.com/json-iterator/go"
 	"io/ioutil"
 	"os"
 	"testing"
+
+	jsoniter "github.com/json-iterator/go"
 )
 
 //func Test_large_file(t *testing.T) {
@@ -156,3 +158,94 @@ func Benchmark_json_large_file(b *testing.B) {
 		}
 	}
 }
+
+// scan walks one JSON value, descending into arrays and objects and skipping
+// everything else. Object keys arrive as strings via ReadMapCB.
+func scan(iter *jsoniter.Iterator) {
+	next := iter.WhatIsNext()
+	switch next {
+	case jsoniter.InvalidValue:
+		iter.Skip()
+	case jsoniter.StringValue:
+		iter.Skip()
+	case jsoniter.NumberValue:
+		iter.Skip()
+	case jsoniter.NilValue:
+		iter.Skip()
+	case jsoniter.BoolValue:
+		iter.Skip()
+	case jsoniter.ArrayValue:
+		iter.ReadArrayCB(func(iter *jsoniter.Iterator) bool {
+			scan(iter)
+			return true
+		})
+	case jsoniter.ObjectValue:
+		iter.ReadMapCB(func(iter *jsoniter.Iterator, key string) bool {
+			scan(iter)
+			return true
+		})
+	default:
+		iter.Skip()
+	}
+}
+
+// scanBytes walks one JSON value like scan, but object keys arrive as []byte
+// via ReadMapCBFieldAsBytes, with buf offered as the key buffer. It returns the
+// buffer last handed back, so that the caller can keep reusing it.
+func scanBytes(iter *jsoniter.Iterator, buf []byte) []byte {
+	next := iter.WhatIsNext()
+	switch next {
+	case jsoniter.InvalidValue:
+		iter.Skip()
+	case jsoniter.StringValue:
+		iter.Skip()
+	case jsoniter.NumberValue:
+		iter.Skip()
+	case jsoniter.NilValue:
+		iter.Skip()
+	case jsoniter.BoolValue:
+		iter.Skip()
+	case jsoniter.ArrayValue:
+		iter.ReadArrayCB(func(iter *jsoniter.Iterator) bool {
+			buf = scanBytes(iter, buf)
+			return true
+		})
+	case jsoniter.ObjectValue:
+		iter.ReadMapCBFieldAsBytes(buf, func(iter *jsoniter.Iterator, field []byte) bool {
+			buf = scanBytes(iter, field)
+			return true
+		})
+	default:
+		iter.Skip()
+	}
+	return buf
+}
+
+// Benchmark_custom_scan compares key delivery as string (ReadMapCB) with key
+// delivery as a reused []byte (ReadMapCBFieldAsBytes) over the same input.
+func Benchmark_custom_scan(b *testing.B) {
+	fb, err := ioutil.ReadFile("/tmp/large-file.json")
+	if err != nil {
+		b.Skipf("fixture unavailable: %v", err)
+	}
+
+	// Benchmark_scan_string/string-12 100000 15429 ns/op 4952 B/op 76 allocs/op
+	// Benchmark_scan_string/bytes-12 100000 12741 ns/op 4312 B/op 6 allocs/op
+
+	b.Run("string", func(b *testing.B) {
+		b.ReportAllocs()
+		b.ResetTimer()
+		for n := 0; n < b.N; n++ {
+			iter := jsoniter.Parse(jsoniter.ConfigDefault, bytes.NewReader(fb), 4096)
+			scan(iter)
+		}
+	})
+	b.Run("bytes", func(b *testing.B) {
+		b.ReportAllocs()
+		b.ResetTimer()
+		for n := 0; n < b.N; n++ {
+			iter := jsoniter.Parse(jsoniter.ConfigDefault, bytes.NewReader(fb), 4096)
+			scanBytes(iter, nil)
+		}
+	})
+}
diff --git a/iter_object.go b/iter_object.go
index 1c575767..dc6a4220 100644
--- a/iter_object.go
+++ b/iter_object.go
@@ -3,6 +3,7 @@ package jsoniter
 import (
 	"fmt"
 	"strings"
+	"unsafe"
 )
 
 // ReadObject read one field from object.
@@ -59,7 +60,11 @@ func (iter *Iterator) readFieldHash() int64 {
 			b := iter.buf[i]
 			if b == '\\' {
 				iter.head = i
-				for _, b := range iter.readStringSlowPath() {
+				decodedBytes := iter.readStringSlowPath(nil)
+				// Iterate over a string to ensure we look at
+				// utf8 encoded runes and not bytes.
+				str := *(*string)(unsafe.Pointer(&decodedBytes))
+				for _, b := range str {
 					if 'A' <= b && b <= 'Z' && !iter.cfg.caseSensitive {
 						b += 'a' - 'A'
 					}
@@ -202,6 +207,58 @@ func (iter *Iterator) ReadMapCB(callback func(*Iterator, string) bool) bool {
 	return false
 }
 
+// ReadMapCBFieldAsBytes reads a map with a callback. The field name will be
+// decoded properly, but it's passed as a []byte to permit zero allocation
+// reads. The buffer will be reused, so you must copy it if you want to store
+// its contents.
+func (iter *Iterator) ReadMapCBFieldAsBytes(fieldNameBuffer []byte, callback func(*Iterator, []byte) bool) bool {
+	c := iter.nextToken()
+	if c == '{' {
+		c = iter.nextToken()
+		if c == '"' {
+			iter.unreadByte() // ReadStringIntoBuffer consumes the opening quote itself
+			fieldNameBuffer = fieldNameBuffer[:0]
+			fieldNameBuffer = iter.ReadStringIntoBuffer(fieldNameBuffer)
+			if c = iter.nextToken(); c != ':' { // keep the token: the error below reports it
+				iter.ReportError("ReadMapCBFieldAsBytes", "expect : after object field, but found "+string([]byte{c}))
+				return false
+			}
+			if !callback(iter, fieldNameBuffer) {
+				return false
+			}
+			c = iter.nextToken()
+			for c == ',' {
+				fieldNameBuffer = fieldNameBuffer[:0] // reuse the storage for the next key
+				fieldNameBuffer = iter.ReadStringIntoBuffer(fieldNameBuffer)
+				if c = iter.nextToken(); c != ':' { // keep the token: the error below reports it
+					iter.ReportError("ReadMapCBFieldAsBytes", "expect : after object field, but found "+string([]byte{c}))
+					return false
+				}
+				if !callback(iter, fieldNameBuffer) {
+					return false
+				}
+				c = iter.nextToken()
+			}
+			if c != '}' {
+				iter.ReportError("ReadMapCBFieldAsBytes", `object not ended with }`)
+				return false
+			}
+			return true
+		}
+		if c == '}' {
+			return true
+		}
+		iter.ReportError("ReadMapCBFieldAsBytes", `expect " after }, but found `+string([]byte{c}))
+		return false
+	}
+	if c == 'n' {
+		iter.skipThreeBytes('u', 'l', 'l')
+		return true // null
+	}
+	iter.ReportError("ReadMapCBFieldAsBytes", `expect { or n, but found `+string([]byte{c}))
+	return false
+}
+
 func (iter *Iterator) readObjectStart() bool {
 	c := iter.nextToken()
 	if c == '{' {
diff --git a/iter_skip_strict.go b/iter_skip_strict.go
index 6cf66d04..04e7acb6 100644
--- a/iter_skip_strict.go
+++ b/iter_skip_strict.go
@@ -84,7 +84,7 @@ func (iter *Iterator) trySkipString() bool {
 
 func (iter *Iterator) skipObject() {
 	iter.unreadByte()
-	iter.ReadObjectCB(func(iter *Iterator, field string) bool {
+	iter.ReadMapCBFieldAsBytes(nil, func(iter *Iterator, field []byte) bool {
 		iter.Skip()
 		return true
 	})
diff --git a/iter_str.go b/iter_str.go
index adc487ea..51504ad0 100644
--- a/iter_str.go
+++ b/iter_str.go
@@ -20,35 +20,73 @@ func (iter *Iterator) ReadString() (ret string) {
 			} else if c < ' ' {
 				iter.ReportError("ReadString",
 					fmt.Sprintf(`invalid control character found: %d`, c))
-				return
+				return ""
 			}
 		}
-		return iter.readStringSlowPath()
+		ret = string(iter.readStringSlowPath(nil))
+		return ret
 	} else if c == 'n' {
 		iter.skipThreeBytes('u', 'l', 'l')
 		return ""
 	}
 	iter.ReportError("ReadString", `expects " or n, but found `+string([]byte{c}))
-	return
+	return ""
+}
+
+// ReadStringIntoBuffer reads a string, appending its decoded bytes to b. If b
+// doesn't have enough capacity it will be enlarged, but otherwise no allocations
+// will be done. The possibly-enlarged buffer is returned. Passing nil will cause
+// a buffer to be allocated. On JSON null, or when an error is reported, b is
+// returned with its original length.
+func (iter *Iterator) ReadStringIntoBuffer(b []byte) []byte {
+	c := iter.nextToken()
+	if c == '"' {
+		for i := iter.head; i < iter.tail; i++ {
+			c := iter.buf[i]
+			if c == '"' {
+				b = append(b, iter.buf[iter.head:i]...)
+				iter.head = i + 1
+				return b
+			} else if c == '\\' {
+				break
+			} else if c < ' ' {
+				iter.ReportError("ReadStringIntoBuffer",
+					fmt.Sprintf(`invalid control character found: %d`, c))
+				return b
+			}
+		}
+		b = iter.readStringSlowPath(b)
+		return b
+	} else if c == 'n' {
+		iter.skipThreeBytes('u', 'l', 'l')
+		return b
+	}
+	iter.ReportError("ReadStringIntoBuffer", `expects " or n, but found `+string([]byte{c}))
+	return b
 }
 
-func (iter *Iterator) readStringSlowPath() (ret string) {
-	var str []byte
+// readStringSlowPath appends the decoded string, escapes included, to dest. If
+// decoding stops on an error, dest is returned at its original length.
+func (iter *Iterator) readStringSlowPath(dest []byte) []byte {
+	dn := len(dest)
 	var c byte
 	for iter.Error == nil {
 		c = iter.readByte()
 		if c == '"' {
-			return string(str)
+			return dest
 		}
 		if c == '\\' {
 			c = iter.readByte()
-			str = iter.readEscapedChar(c, str)
+			dest = iter.readEscapedChar(c, dest)
 		} else {
-			str = append(str, c)
+			dest = append(dest, c)
 		}
 	}
 	iter.ReportError("readStringSlowPath", "unexpected end of input")
-	return
+	if len(dest) > dn {
+		dest = dest[:dn]
+	}
+	return dest
 }
 
 func (iter *Iterator) readEscapedChar(c byte, str 
[]byte) []byte {
diff --git a/justkey_test.go b/justkey_test.go
new file mode 100644
index 00000000..c0fc7287
--- /dev/null
+++ b/justkey_test.go
@@ -0,0 +1,117 @@
+package jsoniter_test
+
+import (
+	"io"
+	"strings"
+	"testing"
+
+	jsoniter "github.com/json-iterator/go"
+)
+
+// keycollector accumulates, back to back, every object key seen during a scan.
+type keycollector []byte
+
+// readKeysString walks one JSON value and records each object key, received as
+// a string from ReadMapCB.
+func (kc *keycollector) readKeysString(iter *jsoniter.Iterator) {
+	next := iter.WhatIsNext()
+	switch next {
+	case jsoniter.InvalidValue:
+		iter.Skip()
+	case jsoniter.StringValue:
+		iter.Skip()
+	case jsoniter.NumberValue:
+		iter.Skip()
+	case jsoniter.NilValue:
+		iter.Skip()
+	case jsoniter.BoolValue:
+		iter.Skip()
+	case jsoniter.ArrayValue:
+		iter.ReadArrayCB(func(iter *jsoniter.Iterator) bool {
+			kc.readKeysString(iter)
+			return true
+		})
+	case jsoniter.ObjectValue:
+		iter.ReadMapCB(func(iter *jsoniter.Iterator, key string) bool {
+			*kc = append(*kc, key...)
+			kc.readKeysString(iter)
+			return true
+		})
+	default:
+		iter.Skip()
+	}
+}
+
+// readKeysBytes walks one JSON value and records each object key, received as
+// []byte from ReadMapCBFieldAsBytes with buf offered as the key buffer. It
+// returns the buffer last handed back so the caller can keep reusing it.
+func (kc *keycollector) readKeysBytes(iter *jsoniter.Iterator, buf []byte) []byte {
+	next := iter.WhatIsNext()
+	switch next {
+	case jsoniter.InvalidValue:
+		iter.Skip()
+	case jsoniter.StringValue:
+		iter.Skip()
+	case jsoniter.NumberValue:
+		iter.Skip()
+	case jsoniter.NilValue:
+		iter.Skip()
+	case jsoniter.BoolValue:
+		iter.Skip()
+	case jsoniter.ArrayValue:
+		iter.ReadArrayCB(func(iter *jsoniter.Iterator) bool {
+			buf = kc.readKeysBytes(iter, buf)
+			return true
+		})
+	case jsoniter.ObjectValue:
+		iter.ReadMapCBFieldAsBytes(buf, func(iter *jsoniter.Iterator, key []byte) bool {
+			*kc = append(*kc, key...) // copy first: the nested scan below reuses key's storage
+			buf = kc.readKeysBytes(iter, key)
+			return true
+		})
+	default:
+		iter.Skip()
+	}
+	return buf
+}
+
+// TestReadKeys checks that ReadMapCB and ReadMapCBFieldAsBytes report the same
+// object keys, in document order, and that the document parses without error.
+func TestReadKeys(t *testing.T) {
+	str := `{
+  "gravatar": {
+    "handle": "buger",
+    "urls": [
+    ],
+    "avatar": "http://1.gravatar.com/avatar/f7c8edd577d13b8930d5522f28123510",
+    "avatars": [
+      {
+        "url": "http://1.gravatar.com/avatar/f7c8edd577d13b8930d5522f28123510",
+        "type": "thumbnail"
+      }
+    ]
+  }
+}`
+
+	want := "gravatarhandleurlsavataravatarsurltype"
+
+	var keysString keycollector
+	iter := jsoniter.Parse(jsoniter.ConfigDefault, strings.NewReader(str), 4096)
+	keysString.readKeysString(iter)
+	if iter.Error != nil && iter.Error != io.EOF {
+		t.Fatalf("readKeysString: %v", iter.Error)
+	}
+	if got := string(keysString); got != want {
+		t.Errorf("wanted %v, got %v", want, got)
+	}
+
+	var keysBytes keycollector
+	iter = jsoniter.Parse(jsoniter.ConfigDefault, strings.NewReader(str), 4096)
+	keysBytes.readKeysBytes(iter, nil)
+	if iter.Error != nil && iter.Error != io.EOF {
+		t.Fatalf("readKeysBytes: %v", iter.Error)
+	}
+	if got := string(keysBytes); got != want {
+		t.Errorf("wanted %v, got %v", want, got)
+	}
+}
diff --git a/reflect_map.go b/reflect_map.go
index 547b4421..dccfc4ce 100644
--- a/reflect_map.go
+++ b/reflect_map.go
@@ -156,7 +156,7 @@ func (decoder *mapDecoder) Decode(ptr unsafe.Pointer, iter *Iterator) {
 		mapType.UnsafeSet(ptr, mapType.UnsafeMakeMap(0))
 	}
 	if c != '{' {
-		iter.ReportError("ReadMapCB", `expect { or n, but found `+string([]byte{c}))
+		iter.ReportError("Decode", `expect { or n, but found `+string([]byte{c}))
 		return
 	}
 	c = iter.nextToken()
@@ -164,7 +164,7 @@ func (decoder *mapDecoder) Decode(ptr unsafe.Pointer, iter *Iterator) {
 		return
 	}
 	if c != '"' {
-		iter.ReportError("ReadMapCB", `expect " after }, but found `+string([]byte{c}))
+		iter.ReportError("Decode", `expect " after }, but found `+string([]byte{c}))
 		return
 	}
 	iter.unreadByte()
@@ -172,7 +172,7 @@ func (decoder *mapDecoder) Decode(ptr unsafe.Pointer, iter *Iterator) {
 	decoder.keyDecoder.Decode(key, iter)
 	c = iter.nextToken()
 	if c != ':' {
-		iter.ReportError("ReadMapCB", "expect : after object field, but found "+string([]byte{c}))
+		iter.ReportError("Decode", "expect : after object field, but 
found "+string([]byte{c})) return } elem := decoder.elemType.UnsafeNew() @@ -183,7 +183,7 @@ func (decoder *mapDecoder) Decode(ptr unsafe.Pointer, iter *Iterator) { decoder.keyDecoder.Decode(key, iter) c = iter.nextToken() if c != ':' { - iter.ReportError("ReadMapCB", "expect : after object field, but found "+string([]byte{c})) + iter.ReportError("Decode", "expect : after object field, but found "+string([]byte{c})) return } elem := decoder.elemType.UnsafeNew() @@ -191,7 +191,7 @@ func (decoder *mapDecoder) Decode(ptr unsafe.Pointer, iter *Iterator) { decoder.mapType.UnsafeSetIndex(ptr, key, elem) } if c != '}' { - iter.ReportError("ReadMapCB", `expect }, but found `+string([]byte{c})) + iter.ReportError("Decode", `expect }, but found `+string([]byte{c})) } } @@ -202,13 +202,13 @@ type numericMapKeyDecoder struct { func (decoder *numericMapKeyDecoder) Decode(ptr unsafe.Pointer, iter *Iterator) { c := iter.nextToken() if c != '"' { - iter.ReportError("ReadMapCB", `expect ", but found `+string([]byte{c})) + iter.ReportError("Decode", `expect ", but found `+string([]byte{c})) return } decoder.decoder.Decode(ptr, iter) c = iter.nextToken() if c != '"' { - iter.ReportError("ReadMapCB", `expect ", but found `+string([]byte{c})) + iter.ReportError("Decode", `expect ", but found `+string([]byte{c})) return } } diff --git a/skip_tests/jsoniter_skip_test.go b/skip_tests/jsoniter_skip_test.go index 785899a9..af237c22 100644 --- a/skip_tests/jsoniter_skip_test.go +++ b/skip_tests/jsoniter_skip_test.go @@ -148,6 +148,7 @@ func Benchmark_jsoniter_skip(b *testing.B) { }, "code": 200 }`) + b.ReportAllocs() for n := 0; n < b.N; n++ { result := TestResp{} iter := jsoniter.ParseBytes(jsoniter.ConfigDefault, input)