Skip to content
Permalink
Browse files

WIP - start implementing string table

  • Loading branch information
jennybuckley committed Nov 12, 2019
1 parent 0be0eb0 commit bad7781b83009d1a863c997e4e8e92627270918e
@@ -261,7 +261,7 @@ var (
}()
)

func emitV2Prefix(stream *jsoniter.Stream, vt v2ValueType, et v2EntryType) error {
func emitV2Prefix(stream value.Stream, vt v2ValueType, et v2EntryType) error {
var scratch [3]byte
n := v2CombineTypes(et, vt)
str, ok := v2NumberToAscii[n]
@@ -280,7 +280,7 @@ func emitV2Prefix(stream *jsoniter.Stream, vt v2ValueType, et v2EntryType) error
}

// you must write the trailing "," if you need it.
func serializePathElementToStreamV2(stream *jsoniter.Stream, pe PathElement, et v2EntryType) error {
func serializePathElementToStreamV2(stream value.Stream, pe PathElement, et v2EntryType) error {
switch {
case pe.FieldName != nil:
if err := emitV2Prefix(stream, vtField, et); err != nil {
@@ -291,8 +291,16 @@ func serializePathElementToStreamV2(stream *jsoniter.Stream, pe PathElement, et
if err := emitV2Prefix(stream, vtKey, et); err != nil {
return err
}
v := value.Value{MapValue: pe.Key}
v.WriteJSONStream(stream)
stream.WriteObjectStart()
kvPairs := *pe.Key
for i := range kvPairs {
if i > 0 {
stream.WriteMore()
}
stream.WriteObjectField(kvPairs[i].Name)
kvPairs[i].Value.WriteJSONStream(stream)
}
stream.WriteObjectEnd()
case pe.Value != nil:
if err := emitV2Prefix(stream, vtValue, et); err != nil {
return err
@@ -20,9 +20,11 @@ import (
"bytes"
"fmt"
"io"
"strconv"
"unsafe"

jsoniter "github.com/json-iterator/go"
"sigs.k8s.io/structured-merge-diff/fieldpath/strings"
"sigs.k8s.io/structured-merge-diff/value"
)

@@ -60,8 +62,16 @@ func (s *Set) ToJSON_V2Experimental() ([]byte, error) {
}

func (s *Set) ToJSONStream_V2Experimental(w io.Writer) error {
stream := writePool.BorrowStream(w)
defer writePool.ReturnStream(stream)
innterStream := writePool.BorrowStream(w)
defer writePool.ReturnStream(innterStream)
reverseStringTable, err := strings.GetReverseTable(strings.DefaultVersion)
if err != nil {
return err
}
stream := &streamWithStringTable{
Stream: innterStream,
stringTable: reverseStringTable,
}

if err := manageMemory(stream); err != nil {
return err
@@ -70,15 +80,42 @@ func (s *Set) ToJSONStream_V2Experimental(w io.Writer) error {
var r reusableBuilder

stream.WriteArrayStart()
err := s.emitContents_v2(false, stream, &r)
stream.WriteInt(strings.DefaultVersion)
stream.WriteRaw(",")
err = s.emitContents_v2(false, stream, &r)
if err != nil {
return err
}
stream.WriteArrayEnd()
return stream.Flush()
}

func manageMemory(stream *jsoniter.Stream) error {
// streamWithStringTable wraps a jsoniter stream and overrides the string
// writing methods so that any string present in stringTable is emitted as a
// compact "!<n>" reference instead of a quoted JSON string.
type streamWithStringTable struct {
	*jsoniter.Stream

	// stringTable maps a string to the number written after the "!" marker.
	stringTable map[string]int
}

// WriteString writes str to the underlying stream. When str is a key of the
// string table, the raw token "!<n>" is written instead, where n is the value
// stored for str; otherwise str is written as a regular JSON string.
func (s *streamWithStringTable) WriteString(str string) {
	idx, known := s.stringTable[str]
	if !known {
		s.Stream.WriteString(str)
		return
	}
	s.Stream.WriteRaw("!")
	s.Stream.WriteInt(idx)
}

// WriteObjectField writes str as an object field name. When str is a key of
// the string table, the raw token "!<n>:" is written instead, where n is the
// value stored for str; otherwise the underlying stream writes the field.
func (s *streamWithStringTable) WriteObjectField(str string) {
	idx, known := s.stringTable[str]
	if !known {
		s.Stream.WriteObjectField(str)
		return
	}
	s.Stream.WriteRaw("!")
	s.Stream.WriteInt(idx)
	s.Stream.WriteRaw(":")
}

func manageMemory(stream value.Stream) error {
// Help jsoniter manage its buffers--without this, it does a bunch of
// allocations that are not necessary. They were probably optimizing
// for folks using the buffer directly.
@@ -201,7 +238,7 @@ func (s *Set) emitContents_v1(includeSelf bool, stream *jsoniter.Stream, r *reus
return manageMemory(stream)
}

func (s *Set) emitContents_v2(includeSelf bool, stream *jsoniter.Stream, r *reusableBuilder) error {
func (s *Set) emitContents_v2(includeSelf bool, stream value.Stream, r *reusableBuilder) error {
mi, ci := 0, 0
first := true
preWrite := func() {
@@ -295,10 +332,146 @@ func (s *Set) emitContents_v2(includeSelf bool, stream *jsoniter.Stream, r *reus
return manageMemory(stream)
}

// replacer is a byte-at-a-time filter: bytes are fed in through read, the
// rewritten output accumulates in outputBuffer, and flush hands the pending
// output to writer.
type replacer struct {
	// writer receives the buffered output on flush.
	writer io.Writer
	// first is true until the first input byte has been consumed.
	first bool
	// skipTheRest makes read copy every further byte through unchanged.
	skipTheRest bool
	// readStringTableVersion is set while the leading version number is
	// being collected into indexBuffer.
	readStringTableVersion bool
	// inEscapeSequence and inQuotes track the position inside JSON strings.
	inEscapeSequence bool
	inQuotes bool
	// readIndex is set while the digits following a "!" marker are being
	// collected into indexBuffer.
	readIndex bool
	// inputBuffer is not referenced by the methods visible in this file
	// section.
	inputBuffer []byte
	// outputBuffer holds output that has not been flushed yet.
	outputBuffer []byte
	// indexBuffer accumulates the bytes of the number currently being read.
	indexBuffer []byte
	// stringTable is the table used to expand "!<n>" references.
	stringTable []string
}

// newReplacer returns a replacer that flushes to writer and is positioned
// before the first input byte.
func newReplacer(writer io.Writer) *replacer {
	return &replacer{
		writer: writer,
		first:  true,
	}
}

// write queues b at the end of the pending output; nothing reaches the
// underlying writer until flush is called.
func (p *replacer) write(b ...byte) {
	p.outputBuffer = append(p.outputBuffer, b...)
}

// flush writes all pending output to the underlying writer in a single call
// and empties the output buffer (keeping its capacity) regardless of whether
// the write succeeded. The writer's error, if any, is returned.
func (p *replacer) flush() error {
	pending := p.outputBuffer
	_, werr := p.writer.Write(pending)
	p.outputBuffer = pending[:0]
	return werr
}

// read consumes a single input byte and queues the corresponding output in
// the output buffer (nothing is written to the underlying writer here).
//
// The input is handled in the following modes:
//   - If the very first byte is not '[', the input is passed through
//     unchanged from then on.
//   - Otherwise the bytes up to the first ',', ':', '}' or ']' are taken as
//     the string table version, which is used to load the string table. The
//     version bytes themselves are also copied to the output.
//   - Afterwards, a '!' outside of a quoted string starts a string table
//     reference: the following decimal digits are collected until one of the
//     terminators above and the whole reference is replaced by the quoted
//     table entry. Everything else is copied through.
//
// An error is returned when the string table cannot be loaded, or when a
// reference is empty, is not a plain decimal number, or is out of range for
// the loaded table.
func (p *replacer) read(b byte) (err error) {
	if p.skipTheRest {
		p.write(b)
		return nil
	}

	// Parse the string table version.
	// This will be at the beginning of the entire object and start with a '['.
	// If the object doesn't start that way, just skip the rest of it.
	if p.first {
		if b == '[' {
			p.readStringTableVersion = true
		} else {
			p.skipTheRest = true
		}
		p.first = false
		p.write(b)
		return nil
	}
	if p.readStringTableVersion {
		switch b {
		case ',', ':', '}', ']':
			version := parseIndex(p.indexBuffer)

			if p.stringTable, err = strings.GetTable(version); err != nil {
				return err
			}

			p.readStringTableVersion = false
			p.indexBuffer = p.indexBuffer[:0]
		default:
			p.indexBuffer = append(p.indexBuffer, b)
		}
		// The version stays part of the output; only "!<n>" references are
		// rewritten.
		p.write(b)
		return nil
	}

	// Identify and parse an index of an item in the string table.
	// This will start with a '!'. The marker itself is dropped from the output.
	if !p.inQuotes && b == '!' {
		p.readIndex = true
		return nil
	}
	if p.readIndex {
		switch b {
		case ',', ':', '}', ']':
			// Only a non-empty run of decimal digits is a valid reference.
			// Without this check a malformed reference would silently resolve
			// to entry 0 and a negative one would index out of range.
			valid := len(p.indexBuffer) > 0
			for _, c := range p.indexBuffer {
				if c < '0' || c > '9' {
					valid = false
					break
				}
			}
			if !valid {
				return fmt.Errorf("invalid string table index %q", p.indexBuffer)
			}

			k := parseIndex(p.indexBuffer)
			if k < 0 || k >= len(p.stringTable) {
				return fmt.Errorf("unable to look up %v in the string table", k)
			}

			// Same quoting as fmt's %q verb, appended in place.
			// NOTE(review): Go quoting only matches JSON quoting for entries
			// without control or non-printable characters - confirm the table
			// contents satisfy that.
			p.outputBuffer = strconv.AppendQuote(p.outputBuffer, p.stringTable[k])
			p.write(b)

			p.readIndex = false
			p.indexBuffer = p.indexBuffer[:0]
		default:
			p.indexBuffer = append(p.indexBuffer, b)
		}
		return nil
	}

	// Update the state of the parser so it knows what part of json it's
	// reading: an unescaped '"' toggles inQuotes, and a backslash that is not
	// itself escaped starts an escape sequence covering the next byte.
	p.inQuotes = !p.inQuotes && (b == '"') || p.inQuotes && (p.inEscapeSequence || !(b == '"'))
	p.inEscapeSequence = !p.inEscapeSequence && b == '\\'

	p.write(b)
	return nil
}

// parseIndex interprets b as a base-10 integer and returns it.
//
// The conversion error is deliberately discarded: input that is not a valid
// number (including empty input) yields 0, and a value that does not fit in
// an int yields the nearest representable int. Callers that need to tell
// these cases apart must validate b themselves.
func parseIndex(b []byte) int {
	v, _ := strconv.ParseInt(string(b), 10, 0)
	return int(v)
}

// readerWithStringTable returns the read end of a pipe that yields the
// contents of r with every string table reference expanded by a replacer.
//
// The conversion runs in a separate goroutine that ends when r is exhausted,
// when r or the replacer reports an error, or when a write to the pipe fails
// (for example because the returned reader was closed). Errors from r and
// from the replacer are delivered to the consumer of the returned reader;
// a clean end of input is delivered as io.EOF. The caller should close the
// returned reader when done so the goroutine is never left blocked on a
// pipe write.
func readerWithStringTable(r io.Reader) *io.PipeReader {
	out, in := io.Pipe()
	go func() {
		// Safety net for every exit path. Closing again after CloseWithError
		// does not overwrite the error recorded first.
		defer in.Close()

		p := newReplacer(in)
		inputBuffer := make([]byte, 100)
		for {
			// Per the io.Reader contract, the n bytes that were read must be
			// processed before looking at the error, and only those n bytes
			// are valid.
			n, readErr := r.Read(inputBuffer)
			chunk := inputBuffer[:n]

			if p.skipTheRest {
				p.write(chunk...)
			} else {
				for _, b := range chunk {
					if err := p.read(b); err != nil {
						in.CloseWithError(err)
						return
					}
				}
			}

			// A zero-length write on a pipe still blocks until the other side
			// reads, so only flush when there is something to deliver.
			if len(p.outputBuffer) > 0 {
				if err := p.flush(); err != nil {
					// The read side is gone; there is nobody to report to.
					return
				}
			}

			if readErr != nil {
				if readErr != io.EOF {
					in.CloseWithError(readErr)
				}
				return
			}
		}
	}()
	return out
}

// FromJSON clears s and reads a JSON formatted set structure.
func (s *Set) FromJSON(r io.Reader) error {
pr := readerWithStringTable(r)
defer pr.Close()

// The iterator pool is completely useless for memory management, grrr.
iter := jsoniter.Parse(jsoniter.ConfigCompatibleWithStandardLibrary, r, 4096)
iter := jsoniter.Parse(jsoniter.ConfigCompatibleWithStandardLibrary, pr, 4096)

next := iter.WhatIsNext()
switch next {
@@ -311,7 +484,7 @@ func (s *Set) FromJSON(r io.Reader) error {
}
return iter.Error
case jsoniter.ArrayValue:
found, _ := readIter_v2(iter)
found, _ := readIter_v2(iter, true)
if found == nil {
*s = Set{}
} else {
@@ -383,17 +556,22 @@ func readIter_v1(iter *jsoniter.Iterator) (children *Set, isMember bool) {

// isMember is true if this subtree is also (or only) a member of parent;
// children is nil if there are no further children.
func readIter_v2(iter *jsoniter.Iterator) (children *Set, isMember bool) {
func readIter_v2(iter *jsoniter.Iterator, root bool) (children *Set, isMember bool) {
const (
KT int = iota
KEY
BODY
ST
)
step := KT
var vt v2ValueType
var et v2EntryType
var pe PathElement

if root {
step = ST
}

doMember := func() {
if children == nil {
children = &Set{}
@@ -410,6 +588,13 @@ func readIter_v2(iter *jsoniter.Iterator) (children *Set, isMember bool) {
}

iter.ReadArrayCB(func(iter *jsoniter.Iterator) bool {
if step == ST {
// first is the string table version, don't do anything with it
_ = iter.ReadInt()
step = KT
return true
}

if step == KT {
// first is the key type
number := iter.ReadInt()
@@ -437,16 +622,21 @@ func readIter_v2(iter *jsoniter.Iterator) (children *Set, isMember bool) {
}
pe.Value = &v
case vtKey:
v, err := value.ReadJSONIter(iter)
if err != nil {
iter.Error = err
return false
}
if v.MapValue == nil {
iter.Error = fmt.Errorf("expected key value pairs but got %#v", v)
kvPairs := value.FieldList{}
if next := iter.WhatIsNext(); next != jsoniter.ObjectValue {
iter.Error = fmt.Errorf("expecting array got: %v", next)
return false
}
pe.Key = v.MapValue
iter.ReadObjectCB(func(iter *jsoniter.Iterator, key string) bool {
v, err := value.ReadJSONIter(iter)
if err != nil {
iter.Error = err
return false
}
kvPairs = append(kvPairs, value.Field{Name: key, Value: v})
return true
})
pe.Key = &kvPairs
case vtIndex:
i := iter.ReadInt()
pe.Index = &i
@@ -473,7 +663,7 @@ func readIter_v2(iter *jsoniter.Iterator) (children *Set, isMember bool) {
return false
}

grandchildren, childIsMember := readIter_v2(iter)
grandchildren, childIsMember := readIter_v2(iter, false)
if childIsMember {
doMember()
}
@@ -83,8 +83,9 @@ func TestSerializeV1GoldenData(t *testing.T) {

func TestSerializeV2GoldenData(t *testing.T) {
examples := []string{
`[0,"aaa",0,"aab",0,"aac",0,"aad",0,"aae",0,"aaf",3,{"name":"first"},3,{"name":"second"},3,{"port":443,"protocol":"tcp"},3,{"port":443,"protocol":"udp"},1,1,1,2,1,3,1,"aa",1,"ab",1,true,2,1,2,2,2,3,2,4]`,
`[4,"aaa",[7,{"name":"second"},[5,3,[0,"aab"]],1,3,1,true],4,"aab",[0,"aaa",4,"aaf",[7,{"port":443,"protocol":"udp"},[3,{"port":443,"protocol":"tcp"}]],3,{"name":"first"}],4,"aac",[4,"aaa",[1,1],0,"aac",5,3,[3,{"name":"second"}]],4,"aad",[4,"aac",[1,1],4,"aaf",[7,{"name":"first"},[3,{"name":"first"}]],6,1,[2,1,6,3,[1,true]]],4,"aae",[0,"aae",7,{"port":443,"protocol":"tcp"},[3,{"port":443,"protocol":"udp"}],6,4,[0,"aaf"]],4,"aaf",[6,1,[0,"aac"],2,2,2,3],7,{"name":"first"},[4,"aad",[0,"aaf"]],7,{"port":443,"protocol":"tcp"},[4,"aaa",[0,"aad"]],7,{"port":443,"protocol":"udp"},[0,"aac",7,{"name":"first"},[2,3],7,{"port":443,"protocol":"udp"},[2,4]],5,1,[4,"aac",[2,4],0,"aaf",3,{"port":443,"protocol":"tcp"}],5,2,[4,"aad",[0,"aaf"],2,1],5,3,[0,"aaa",3,{"name":"first"},2,2],5,"aa",[4,"aab",[0,"aaf"],0,"aae",7,{"name":"first"},[0,"aad"],2,2],5,"ab",[4,"aaf",[2,4],3,{"port":443,"protocol":"tcp"},3,{"port":443,"protocol":"udp"},5,1,[3,{"port":443,"protocol":"udp"}],6,1,[4,"aae",[2,4]]],5,true,[7,{"name":"second"},[0,"aaa"],6,2,[3,{"port":443,"protocol":"tcp"}]],6,1,[6,3,[0,"aaf"]],6,2,[0,"aae",7,{"port":443,"protocol":"tcp"},[1,1]],6,3,[4,"aab",[5,true,[1,"aa"]],0,"aaf",2,1],6,4,[5,"aa",[4,"aab",[3,{"name":"second"}]]]]`,
`[1,0,!0,3,{!1:{!2:!0}},1,!1,1,["t",!1],1,{!2:[!1]},2,2]`,
`[1,0,"aaa",0,"aab",0,"aac",0,"aad",0,"aae",0,"aaf",3,{!0:"first"},3,{!0:"second"},3,{!1:443,!2:"tcp"},3,{!1:443,!2:"udp"},1,1,1,2,1,3,1,"aa",1,"ab",1,true,2,1,2,2,2,3,2,4]`,
`[1,4,"aaa",[7,{!0:"second"},[5,3,[0,"aab"]],1,3,1,true],4,"aab",[0,"aaa",4,"aaf",[7,{!1:443,!2:"udp"},[3,{!1:443,!2:"tcp"}]],3,{!0:"first"}],4,"aac",[4,"aaa",[1,1],0,"aac",5,3,[3,{!0:"second"}]],4,"aad",[4,"aac",[1,1],4,"aaf",[7,{!0:"first"},[3,{!0:"first"}]],6,1,[2,1,6,3,[1,true]]],4,"aae",[0,"aae",7,{!1:443,!2:"tcp"},[3,{!1:443,!2:"udp"}],6,4,[0,"aaf"]],4,"aaf",[6,1,[0,"aac"],2,2,2,3],7,{!0:"first"},[4,"aad",[0,"aaf"]],7,{!1:443,!2:"tcp"},[4,"aaa",[0,"aad"]],7,{!1:443,!2:"udp"},[0,"aac",7,{!0:"first"},[2,3],7,{!1:443,!2:"udp"},[2,4]],5,1,[4,"aac",[2,4],0,"aaf",3,{!1:443,!2:"tcp"}],5,2,[4,"aad",[0,"aaf"],2,1],5,3,[0,"aaa",3,{!0:"first"},2,2],5,"aa",[4,"aab",[0,"aaf"],0,"aae",7,{!0:"first"},[0,"aad"],2,2],5,"ab",[4,"aaf",[2,4],3,{!1:443,!2:"tcp"},3,{!1:443,!2:"udp"},5,1,[3,{!1:443,!2:"udp"}],6,1,[4,"aae",[2,4]]],5,true,[7,{!0:"second"},[0,"aaa"],6,2,[3,{!1:443,!2:"tcp"}]],6,1,[6,3,[0,"aaf"]],6,2,[0,"aae",7,{!1:443,!2:"tcp"},[1,1]],6,3,[4,"aab",[5,true,[1,"aa"]],0,"aaf",2,1],6,4,[5,"aa",[4,"aab",[3,{!0:"second"}]]]]`,
}
for i, str := range examples {
t.Run(fmt.Sprintf("%v", i), func(t *testing.T) {

0 comments on commit bad7781

Please sign in to comment.
You can’t perform that action at this time.