Skip to content

Commit

Permalink
go-bindata: balance backquoted string concatenation
Browse files Browse the repository at this point in the history
The current code prints long string concatenations like

	a + b + c + d + e + f

The new code prints

	((a + (b + c)) + (d + (e + f)))

This has the effect of minimizing the overall height of the
expression tree, which makes recursive traversals of the
tree a little happier and makes incremental computation
of string constants as in go/types and go/vet MUCH happier.

The compilers and go/types should be fixed so that this
isn't necessary, but we can't fix the old releases, and this
is very easy, so it seems worth doing.

For golang/go#23222.
  • Loading branch information
rsc authored and kevinburke committed Jan 5, 2018
1 parent c45d99b commit e5078a2
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 9 deletions.
57 changes: 48 additions & 9 deletions release.go
Expand Up @@ -84,17 +84,56 @@ func writeReleaseAsset(w io.Writer, c *Config, asset *Asset) error {
return asset_release_common(w, c, asset)
}

var (
	// backquote is the delimiter of a Go raw string literal and therefore
	// the one byte sequence that can never appear inside one.
	backquote = []byte("`")
	// bom is the UTF-8 byte order mark. It is valid UTF-8 but not permitted
	// in Go source files, so it has to be emitted as an escaped string.
	bom = []byte("\xEF\xBB\xBF")
)

// sanitize renders a valid UTF-8 byte slice as a Go string constant
// expression and returns the source text of that expression.
//
// Runs of ordinary bytes become raw string literals; each backquote and each
// BOM becomes a small interpreted string literal. (Handling the BOM would not
// be worth the bother, except that jquery.js has one somewhere in the middle.)
// The pieces are joined with "+" in a balanced, fully parenthesized tree
// rather than a flat left-leaning chain, which keeps the expression tree
// shallow for tools that walk it recursively.
//
// Empty input yields the empty raw string literal.
//
// Based on https://code.google.com/p/go/source/browse/godoc/static/makestatic.go?repo=tools
func sanitize(b []byte) []byte {
	var chunks [][]byte
	for i, part := range bytes.Split(b, backquote) {
		if i > 0 {
			// Every split boundary stands for one backquote in the input.
			chunks = append(chunks, backquote)
		}
		for j, piece := range bytes.Split(part, bom) {
			if j > 0 {
				// Likewise, every inner boundary stands for one BOM.
				chunks = append(chunks, bom)
			}
			// Skip empty runs so the output has no useless `` operands.
			if len(piece) > 0 {
				chunks = append(chunks, piece)
			}
		}
	}

	var buf bytes.Buffer
	sanitizeChunks(&buf, chunks)
	return buf.Bytes()
}

// sanitizeChunks writes chunks to buf as a balanced concatenation expression.
//
// Two or more chunks are split in half and written as "(left + right)",
// recursing on each half. A single chunk is written as "`" in an interpreted
// literal if it is a backquote, as "\xEF\xBB\xBF" if it is a BOM, and as a
// raw string literal otherwise. No chunks at all produce the empty raw string
// literal, so callers always receive a valid expression.
func sanitizeChunks(buf *bytes.Buffer, chunks [][]byte) {
	n := len(chunks)
	if n == 0 {
		// Empty input: sanitize produced no chunks; indexing chunks[0]
		// below would panic.
		buf.WriteString("``")
		return
	}
	if n >= 2 {
		buf.WriteString("(")
		sanitizeChunks(buf, chunks[:n/2])
		buf.WriteString(" + ")
		sanitizeChunks(buf, chunks[n/2:])
		buf.WriteString(")")
		return
	}

	switch c := chunks[0]; {
	case bytes.Equal(c, backquote):
		buf.WriteString("\"`\"")
	case bytes.Equal(c, bom):
		buf.WriteString(`"\xEF\xBB\xBF"`)
	default:
		buf.WriteString("`")
		buf.Write(c)
		buf.WriteString("`")
	}
}

func header_compressed_nomemcopy(w io.Writer) error {
Expand Down Expand Up @@ -337,7 +376,7 @@ func uncompressed_memcopy(w io.Writer, asset *Asset, r io.Reader) error {
return err
}
if utf8.Valid(b) && !bytes.Contains(b, []byte{0}) {
fmt.Fprintf(w, "`%s`", sanitize(b))
w.Write(sanitize(b))
} else {
fmt.Fprintf(w, "%+q", b)
}
Expand Down
23 changes: 23 additions & 0 deletions release_test.go
@@ -0,0 +1,23 @@
package bindata

import "testing"

// sanitizeTests pairs raw input (in) with the exact expression text that
// sanitize is expected to return for it (out).
var sanitizeTests = []struct {
	in  string
	out string
}{
	// Plain text becomes a single raw string literal.
	{in: `hello`, out: "`hello`"},
	{in: "hello\nworld", out: "`hello\nworld`"},
	// A backquote is split out into its own interpreted literal.
	{in: "`ello", out: "(\"`\" + `ello`)"},
	// Many pieces are grouped into a balanced, parenthesized tree.
	{in: "`a`e`i`o`u`", out: "(((\"`\" + `a`) + (\"`\" + (`e` + \"`\"))) + ((`i` + (\"`\" + `o`)) + (\"`\" + (`u` + \"`\"))))"},
	// A BOM is emitted as an escaped interpreted literal.
	{in: "\xEF\xBB\xBF`s away!", out: "(\"\\xEF\\xBB\\xBF\" + (\"`\" + `s away!`))"},
}

// TestSanitize feeds every sanitizeTests input through sanitize and reports
// each case whose output differs from the expected expression text.
func TestSanitize(t *testing.T) {
	for i := range sanitizeTests {
		tc := sanitizeTests[i]
		got := sanitize([]byte(tc.in))
		if string(got) == tc.out {
			continue
		}
		t.Errorf("sanitize(%q):\nhave %q\nwant %q", tc.in, got, tc.out)
	}
}

1 comment on commit e5078a2

@rsc
Copy link
Contributor Author

@rsc rsc commented on e5078a2 Jan 5, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

Please sign in to comment.