Skip to content

Commit

Permalink
syntax: add a LangVariant parameter to Quote
Browse files Browse the repository at this point in the history
I had missed that $'' expansions are non-POSIX,
and only implemented by Bash and mksh.
So, in POSIX mode, we can't quote non-printable characters.

Moreover, fuzzing uncovered that mksh implements \x differently,
meaning that we require extra logic to follow its rules.
Keep all the fuzz crashers that we found in the process.

Since we've started having more edge cases that we can't quote,
start returning an error in the API, with a QuoteError type.
All it gives right now is a character position and a reason.

Finally, document what versions of Bash and mksh we develop with.
This matters, because some systems ship with very old versions,
which can implement slightly different quoting or escaping rules.

While at it, start using quicktest for the tests.
  • Loading branch information
mvdan committed Oct 1, 2021
1 parent 1ee509b commit 8bd780f
Show file tree
Hide file tree
Showing 16 changed files with 318 additions and 131 deletions.
10 changes: 5 additions & 5 deletions expand/param.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,11 @@ func (cfg *Config) paramExp(pe *syntax.ParamExp) (string, error) {
case syntax.OtherParamOps:
switch arg {
case "Q":
var ok bool
str, ok = syntax.Quote(str)
if !ok {
// Variables can't contain null bytes.
panic("syntax.Quote should never fail on a variable")
str, err = syntax.Quote(str, syntax.LangBash)
if err != nil {
// Is this even possible? If a user runs into this panic,
// it's most likely a bug we need to fix.
panic(err)
}
case "E":
tail := str
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.16

require (
github.com/creack/pty v1.1.15
github.com/frankban/quicktest v1.13.1
github.com/google/renameio v1.0.1
github.com/kr/pretty v0.3.0
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/creack/pty v1.1.15 h1:cKRCLMj3Ddm54bKSpemfQ8AtYFBhAI2MPmdys22fBdc=
github.com/creack/pty v1.1.15/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/frankban/quicktest v1.13.1 h1:xVm/f9seEhZFL9+n5kv5XLrGwy6elc4V9v/XFY2vmd8=
github.com/frankban/quicktest v1.13.1/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og=
github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/renameio v1.0.1 h1:Lh/jXZmvZxb0BBeSY5VKEfidcbcbenKjZFzM/q0fSeU=
github.com/google/renameio v1.0.1/go.mod h1:t/HQoYBZSsWSNK35C6CO/TpPLDVWvxOHboWUAweKUpk=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
Expand All @@ -22,6 +26,8 @@ golang.org/x/sys v0.0.0-20210925032602-92d5a993a665 h1:QOQNt6vCjMpXE7JSK5VvAzJC1
golang.org/x/sys v0.0.0-20210925032602-92d5a993a665/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20210916214954-140adaaadfaf h1:Ihq/mm/suC88gF8WFcVwk+OV6Tq+wyA1O0E5UEvDglI=
golang.org/x/term v0.0.0-20210916214954-140adaaadfaf/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8=
Expand Down
8 changes: 5 additions & 3 deletions syntax/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,11 @@ func ExampleQuote() {
"invalid-\xe2'",
"nonprint-\x0b\x1b",
} {
quoted, ok := syntax.Quote(s)
if !ok {
fmt.Printf("%q cannot be quoted", s)
// We assume Bash syntax here.
// For general shell syntax quoting, use syntax.LangPOSIX.
quoted, err := syntax.Quote(s, syntax.LangBash)
if err != nil {
fmt.Printf("%q cannot be quoted: %v\n", s, err)
} else {
fmt.Printf("Quote(%17q): %s\n", s, quoted)
}
Expand Down
59 changes: 43 additions & 16 deletions syntax/fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package syntax

import (
"fmt"
"io"
"os/exec"
"strings"
Expand All @@ -19,21 +20,46 @@ func FuzzQuote(f *testing.F) {
}

// Keep in sync with ExampleQuote.
f.Add("foo")
f.Add("bar $baz")
f.Add(`"won't"`)
f.Add(`~/home`)
f.Add("#1304")
f.Add("name=value")
f.Add(`glob-*`)
f.Add("invalid-\xe2'")
f.Add("nonprint-\x0b\x1b")
f.Fuzz(func(t *testing.T, s string) {
quoted, ok := Quote(s)
if !ok {
// Contains a null byte; not interesting.
f.Add("foo", uint8(LangBash))
f.Add("bar $baz", uint8(LangBash))
f.Add(`"won't"`, uint8(LangBash))
f.Add(`~/home`, uint8(LangBash))
f.Add("#1304", uint8(LangBash))
f.Add("name=value", uint8(LangBash))
f.Add(`glob-*`, uint8(LangBash))
f.Add("invalid-\xe2'", uint8(LangBash))
f.Add("nonprint-\x0b\x1b", uint8(LangBash))
f.Fuzz(func(t *testing.T, s string, langVariant uint8) {
if langVariant > 3 {
t.Skip() // lang variants are 0-3
}
lang := LangVariant(langVariant)
quoted, err := Quote(s, lang)
if err != nil {
// Cannot be quoted; not interesting.
t.Skip()
}

var shellProgram string
switch lang {
case LangBash:
hasBash51(t)
shellProgram = "bash"
case LangPOSIX:
hasDash059(t)
shellProgram = "dash"
case LangMirBSDKorn:
hasMksh59(t)
shellProgram = "mksh"
case LangBats:
t.Skip() // bats has no shell and its syntax is just bash
default:
panic(fmt.Sprintf("unknown lang variant: %d", lang))
}

// TODO: Also double-check with our parser.
// That should allow us to fuzz Bats too, for instance.

// Beware that this might run arbitrary code
// if Quote is too naive and allows ';' or '$'.
//
Expand All @@ -43,13 +69,14 @@ func FuzzQuote(f *testing.F) {
//
// We could consider ways to fully sandbox the bash process,
// but for now that feels overkill.
out, err := exec.Command("bash", "-c", "printf %s "+quoted).CombinedOutput()
out, err := exec.Command(shellProgram, "-c", "printf %s "+quoted).CombinedOutput()
if err != nil {
t.Fatalf("bash error on %q quoted as %s: %v: %s", s, quoted, err, out)
t.Fatalf("%s error on %q quoted as %s: %v: %s", shellProgram, s, quoted, err, out)
}
want, got := s, string(out)
if want != got {
t.Fatalf("output mismatch on %q quoted as %s: got %q (len=%d)", want, quoted, got, len(got))
t.Fatalf("%s output mismatch on %q quoted as %s: got %q (len=%d)",
shellProgram, want, quoted, got, len(got))
}
})
}
Expand Down
107 changes: 0 additions & 107 deletions syntax/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,7 @@ package syntax

import (
"bytes"
"fmt"
"io"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)

Expand Down Expand Up @@ -1147,106 +1143,3 @@ func testBinaryOp(val string) BinTestOperator {
return 0
}
}

// Quote returns a quoted version of the input string,
// so that the quoted version is always expanded or interpreted
// as the original string.
//
// When the boolean result is false,
// the input string cannot be quoted to satisfy the rule above.
// For example, an expanded shell string can't contain a null byte.
//
// Quoting is necessary when using arbitrary literal strings
// as words in a shell script or command.
// Without quoting, one could run into syntax errors,
// as well as the possibility of running unintended code.
//
// The quoting strategy is chosen on a best-effort basis,
// to minimize the amount of extra bytes necessary.
//
// Some strings do not require any quoting and are returned unchanged.
// Those strings can be directly surrounded in single quotes.
func Quote(s string) (_ string, ok bool) {
shellChars := false
nonPrintable := false
for _, r := range s {
switch r {
// Like regOps; token characters.
case ';', '"', '\'', '(', ')', '$', '|', '&', '>', '<', '`',
// Whitespace; might result in multiple fields.
' ', '\t', '\r', '\n',
// Escape sequences would be expanded.
'\\',
// Would start a comment unless quoted.
'#',
// Might result in brace expansion.
'{',
// Might result in tilde expansion.
'~',
// Might result in globbing.
'*', '?', '[',
// Might result in an assignment.
'=':
shellChars = true
case '\x00':
// We can't quote null bytes.
return "", false
}
if r == utf8.RuneError || !unicode.IsPrint(r) {
nonPrintable = true
}
}
if !shellChars && !nonPrintable && !IsKeyword(s) {
// Nothing to quote; avoid allocating.
return s, true
}

// Single quotes are usually best,
// as they don't require any escaping of characters.
// If we have any invalid utf8 or non-printable runes,
// use $'' so that we can escape them.
// Note that we can't use double quotes for those.
var b strings.Builder
if nonPrintable {
b.WriteString("$'")
quoteBuf := make([]byte, 0, 16)
for rem := s; len(rem) > 0; {
r, size := utf8.DecodeRuneInString(rem)
switch {
case r == utf8.RuneError && size == 1:
fmt.Fprintf(&b, "\\x%x", rem[0])
case !unicode.IsPrint(r):
quoteBuf = quoteBuf[:0]
quoteBuf = strconv.AppendQuoteRuneToASCII(quoteBuf, r)
// We don't want the single quotes from strconv.
b.Write(quoteBuf[1 : len(quoteBuf)-1])
case r == '\'', r == '\\':
b.WriteByte('\\')
b.WriteRune(r)
default:
b.WriteRune(r)
}
rem = rem[size:]
}
b.WriteString("'")
return b.String(), true
}

// Single quotes without any need for escaping.
if !strings.Contains(s, "'") {
return "'" + s + "'", true
}

// The string contains single quotes,
// so fall back to double quotes.
b.WriteByte('"')
for _, r := range s {
switch r {
case '"', '\\', '`', '$':
b.WriteByte('\\')
}
b.WriteRune(r)
}
b.WriteByte('"')
return b.String(), true
}
4 changes: 4 additions & 0 deletions syntax/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ const (
// LangBash corresponds to the GNU Bash language, as described in its
// manual at https://www.gnu.org/software/bash/manual/bash.html.
//
// We currently follow Bash version 5.1.
//
// Its string representation is "bash".
LangBash LangVariant = iota

Expand All @@ -45,6 +47,8 @@ const (
// Note that it shares some features with Bash, due to the the shared
// ancestry that is ksh.
//
// We currently follow mksh version 59.
//
// Its string representation is "mksh".
LangMirBSDKorn

Expand Down
Loading

0 comments on commit 8bd780f

Please sign in to comment.