@@ -1144,9 +1144,9 @@ def testQuineDb(self):
self.assertEqual(1, len(w.parts))
p = w.parts[0]
self.assertEqual(3, len(p.tokens))
self.assertEqual(Id.Lit_Chars, p.tokens[0].id)
self.assertEqual(Id.Lit_EscapedChar, p.tokens[1].id)
self.assertEqual(Id.Lit_Chars, p.tokens[2].id)
self.assertEqual(Id.Char_Literals, p.tokens[0].id)
self.assertEqual(Id.Char_OneChar, p.tokens[1].id)
self.assertEqual(Id.Char_Literals, p.tokens[2].id)
def testArithConstants(self):
# Found in Gherkin
@@ -393,19 +393,8 @@ def IsKeyword(name):
C("'", Id.Right_SingleQuote),
]
# TODO:
# - See if re2c supports {1,2} etc.
# - the DOLLAR_SQ lex state needs most of this logic.
# Differences:
# - $'' ends at a single quote, so \' is escaped
# - echo -e ends at \0
# Used by ECHO_LEXER in core/builtin.py.
C_STRING_DEF = [
R(r'\\[0abeEfrtnv\\]', Id.Char_OneChar),
C(r'\c', Id.Char_Stop),
# Shared between echo -e and $''.
_C_STRING_COMMON = [
# Note: tokens above \0377 can either be truncated or flagged as a syntax
# error in strict mode.
R(r'\\0[0-7]{1,3}', Id.Char_Octal),
@@ -415,24 +404,52 @@ def IsKeyword(name):
R(r'\\u[0-9a-fA-F]{1,4}', Id.Char_Unicode4),
R(r'\\U[0-9a-fA-F]{1,8}', Id.Char_Unicode8),
R(r'[^\\]+', Id.Char_Literals),
R(r'\\[0abeEfrtnv\\]', Id.Char_OneChar),
# Backslash that ends a line. Note '.' doesn't match a newline character.
C('\\\n', Id.Char_Literals),
]
# Used by ECHO_LEXER in core/builtin.py.
ECHO_E_DEF = _C_STRING_COMMON + [
C(r'\c', Id.Char_Stop),
# e.g. \A stays as \A; it's not a recognized backslash escape.
# This has to come AFTER \c and the other escapes above.
#
# Does this include an embedded \n?  Is that possible with echo -e?
R(r'\\.', Id.Char_Literals),
# Backslash that ends the string.
R(r'\\$', Id.Char_Literals),
# e.g. 'foo', anything that's not a backslash escape
R(r'[^\\]+', Id.Char_Literals),
]
UNUSED = [
# For re2c. TODO: need to make that translation.
C('\\\0', Id.Char_Literals),
]
# NOTE: Id.Ignored_LineCont is also not supported here, even though the whole
# point of it is that it supports other backslash escapes like \n!  The line
# continuation just becomes a regular backslash.
LEXER_DEF[lex_mode_e.DOLLAR_SQ] = [
R(r"[^'\\\0]+", Id.Lit_Chars),
LEXER_DEF[lex_mode_e.DOLLAR_SQ] = _C_STRING_COMMON + [
# \' is escaped in $'' mode, but not echo -e
C(r'\'', Id.Char_OneChar),
# e.g. 'foo', anything that's not a backslash escape. Need to exclude ' as
# well.
R(r"[^\\'\0]+", Id.Char_Literals),
# e.g. a \x that isn't followed by hex digits doesn't match the hex escape
# rule, so the backslash falls through to this rule.  This could be an error.
C('\\', Id.Char_Literals),
C("'", Id.Right_SingleQuote),
R(r"\\[^\0]", Id.Lit_EscapedChar),
# Backslash that ends the file! Caught by re2c exhaustiveness check. For
# now, make it Unknown.
# Backslash that ends the file! Caught by re2c exhaustiveness check. Parser
# will assert; should give a better syntax error.
C('\\\0', Id.Unknown_Tok),
]
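The shared _C_STRING_COMMON table only classifies backslash escapes; something still has to decode the resulting Kind.Char tokens into bytes when echo -e or $'' is evaluated. Below is a minimal sketch of such a decoder, assuming Id can be imported from core/id_kind.py (the file counted later in this change). The function name and the escape table are illustrative assumptions, not code from this commit, and the hex/unicode rules that sit just outside this hunk are omitted.

    from core.id_kind import Id

    # Illustrative mapping for Id.Char_OneChar values; the real table may live
    # elsewhere (e.g. in the echo builtin or a word compile pass).
    _ONE_CHAR = {
        r'\0': '\0', r'\a': '\a', r'\b': '\b', r'\e': '\x1b', r'\E': '\x1b',
        r'\f': '\f', r'\n': '\n', r'\r': '\r', r'\t': '\t', r'\v': '\v',
        r'\\': '\\', r"\'": "'",
    }

    def _EvalCStringToken(token_id, val):
      """Decode one Kind.Char token into the bytes it denotes (sketch)."""
      if token_id == Id.Char_Literals:
        return val                     # literal run, backslash-newline, or trailing backslash
      elif token_id == Id.Char_OneChar:
        return _ONE_CHAR[val]          # e.g. r'\n' -> newline
      elif token_id == Id.Char_Octal:
        return chr(int(val[2:], 8))    # e.g. r'\0101' -> 'A'
      elif token_id == Id.Char_Stop:   # \c: echo -e produces no more output
        return None
      else:                            # Char_Hex / Char_Unicode* omitted here
        raise AssertionError(token_id)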
@@ -138,6 +138,17 @@ def testDBracketState(self):
self.assertTokensEqual(ast.token(Id.BoolUnary_z, '-z'), t)
self.assertEqual(Kind.BoolUnary, LookupKind(t.id))
def testDollarSqState(self):
lexer = _InitLexer(r'foo bar\n \x00 \000 \u0065')
t = lexer.Read(lex_mode_e.DOLLAR_SQ)
print(t)
self.assertTokensEqual(ast.token(Id.Char_Literals, 'foo bar'), t)
t = lexer.Read(lex_mode_e.DOLLAR_SQ)
print(t)
self.assertTokensEqual(ast.token(Id.Char_OneChar, r'\n'), t)
def testLookAhead(self):
# I think this is the usage pattern we care about. Peek and Next() past
# the function; then Peek() the next token. Then Lookahead in that state.
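If the test also wants to pin down the token's Kind, one more assertion could be added. A small sketch, assuming LookupKind (already used in testDBracketState above) maps the new Char_* ids to Kind.Char, which is what the word-parser change below relies on:

    # Possible extra assertion for testDollarSqState (sketch):
    self.assertEqual(Kind.Char, LookupKind(t.id))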
@@ -70,7 +70,9 @@ module osh
ArrayLiteralPart(word* words)
| LiteralPart(token token)
| EscapedLiteralPart(token token)
| SingleQuotedPart(token* tokens)
-- for 'foo=' and "${a:-}"
| EmptyPart()
| SingleQuotedPart(token left, token* tokens)
| DoubleQuotedPart(word_part* parts)
| SimpleVarSub(token token)
| BracedVarSub(token token,
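For reference, a sketch of the nodes the parser builds under the new schema for a word like $'foo\n'. The constructor shapes follow the ASDL fields and the _ReadSingleQuotedPart change further down, but the token spellings and the Left_* id name for the opening $' are assumptions:

    part = ast.SingleQuotedPart(
        ast.token(Id.Left_DollarSingleQuote, "$'"),   # left; id name assumed
        [ast.token(Id.Char_Literals, 'foo'),
         ast.token(Id.Char_OneChar, r'\n')])

    # 'foo=' and "${a:-}" now get an explicitly empty part instead:
    empty = ast.EmptyPart()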
@@ -153,7 +153,7 @@ def _ReadVarOpArg(self, arg_lex_mode, eof_type=Id.Undefined_Tok,
# return a CompoundWord with no parts, which is explicitly checked with a
# custom error message.
if not w.parts and arg_lex_mode == lex_mode_e.VS_ARG_DQ and empty_ok:
w.parts.append(ast.SingleQuotedPart())
w.parts.append(ast.EmptyPart())
return w
def _ReadSliceArg(self):
@@ -492,29 +492,31 @@ def _ReadBracedBracedVarSub(self, d_quoted=False):
return part
def _ReadSingleQuotedPart(self, lex_mode):
quoted_part = ast.SingleQuotedPart()
left = self.cur_token
tokens = []
done = False
while not done:
self._Next(lex_mode)
self._Peek()
if self.token_kind == Kind.Lit:
quoted_part.tokens.append(self.cur_token)
# Kind.Char emitted in DOLLAR_SQ state
if self.token_kind in (Kind.Lit, Kind.Char):
tokens.append(self.cur_token)
elif self.token_kind == Kind.Eof:
self.AddErrorContext('Unexpected EOF in single-quoted string')
return False
elif self.token_kind == Kind.Right:
done = True # assume Id.Right_S_QUOTE
done = True # assume Id.Right_SingleQuote
else:
raise AssertionError(
'Unhandled token in single-quoted part %s (%d)' %
(self.cur_token, self.token_kind))
return quoted_part
return ast.SingleQuotedPart(left, tokens)
def _ReadDoubleQuotedLeftParts(self):
"""Read substitution parts in a double quoted context."""
@@ -88,10 +88,12 @@ parser() {
wc -l osh/osh.asdl core/id_kind.py | sort -n
echo
# I'm counting brace detection/expansion here because it doesn't depend on
# runtime info.
echo 'Lexer/Parser'
wc -l osh/{*_parse.py,lex.py,parse_lib.py} core/{word,braces}.py | sort -n
echo 'Lexer / Parser'
wc -l osh/{*_parse.py,lex.py,parse_lib.py} core/word.py | sort -n
echo
echo 'Compiler / Middle End'
wc -l core/{braces,word_compile}.py | sort -n
echo
echo 'Common Algorithms'
@@ -58,6 +58,28 @@ echo -ez 'abc\n'
# stdout-json: "-ez abc\\n\n"
# OK dash/mksh/zsh stdout-json: "-ez abc\n\n"
### echo -e with embedded newline
flags='-e'
case $SH in */dash) flags='' ;; esac
echo $flags 'foo
bar'
## STDOUT:
foo
bar
## END
### echo -e line continuation
flags='-e'
case $SH in */dash) flags='' ;; esac
echo $flags 'foo\
bar'
## STDOUT:
foo\
bar
## END
### echo -e with C escapes
# https://www.gnu.org/software/bash/manual/bashref.html#Bourne-Shell-Builtins
# not sure why \c is like NUL?
@@ -15,21 +15,21 @@ set -o errexit
_compare() {
set +o errexit
"$@" >_tmp/left.txt
local left_status=$?
"$@" >_tmp/shebang.txt
local expected_status=$?
bin/osh "$@" >_tmp/right.txt
local right_status=$?
bin/osh "$@" >_tmp/osh.txt
local osh_status=$?
set -o errexit
if ! diff -u _tmp/left.txt _tmp/right.txt; then
if ! diff -u _tmp/shebang.txt _tmp/osh.txt; then
echo FAIL
exit 1
fi
if test $left_status != $right_status; then
echo "FAIL: Got status $right_status but expected $left_status"
if test $expected_status != $osh_status; then
echo "FAIL: Got status $osh_status but expected $expected_status"
exit 1
fi
@@ -92,8 +92,8 @@ complex-here-docs() { _compare gold/complex-here-docs.sh; }
strip-op-char-class() { _compare gold/strip-op-char-class.sh; }
# Not implemented in osh.
# Similar tests for backslash escaping.
echo-e() { _compare gold/echo-e.sh; }
dollar-sq() { _compare gold/dollar-sq.sh; }
# Needs declare -p
@@ -234,9 +234,8 @@ subshell() {
${REF_SHELLS[@]} $OSH "$@"
}
# Need to implement $'' escaping
quote() {
sh-spec spec/quote.test.sh --osh-failures-allowed 1 \
sh-spec spec/quote.test.sh \
${REF_SHELLS[@]} $BUSYBOX_ASH $OSH "$@"
}