Permalink
Browse files

Move echo -e lexer definition in preparation for sharing with $''.

Also hook up ash to builtin-io.test.sh.  Find more bad shell behavior.

Simplify most dash tests to account for the lack of 'echo -e'
difference.  dash's echo interprets escapes by default.
  • Loading branch information...
Andy Chu
Andy Chu committed Jan 8, 2018
1 parent fc0d463 commit 019dfc791aab0982435ddd6747be9a93bca663aa
Showing with 81 additions and 56 deletions.
  1. +1 −30 core/builtin.py
  2. +3 −5 core/lexer.py
  3. +31 −0 osh/lex.py
  4. +44 −19 spec/builtin-io.test.sh
  5. +2 −2 test/spec.sh
View
@@ -222,35 +222,7 @@ def Resolve(argv0):
return EBuiltin.NONE
R = lexer.R
C = lexer.C
# TODO:
# - See if re2c supports {1,2} etc.
# - the DOLLAR_SQ lex state needs most of this logic.
ECHO_LEXER = lexer.SimpleLexer([
R(r'\\[0abeEfrtnv\\]', Id.Char_OneChar),
C(r'\c', Id.Char_Stop),
# Note: tokens above \0377 can either be truncated or be flagged a syntax
# error in strict mode.
R(r'\\0[0-7]{1,3}', Id.Char_Octal),
# \x6 is valid in bash
R(r'\\x[0-9a-fA-F]{1,2}', Id.Char_Hex),
R(r'\\u[0-9]{1,4}', Id.Char_Unicode4),
R(r'\\U[0-9]{1,8}', Id.Char_Unicode8),
R(r'[^\\]+', Id.Char_Literals),
R(r'\\.', Id.Char_Literals),
# Backslash that ends the string.
R(r'\\$', Id.Char_Literals),
# For re2c. TODO: need to make that translation.
C('\\\0', Id.Char_Literals),
])
ECHO_LEXER = lexer.SimpleLexer(lex.C_STRING_DEF)
_ONE_CHAR = {
'0': '\0',
@@ -266,7 +238,6 @@ def Resolve(argv0):
'\\': '\\',
}
# Strict mode syntax errors:
#
# \x is a syntax error -- needs two digits (It's like this in C)
View
@@ -235,12 +235,10 @@ def Read(self, lex_mode):
return t
# Based on osh/parse_lib.py MatchToken_Slow.
class SimpleLexer(object):
"""
Use Cases:
- echo -e, which interprets C-escaped strings.
- read -r
"""Lexer for echo -e, which interprets C-escaped strings.
Based on osh/parse_lib.py MatchToken_Slow.
"""
def __init__(self, pat_list):
self.pat_list = CompileAll(pat_list)
View
@@ -393,6 +393,37 @@ def IsKeyword(name):
C("'", Id.Right_SingleQuote),
]
# TODO:
# - See if re2c supports {1,2} etc.
# - the DOLLAR_SQ lex state needs most of this logic.
# Differences:
# - $'' ends at a single quote, so \' is escaped
# - echo -e ends at \0
# Used by ECHO_LEXER in core/builtin.py.
# Token patterns for C-style backslash escapes, as interpreted by echo -e.
C_STRING_DEF = [
  # A backslash followed by one of the single-character escape letters, or by
  # another backslash.
  R(r'\\[0abeEfrtnv\\]', Id.Char_OneChar),
  # A literal backslash-c; echo stops processing its input at this point.
  C(r'\c', Id.Char_Stop),

  # Note: tokens above \0377 can either be truncated or be flagged a syntax
  # error in strict mode.
  R(r'\\0[0-7]{1,3}', Id.Char_Octal),

  # \x6 is valid in bash
  R(r'\\x[0-9a-fA-F]{1,2}', Id.Char_Hex),
  # The digits of \u and \U are hexadecimal, just like \x (e.g. \u00e9).  A
  # decimal-only character class would stop matching at the first a-f digit
  # and cut the escape short.
  R(r'\\u[0-9a-fA-F]{1,4}', Id.Char_Unicode4),
  R(r'\\U[0-9a-fA-F]{1,8}', Id.Char_Unicode8),

  # A run of characters that contains no backslash.
  R(r'[^\\]+', Id.Char_Literals),
  # A backslash followed by a character that is not a recognized escape is
  # kept as literal text.
  R(r'\\.', Id.Char_Literals),

  # Backslash that ends the string.
  R(r'\\$', Id.Char_Literals),
  # For re2c.  TODO: need to make that translation.
  # (This is a backslash followed by a NUL byte.)
  C('\\\0', Id.Char_Literals),
]
# NOTE: Id.Ignored_LineCont is also not supported here, even though the whole
# point of it is that it supports other backslash escapes like \n! It just
# becomes a regular backslash.
View
@@ -78,9 +78,11 @@ echo -e 'ab\0cd'
# BUG dash stdout-json: "-e ab\n"
### \c stops processing input
echo -e xy 'ab\cde' 'ab\cde'
flags='-e'
case $SH in */dash) flags='' ;; esac
echo $flags xy 'ab\cde' 'ab\cde'
# stdout-json: "xy ab"
# OK dash stdout-json: "-e xy ab"
# N-I mksh stdout-json: "xy abde abde"
### echo -e with hex escape
@@ -89,19 +91,27 @@ echo -e 'abcd\x65f'
# N-I dash stdout-json: "-e abcd\\x65f\n"
### echo -e with octal escape
echo -e 'abcd\044e'
flags='-e'
case $SH in */dash) flags='' ;; esac
echo $flags 'abcd\044e'
# stdout-json: "abcd$e\n"
# OK dash stdout-json: "-e abcd$e\n"
### echo -e with 4 digit unicode escape
echo -e 'abcd\u0065f'
flags='-e'
case $SH in */dash) flags='' ;; esac
echo $flags 'abcd\u0065f'
# stdout-json: "abcdef\n"
# OK dash stdout-json: "-e abcd\\u0065f\n"
# N-I dash/ash stdout-json: "abcd\\u0065f\n"
### echo -e with 8 digit unicode escape
echo -e 'abcd\U00000065f'
flags='-e'
case $SH in */dash) flags='' ;; esac
echo $flags 'abcd\U00000065f'
# stdout-json: "abcdef\n"
# OK dash stdout-json: "-e abcd\\U00000065f\n"
# N-I dash/ash stdout-json: "abcd\\U00000065f\n"
### \0377 is the highest octal byte
echo -en '\03777' | od -A n -t x1 | sed 's/ \+/ /g'
@@ -112,13 +122,17 @@ echo -en '\03777' | od -A n -t x1 | sed 's/ \+/ /g'
# It is 256 % 256 which gets interpreted as a NUL byte.
echo -en '\04000' | od -A n -t x1 | sed 's/ \+/ /g'
# stdout-json: " 00 30\n"
# BUG ash stdout-json: " 20 30 30\n"
# N-I dash stdout-json: " 2d 65 6e 20\n"
### \0777 is out of range
echo -en '\0777' | od -A n -t x1 | sed 's/ \+/ /g'
flags='-en'
case $SH in */dash) flags='-n' ;; esac
echo $flags '\0777' | od -A n -t x1 | sed 's/ \+/ /g'
# stdout-json: " ff\n"
# OK mksh stdout-json: " c3 bf\n"
# OK dash stdout-json: " 2d 65 6e 20 ff 0a\n"
# BUG mksh stdout-json: " c3 bf\n"
# BUG ash stdout-json: " 3f 37\n"
### incomplete hex escape
echo -en 'abcd\x6' | od -A n -c | sed 's/ \+/ /g'
@@ -133,26 +147,36 @@ echo -e '\x' '\xg' | od -A n -c | sed 's/ \+/ /g'
# BUG mksh/zsh stdout-json: " \\0 \\0 g \\n\n"
### incomplete octal escape
echo -en 'abcd\04' | od -A n -c | sed 's/ \+/ /g'
flags='-en'
case $SH in */dash) flags='-n' ;; esac
echo $flags 'abcd\04' | od -A n -c | sed 's/ \+/ /g'
# stdout-json: " a b c d 004\n"
# OK dash stdout-json: " - e n a b c d 004 \\n\n"
### incomplete unicode escape
echo -en 'abcd\u006' | od -A n -c | sed 's/ \+/ /g'
# stdout-json: " a b c d 006\n"
# N-I dash stdout-json: " - e n a b c d \\ u 0 0 6 \\n\n"
# BUG ash stdout-json: " a b c d \\ u 0 0 6\n"
### \u6
echo -e '\u6' | od -A n -c | sed 's/ \+/ /g'
# stdout-json: " 006 \\n\n"
# N-I dash stdout-json: " - e \\ u 6 \\n\n"
flags='-en'
case $SH in */dash) flags='-n' ;; esac
echo $flags '\u6' | od -A n -c | sed 's/ \+/ /g'
# stdout-json: " 006\n"
# N-I dash/ash stdout-json: " \\ u 6\n"
### \0 \1 \8
# \0 is special, but \1 isn't in bash
# \1 is special in dash! geez
echo -e '\0' '\1' '\8' | od -A n -c | sed 's/ \+/ /g'
# stdout-json: " \\0 \\ 1 \\ 8 \\n\n"
# BUG dash stdout-json: " - e 001 \\ 8 \\n\n"
flags='-en'
case $SH in */dash) flags='-n' ;; esac
echo $flags '\0' '\1' '\8' | od -A n -c | sed 's/ \+/ /g'
# stdout-json: " \\0 \\ 1 \\ 8\n"
# BUG dash stdout-json: " 001 \\ 8\n"
# BUG ash stdout-json: " \\0 001 \\ 8\n"
### Read builtin
# NOTE: there are TABS below
@@ -300,6 +324,7 @@ echo '\a \b \c \d \e \f \g \h \x65 \145 \i' > $TMP/read-c.txt
read line < $TMP/read-c.txt
echo $line
# stdout-json: "a b c d e f g h x65 145 i\n"
# BUG ash stdout-json: "abcdefghx65 145 i\n"
# BUG dash/zsh stdout-json: "\u0007 \u0008\n"
# BUG mksh stdout-json: "\u0007 \u0008 d \u001b \u000c g h e 145 i\n"
View
@@ -263,7 +263,7 @@ builtins() {
builtin-io() {
sh-spec spec/builtin-io.test.sh --osh-failures-allowed 1 \
${REF_SHELLS[@]} $ZSH $OSH "$@"
${REF_SHELLS[@]} $ZSH $BUSYBOX_ASH $OSH "$@"
}
builtins2() {
@@ -398,7 +398,7 @@ var-op-other() {
}
var-op-strip() {
sh-spec spec/var-op-strip.test.sh --osh-failures-allowed 2 \
sh-spec spec/var-op-strip.test.sh --osh-failures-allowed 1 \
${REF_SHELLS[@]} $ZSH $OSH "$@"
}

0 comments on commit 019dfc7

Please sign in to comment.