Permalink
Please sign in to comment.
Browse files
Properly lex, parse and evaluate $''.
The lexing of backslash escapes is shared with 'echo -e' to the greatest
extent possible.
- Semi-automatically extract some tests for echo -e and $''. I found
some corner cases where bash's echo -e and $'' diverge. (e.g. octal
escape value out of bounds)
- core/lexer_gen.py: Properly translate regexes with {0,1}
- The SingleQuotedPart now includes the left quote token.
- Introduce EmptyPart for 'foo=' and "${a:-}", instead of reusing
SingleQuotedPart.
- Update lint count scripts.
Loading branch information...
Showing
with
281 additions
and 138 deletions.
- +0 −14 build/codegen.sh
- +4 −0 build/dev.sh
- +7 −69 core/builtin.py
- +15 −8 core/lexer_gen.py
- +2 −0 core/lexer_gen_test.py
- +1 −1 core/word.py
- +75 −0 core/word_compile.py
- +13 −1 core/word_eval.py
- +29 −0 gold/dollar-sq.sh
- +34 −0 gold/echo-e.sh
- +2 −1 osh/ast_.py
- +3 −3 osh/cmd_parse_test.py
- +36 −19 osh/lex.py
- +11 −0 osh/lex_test.py
- +3 −1 osh/osh.asdl
- +8 −6 osh/word_parse.py
- +6 −4 scripts/count.sh
- +22 −0 spec/builtin-io.test.sh
- +9 −9 test/gold.sh
- +1 −2 test/spec.sh
| @@ -0,0 +1,75 @@ | ||
| #!/usr/bin/python | ||
| """ | ||
| word_compile.py | ||
| This is called the "compile" stage because it happens after parsing, but it | ||
| doesn't depend on any values at runtime. | ||
| """ | ||
| from core.id_kind import Id | ||
| _ONE_CHAR = { | ||
| '0': '\0', | ||
| 'a': '\a', | ||
| 'b': '\b', | ||
| 'e': '\x1b', | ||
| 'E': '\x1b', | ||
| 'f': '\f', | ||
| 'n': '\n', | ||
| 'r': '\r', | ||
| 't': '\t', | ||
| 'v': '\v', | ||
| '\\': '\\', | ||
| } | ||
| # TODO: Strict mode syntax errors: | ||
| # | ||
| # \x is a syntax error -- needs two digits (It's like this in C) | ||
| # \0777 is a syntax error -- we shouldn't do modulus | ||
| # \d could be a syntax error -- it is better written as \\d | ||
def EvalCStringToken(id_, value):
    """Evaluate one lexed C-string token to the string it denotes.

    This function is shared between echo -e and $''.

    $'' could use it at compile time, much like brace expansion in braces.py.

    Args:
      id_: The token's Id; one of the Id.Char_* values handled below.
      value: The token's source text (the escape sequence or literal run).

    Returns:
      A byte string holding the evaluated characters, or None for
      Id.Char_Stop (backslash-c), which acts as a special sentinel.

    Raises:
      AssertionError: if id_ is not one of the handled Id.Char_* values.
    """
    if id_ == Id.Char_OneChar:
        # value[1] is the character after the first one (presumably the
        # backslash); it selects the entry in the escape table.
        return _ONE_CHAR[value[1]]

    if id_ == Id.Char_Stop:  # \c returns a special sentinel
        return None

    if id_ == Id.Char_Octal:
        # TODO: Error checking for \0777
        # The digits start at index 2.  Out-of-range values wrap around
        # modulo 256 instead of failing.
        # NOTE: In strict mode this should be an error instead:
        #raise AssertionError('Out of range')
        return chr(int(value[2:], 8) % 256)

    if id_ == Id.Char_Hex:
        return chr(int(value[2:], 16))

    if id_ in (Id.Char_Unicode4, Id.Char_Unicode8):
        # Encode as UTF-8 so that this branch returns a byte string like all
        # the others.  Mixing unicode and byte strings in Python 2 triggers
        # implicit ASCII coercion, which fails on non-ASCII data.
        # NOTE(review): assumes callers want UTF-8 output -- confirm.
        return unichr(int(value[2:], 16)).encode('utf-8')

    if id_ == Id.Char_Literals:
        return value

    raise AssertionError('Unexpected C string token id: %r' % (id_,))
| @@ -1,6 +1,35 @@ | ||
| #!/bin/bash | ||
| # | ||
| # Adapted from gold/echo-e.sh, which was adapted from spec/builtin-io.test.sh. | ||
| echo $'foo\tbar\n' | ||
| echo $'foo\tbar\n\ | ||
| baz' | ||
| echo $'\\' | ||
| echo $'abc\ndef\n' | ||
| echo $'\a\b\d\e\f' | ||
| echo $'\n\r\t\v' | ||
| # Doesn't pass because Python can have NUL embedded in strings! | ||
| #echo $'ab\0cd' | od -A n -c | sed 's/ \+/ /g' | ||
| echo $'abcd\x65f' | ||
| echo $'abcd\044e' | ||
| echo $'abcd\u0065f' | ||
| echo $'abcd\U00000065f' | ||
| # In bash, these are different than echo -e. I'm not sure why yet. | ||
| #echo $'\03777' | od -A n -t x1 | sed 's/ \+/ /g' | ||
| #echo $'\04000' | od -A n -t x1 | sed 's/ \+/ /g' | ||
| #echo $'\0777' | od -A n -t x1 | sed 's/ \+/ /g' | ||
| echo $'abcd\x6' | od -A n -c | sed 's/ \+/ /g' | ||
| echo $'\x' $'\xg' | od -A n -c | sed 's/ \+/ /g' | ||
| echo $'abcd\04' | od -A n -c | sed 's/ \+/ /g' | ||
| echo $'abcd\u006' | od -A n -c | sed 's/ \+/ /g' | ||
| echo $'\u6' | od -A n -c | sed 's/ \+/ /g' | ||
| #echo $'\0' '\1' '\8' | od -A n -c | sed 's/ \+/ /g' | ||
| echo $'foo | ||
| bar' | ||
| echo $'foo\ | ||
| bar' |
| @@ -0,0 +1,34 @@ | ||
| #!/bin/bash | ||
| # | ||
| # Semi-automatically extracted like this: | ||
| # | ||
| # grep ^echo spec/builtin-io.test.sh | ||
| # | ||
| # For converting to gold/dollar-sq.sh. | ||
| echo -e '\\' | ||
| echo -en 'abc\ndef\n' | ||
| echo -ez 'abc\n' | ||
| echo -e '\a\b\d\e\f' | ||
| echo -e '\n\r\t\v' | ||
| echo -e 'ab\0cd' | ||
| echo -e xy 'ab\cde' 'ab\cde' | ||
| echo -e 'abcd\x65f' | ||
| echo -e 'abcd\044e' | ||
| echo -e 'abcd\u0065f' | ||
| echo -e 'abcd\U00000065f' | ||
| echo -en '\03777' | od -A n -t x1 | sed 's/ \+/ /g' | ||
| echo -en '\04000' | od -A n -t x1 | sed 's/ \+/ /g' | ||
| echo -e '\0777' | od -A n -t x1 | sed 's/ \+/ /g' | ||
| echo -en 'abcd\x6' | od -A n -c | sed 's/ \+/ /g' | ||
| echo -e '\x' '\xg' | od -A n -c | sed 's/ \+/ /g' | ||
| echo -e 'abcd\04' | od -A n -c | sed 's/ \+/ /g' | ||
| echo -en 'abcd\u006' | od -A n -c | sed 's/ \+/ /g' | ||
| echo -e '\u6' | od -A n -c | sed 's/ \+/ /g' | ||
| echo -e '\0' '\1' '\8' | od -A n -c | sed 's/ \+/ /g' | ||
| echo -e 'foo | ||
| bar' | ||
| echo -e 'foo\ | ||
| bar' |
Oops, something went wrong.
0 comments on commit
5ea4327