Permalink
Browse files

Bug fix: octal syntax is $'\377', not $'\0377'.

Note that this is inconsistent with echo -e, which uses '\0377' (with a leading 0).

(Related to issue #61: Alpine uses $'\n'.)
  • Loading branch information...
Andy Chu
Andy Chu committed Jan 9, 2018
1 parent 0d880c3 commit c4d7f7d2ad3385030ff4ee606826dfdb61adef7d
Showing with 44 additions and 14 deletions.
  1. +4 −1 core/id_kind.py
  2. +10 −1 core/word_compile.py
  3. +9 −4 gold/dollar-sq.sh
  4. +8 −8 osh/lex.py
  5. +13 −0 spec/quote.test.sh
View
@@ -399,7 +399,10 @@ def _AddKinds(spec):
# For C-escaped strings.
spec.AddKind('Char', [
'OneChar', 'Stop', 'Hex', 'Octal', 'Unicode4', 'Unicode8', 'Literals'
'OneChar', 'Stop', 'Hex',
# Two variants of Octal: \377, and \0377.
'Octal3', 'Octal4',
'Unicode4', 'Unicode8', 'Literals'
])
View
@@ -42,7 +42,16 @@ def EvalCStringToken(id_, value):
elif id_ == Id.Char_Stop: # \c returns a special sentinel
return None
elif id_ == Id.Char_Octal:
elif id_ == Id.Char_Octal3: # $'\377'
s = value[1:]
i = int(s, 8)
if i >= 256:
i = i % 256
# NOTE: This is for strict mode
#raise AssertionError('Out of range')
return chr(i)
elif id_ == Id.Char_Octal4: # \0377 for echo -e
# TODO: Error checking for \0777
s = value[2:]
i = int(s, 8)
View
@@ -17,10 +17,15 @@ echo $'abcd\x65f'
echo $'abcd\044e'
echo $'abcd\u0065f'
echo $'abcd\U00000065f'
# In bash, these are different than echo -e. I'm not sure why yet.
#echo $'\03777' | od -A n -t x1 | sed 's/ \+/ /g'
#echo $'\04000' | od -A n -t x1 | sed 's/ \+/ /g'
#echo $'\0777' | od -A n -t x1 | sed 's/ \+/ /g'
# NOTE: $'\377' is equivalent to echo -e '\0377'; only the echo -e form takes a leading 0
echo $'\3777' | od -A n -t x1 | sed 's/ \+/ /g'
echo $'\4010' | od -A n -t x1 | sed 's/ \+/ /g'
echo $'\777' | od -A n -t x1 | sed 's/ \+/ /g'
# This wraps to \0, so it's not used.
#echo $'\4000' | od -A n -t x1 | sed 's/ \+/ /g'
echo $'abcd\x6' | od -A n -c | sed 's/ \+/ /g'
echo $'\x' $'\xg' | od -A n -c | sed 's/ \+/ /g'
echo $'abcd\04' | od -A n -c | sed 's/ \+/ /g'
View
@@ -395,9 +395,6 @@ def IsKeyword(name):
# Shared between echo -e and $''.
_C_STRING_COMMON = [
# Note: tokens above \0377 can either be truncated or be flagged as a syntax
# error in strict mode.
R(r'\\0[0-7]{1,3}', Id.Char_Octal),
# \x6 is valid in bash
R(r'\\x[0-9a-fA-F]{1,2}', Id.Char_Hex),
@@ -412,6 +409,10 @@ def IsKeyword(name):
# Used by ECHO_LEXER in core/builtin.py.
ECHO_E_DEF = _C_STRING_COMMON + [
# Note: tokens above \0377 can either be truncated or be flagged as a syntax
# error in strict mode.
R(r'\\0[0-7]{1,3}', Id.Char_Octal4),
C(r'\c', Id.Char_Stop),
# e.g. \A -> \A, is not a backslash escape.
@@ -427,15 +428,14 @@ def IsKeyword(name):
R(r'[^\\]+', Id.Char_Literals),
]
# Lexer patterns that are defined but, judging by the name, not part of any
# active lexer definition -- NOTE(review): confirm nothing else references this.
UNUSED = [
# For re2c. TODO: need to make that translation.
# The pattern string is a backslash followed by a NUL character ('\\' + '\0'),
# tagged as Id.Char_Literals.
C('\\\0', Id.Char_Literals),
]
# NOTE: Id.Ignored_LineCont is also not supported here, even though the whole
# point of it is that it supports other backslash escapes like \n! It just
# becomes a regular backslash.
LEXER_DEF[lex_mode_e.DOLLAR_SQ] = _C_STRING_COMMON + [
# Silly difference! In echo -e, the syntax is \0377, but here it's $'\377',
# with no leading 0.
R(r'\\[0-7]{1,3}', Id.Char_Octal3),
# \' is escaped in $'' mode, but not echo -e
C(r'\'', Id.Char_OneChar),
View
@@ -145,6 +145,19 @@ echo $'col1\ncol2\ncol3'
# In dash, \n is special within single quotes
# N-I dash stdout-json: "$col1\ncol2\ncol3\n"
### $'' octal escapes don't have leading 0
# echo -e syntax is echo -e \0377
echo -n $'\001' $'\377' | od -A n -c | sed 's/ \+/ /g'
## STDOUT:
001 377
## END
## N-I dash STDOUT:
$ 001 $ 377
## END
## BUG ash STDOUT:
001 0O7
## END
### $""
echo $"foo"
# stdout: foo

0 comments on commit c4d7f7d

Please sign in to comment.