Permalink
Browse files

Change the ${x/pat/replace} parser to use the exception style.

This is a good change -- it revealed bugs like ${x/%/} not being checked
for an empty pattern.

It should be ${x/'%'/} or ${x/\%/}.

The control flow looks better too.
  • Loading branch information...
Andy Chu
Andy Chu committed Aug 21, 2018
1 parent 21d9e9d commit f4870932bbf49192138bc8ecf3af943e9e810d45
Showing with 47 additions and 26 deletions.
  1. +1 −1 osh/lex.py
  2. +36 −24 osh/word_parse.py
  3. +10 −1 test/parse-errors.sh
View
@@ -110,7 +110,7 @@
# In oil, I hope to have these lexer modes:
# COMMAND
# EXPRESSION (takes place of ARITH, VS_UNQ_ARG, VS_DQ_ARG)
# EXPRESSION (takes place of ARITH, VS_ARG_UNQ, VS_ARG_DQ)
# SQ RAW_SQ DQ RAW_DQ
# VS -- a single state here? Or switches into expression state, because }
# is an operator
View
@@ -203,7 +203,8 @@ def _ReadPatSubVarOp(self, lex_mode):
do_suffix = False
pat = self._ReadVarOpArg(lex_mode, eof_type=Id.Lit_Slash, empty_ok=False)
if not pat: return None
if not pat:
return None
if len(pat.parts) == 1:
ok, s, quoted = word.StaticEval(pat)
@@ -216,40 +217,51 @@ def _ReadPatSubVarOp(self, lex_mode):
if len(pat.parts) == 0:
p_die('Pattern in ${x/pat/replace} must not be empty',
token=self.cur_token)
else:
first_part = pat.parts[0]
if first_part.tag == word_part_e.LiteralPart:
lit_id = first_part.token.id
if lit_id == Id.Lit_Slash:
do_all = True
pat.parts.pop(0)
elif lit_id == Id.Lit_Pound:
do_prefix = True
pat.parts.pop(0)
elif lit_id == Id.Lit_Percent:
do_suffix = True
pat.parts.pop(0)
#self._Peek()
# Check for / # % modifier on pattern.
first_part = pat.parts[0]
if first_part.tag == word_part_e.LiteralPart:
lit_id = first_part.token.id
if lit_id == Id.Lit_Slash:
do_all = True
pat.parts.pop(0)
elif lit_id == Id.Lit_Pound:
do_prefix = True
pat.parts.pop(0)
elif lit_id == Id.Lit_Percent:
do_suffix = True
pat.parts.pop(0)
# TODO: Print the modifier better.
if len(pat.parts) == 0:
p_die('Pattern in ${x/pat/replace} must not be empty (got modifier %s)',
first_part, token=self.cur_token)
if self.token_type == Id.Right_VarSub:
# e.g. ${v/a} is the same as ${v/a/} -- empty replacement string
return ast.PatSub(pat, None, do_all, do_prefix, do_suffix)
elif self.token_type == Id.Lit_Slash:
replace = self._ReadVarOpArg(lex_mode) # do not stop at /
if not replace: return None
if not replace:
return None
self._Peek()
if self.token_type == Id.Right_VarSub:
return ast.PatSub(pat, replace, do_all, do_prefix, do_suffix)
if self.token_type != Id.Right_VarSub:
# NOTE: I think this never happens.
# We're either in the VS_ARG_UNQ or VS_ARG_DQ lex state, and everything
# there is Lit_ or Left_, except for }.
p_die("Expected } after replacement string, got %s", self.cur_token,
token=self.cur_token)
else:
self._BadToken("Expected } after pat sub, got %s", self.cur_token)
return None
return ast.PatSub(pat, replace, do_all, do_prefix, do_suffix)
else:
self._BadToken("Expected } after pat sub, got %s", self.cur_token)
return None
# Happens with ${x//} and ${x///foo}, see test/parse-errors.sh
# TODO: Print the token better.
p_die("Expected } after pat sub, got %s", self.cur_token,
token=self.cur_token)
def _ReadSubscript(self):
""" Subscript = '[' ('@' | '*' | ArithExpr) ']'
View
@@ -33,14 +33,23 @@ cases-in-strings() {
_error-case 'echo $( echo > >> )'
_error-case 'echo ${'
_error-case 'echo ${x/}' # pattern must not be empty
_error-case 'echo ${x/%}' # pattern must not be empty (only had modifier)
_error-case 'echo ${x/%/}' # pattern must not be empty (only had modifier)
# These are a little odd
_error-case 'echo ${x/}'
_error-case 'echo ${x//}'
_error-case 'echo ${x///}'
_error-case 'echo ${x/foo}'
_error-case 'echo ${x//foo}'
_error-case 'echo ${x///foo}'
# Newline in replacement pattern
_error-case 'echo ${x//foo/replace
}'
_error-case 'echo ${x//foo/replace$foo}'
}
# Cases in their own file

0 comments on commit f487093

Please sign in to comment.