Permalink
Browse files

Fix bug evaluating unquoted strings with backslashes.

In the case where we ARE splitting and globbing, we still have to quote
backslashes (but not any other character), because it's significant to
both the IFS splitting and globbing operations.

Caught by Python's configure script.
  • Loading branch information...
Andy Chu
Andy Chu committed Jan 22, 2018
1 parent 4666d3b commit 5235e52c7e1d7c8297f0e16e86c081871d0d84f8
Showing with 82 additions and 59 deletions.
  1. +3 −15 core/glob_.py
  2. +21 −37 core/legacy.py
  3. +38 −7 core/word_eval.py
  4. +5 −0 native/libc_test.py
  5. +15 −0 spec/quote.test.sh
View
@@ -63,19 +63,9 @@ def GlobEscape(s):
# We need to handle glob patterns, but fnmatch doesn't give you the positions
# of matches. So we convert globs to regexps.
# There are two regex engines we can use. Each has advantages and
# disadvantages:
# Python regex:
# - Supports Greedy vs. Non-greedy (necessary for strip ops, but not patsub)
# - Doesn't rely on global variables for unicode. I think libc string
# functions use LOCALE?
# ERE:
# - Linear time algorithm
# - Save code space
# - Supports the same character classes as glob.
# TODO: Use this for ${s//pat*/__}
# NOTE: Is [!abc] negation rather than [^abc] ?
# What about unicode? Do we have to set any global variables.
def GlobToExtendedRegex(g):
"""Convert a glob to a libc extended regexp.
@@ -84,8 +74,6 @@ def GlobToExtendedRegex(g):
An ERE string, or None if the pattern is a constant string rather than
a glob.
"""
# Could be used for ${s//pat*/__}, but NOT # ## % %%.
# We'll use Python everywhere for simplicity.
raise NotImplementedError
View
@@ -14,6 +14,27 @@
Idea: This is discouraged/legacy, so write it in Oil rather than C++?
Problem: Need both classes and algebraic data types.
Do we have different splitters? Awk splitter might be useful. Regex
splitter later. CSV splitter?
LiteralSlice.
Other kinds of splitters:
- RegexSplitter
- CsvSplitter
- TSV2Splitter -- this transforms because of \u0065 in JSON. So it's not a
pure slice, but neither is IFS splitting because of backslashes.
- AwkSplitter
- Perl?
- does perl have a split context?
with SPLIT_REGEX = / digit+ / {
echo $#
echo $len(argv)
echo $1 $2
echo @argv
}
"""
from core import runtime
@@ -26,25 +47,6 @@
DEFAULT_IFS = ' \t\n'
# TODO:
#
# Do we have different splitters? Awk splitter might be useful. Regex
# splitter later. CSV splitter? TSV? the TSV one transforms? Because of
# \u0065 in JSON. I guess you can have another kind of slice -- a
# LiteralSlice.
#
#
# with SPLIT_REGEX = / digit+ / {
# echo $#
# echo $len(argv)
# echo $1 $2
# echo @argv
# }
#
# Yes this is nice. How does perl do it?
def _SpansToParts(s, spans):
"""Helper for SplitForWordEval."""
parts = []
@@ -379,21 +381,3 @@ def Split(self, s, allow_escape):
spans.append((span_type, n))
return spans
# self.splitter = SplitContext()
# SplitManager
# Has the cache from IFS -> splitter
# Split(s, allow_escape)
#
# _DefaultIfsSplitter -- \t\n\n
# _WhitespaceIfsSplitter
# _OtherIfsSplitter
# _MixedIfsSplitter -- ifs and other
# Split(s, allow_escape)
#
# RegexSplitter
# CsvSplitter (TSV2Splitter maybe)
# AwkSplitter
#
# Any other kind of tokenizing? This is based on lines. So TSV2 does fit in.
View
@@ -26,6 +26,21 @@
e_die = util.e_die
def _BackslashEscape(s):
  """Double up every backslash in s.

  Useful for strings about to be globbed and strings about to be IFS escaped.
  Similar to GlobEscape and splitter.Escape(), except that the backslash is
  the only character escaped; everything else passes through unchanged.

  Args:
    s: The string to escape.

  Returns:
    A copy of s in which each backslash has been replaced by two backslashes.
  """
  # A single str.replace() pass does the whole job; the character-by-character
  # loop that used to follow this return was unreachable and has been removed.
  return s.replace('\\', '\\\\')
def _ValueToPartValue(val, quoted):
"""Helper for VarSub evaluation.
@@ -772,6 +787,8 @@ def _EvalWordFrame(self, frame, argv):
all_split_glob = True
any_split_glob = False
#log('--- frame %s', frame)
for s, do_split_glob in frame:
#log('-- %r %r', s, do_split_glob)
if s:
@@ -798,12 +815,27 @@ def _EvalWordFrame(self, frame, argv):
# Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
frags = []
for frag, do_split_glob in frame:
#log('do_split_glob %s', do_split_glob)
if will_glob and not do_split_glob:
frag = glob_.GlobEscape(frag)
#log('GLOB ESCAPED %r', p2)
#log('frag %s do_split_glob %s', frag, do_split_glob)
# If it was quoted, then
if do_split_glob:
# We're going to both split and glob. So we want to backslash
# escape twice?
# Suppose we get a literal \.
# \ -> \\
# \\ -> \\\\
# Splitting takes \\\\ -> \\
# Globbing takes \\ to \ if it doesn't match
if will_glob:
frag = _BackslashEscape(frag)
frag = _BackslashEscape(frag)
else:
if will_glob:
frag = glob_.GlobEscape(frag)
#log('GLOB ESCAPED %r', p2)
if not do_split_glob:
frag = self.splitter.Escape(frag)
#log('IFS ESCAPED %r', p2)
@@ -821,10 +853,9 @@ def _EvalWordFrame(self, frame, argv):
return
#log('split args: %r', args)
#out = []
for a in args:
# TODO: Expand() should take out parameter.
results = self.globber.Expand(a)
#out.extend(results)
argv.extend(results)
def _EvalWordSequence(self, words):
View
@@ -47,6 +47,11 @@ def testGlob(self):
print('GLOB')
print(libc.glob('*.py'))
# This will not match anything!
print(libc.glob('\\'))
# This one will match a file named \
print(libc.glob('\\\\'))
def testRegex(self):
#print(libc.regcomp(r'.*\.py'))
self.assertEqual(True, libc.regex_parse(r'.*\.py'))
View
@@ -51,6 +51,21 @@ FOO=bar
echo "foo${#FOO}"
# stdout: foo3
### Storing backslashes and then echoing them
# This is a bug fix; it used to cause problems with unescaping.
one='\'
two='\\'
echo $one $two
echo "$one" "$two"
## STDOUT:
\ \\
\ \\
## END
## BUG dash/mksh STDOUT:
\ \
\ \
# END
### Backslash escapes
echo \$ \| \a \b \c \d \\
# stdout: $ | a b c d \

0 comments on commit 5235e52

Please sign in to comment.