View
@@ -17,8 +17,11 @@
import re
from core import runtime
from core import util
value_e = runtime.value_e
span_e = runtime.span_e
log = util.log
DEFAULT_IFS = ' \t\n'
@@ -36,102 +39,7 @@ def SplitForWordEval(self, s):
# NOTE: Doesn't need to implement SplitForRead
def _Split(s, ifs):
"""Helper function for IFS split."""
parts = ['']
for c in s:
if c in ifs:
parts.append('')
else:
parts[-1] += c
return parts
def IfsSplit(s, ifs):
  """Old regex-based IFS field splitting (superseded by the span splitters).

  http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
  https://www.gnu.org/software/bash/manual/bashref.html#Word-Splitting

  Summary:
  1. ' \t\n' is special.  Whitespace is trimmed off the front and back.
  2. if IFS is '', no field splitting is performed.
  3. Otherwise, suppose IFS = ' ,\t'.  Then IFS whitespace is space or comma.
    a. IFS whitespace is ignored at beginning and end.
    b. any other IFS char delimits the field, along with adjacent IFS
       whitespace.
    c. IFS whitespace shall delimit a field.

  # Can we do this be regex or something?  Use regex match?
  """
  assert isinstance(ifs, str), ifs
  if not ifs:
    return [s]  # no splitting

  # print("IFS SPLIT %r %r" % (s, ifs))
  # TODO: This detect if it's ALL whitespace?  If ifs_other is empty?
  if ifs == ' \t\n':
    return _Split(s, ifs)

  # Detect IFS whitespace: partition IFS chars into whitespace vs. "other".
  # TODO: This should be cached.  In Mem?  Or Splitter?
  ifs_whitespace = ''
  ifs_other = ''
  for c in ifs:
    if c in ' \t\n':
      ifs_whitespace += c
    else:
      ifs_other += c

  # TODO: Rule 3a.  Ignore leading and trailing IFS whitespace?

  # hack to make an RE out of the raw IFS chars
  # Hm this escapes \t as \\\t?  I guess that works.
  ws_re = re.escape(ifs_whitespace)
  other_re = re.escape(ifs_other)
  #print('chars', repr(ifs_whitespace), repr(ifs_other))
  #print('RE', repr(ws_re), repr(other_re))

  # BUG: re.split() is the wrong model.  It works with the 'delimiting' model.
  # Forward iteration.  TODO: grep for IFS in dash/mksh/bash/ash.

  # Pattern shape: ifs_ws | ifs_ws* non_ws_ifs ifs_ws*
  if ifs_whitespace and ifs_other:
    # first alternative is rule 3c.
    # BUG: It matches the whitespace first?
    pat = '[%s]+|[%s]*[%s][%s]*' % (ws_re, ws_re, other_re, ws_re)
  elif ifs_whitespace:
    pat = '[%s]+' % ws_re
  elif ifs_other:
    pat = '[%s]' % other_re
  else:
    raise AssertionError  # unreachable: empty ifs returned early above
  #print('PAT', repr(pat))

  regex = re.compile(pat)
  frags = regex.split(s)
  #log('split %r by %r -> frags %s', s, pat, frags)
  return frags
# Split operation:
#
# Max to allocate: the length of the string? That's the worst case. Every
# character is a different split.
#
# or use end_index?
#
# word_eval: Makes runtime.fragment out of it. Only takes the parts that are
# not delimiters.
#
# read: assigns it to variables, except for the trailing ones. Don't need
# to split them.
# TODO:
# - Executor holds a splitter. Passes it to word_eval and to the read
# builtin.
#
# Do we have different splitters? Awk splitter might be useful. Regex
# splitter later.  CSV splitter?  TSV?  the TSV one transforms?  Because of
@@ -149,6 +57,38 @@ def IfsSplit(s, ifs):
# Yes this is nice. How does perl do it?
def _SpansToParts(s, spans):
  """Helper for SplitForWordEval.

  Turns a list of (span_type, end_index) pairs over s into a list of string
  parts.  Black spans contribute text, Delim spans contribute nothing, and a
  Backslash span that immediately follows a Black span causes the next Black
  span to be merged into the previous part (the '\ ' escape case).
  """
  parts = []
  start_index = 0
  # If the last span was black, and we get a backslash, set join_next to merge
  # two black spans.
  join_next = False
  last_span_was_black = False
  for span_type, end_index in spans:
    if span_type == span_e.Black:
      if parts and join_next:
        # Merge with the previous part rather than starting a new one.
        parts[-1] += s[start_index:end_index]
        join_next = False
      else:
        parts.append(s[start_index:end_index])
      last_span_was_black = True
    elif span_type == span_e.Backslash:
      if last_span_was_black:
        join_next = True
      last_span_was_black = False
    else:
      # Delim span: emits no text, and breaks any pending black-to-black join.
      last_span_was_black = False
    start_index = end_index
  return parts
class RootSplitter(object):
""" A polymorphic interface to field splitting.
@@ -188,10 +128,7 @@ def _GetSplitter(self):
else:
ifs_other += c
if ifs_other:
sp = MixedSplitter(ifs_whitespace, ifs_other)
else:
sp = WhitespaceSplitter(ifs_whitespace)
sp = IfsSplitter(ifs_whitespace, ifs_other)
# NOTE: Technically, we could make the key more precise. IFS=$' \t' is
# the same as IFS=$'\t '. But most programs probably don't do that, and
@@ -200,10 +137,10 @@ def _GetSplitter(self):
return sp
def ShouldElide(self):
# HACK for now
def Escape(self, s):
"""Escape IFS chars."""
sp = self._GetSplitter()
return isinstance(sp, WhitespaceSplitter)
return sp.Escape(s)
def SplitForWordEval(self, s):
"""Split the string into slices, some of which are marked ignored.
@@ -223,59 +160,39 @@ def SplitForWordEval(self, s):
Array of (ignored Bool, start_index Int) tuples.
"""
sp = self._GetSplitter()
spans = sp.Split(s, False)
parts = []
start_index = 0
for ignored, end_index in spans:
if not ignored:
parts.append(s[start_index:end_index])
start_index = end_index
return parts
spans = sp.Split(s, True)
return _SpansToParts(s, spans)
def SplitForRead(self, line, allow_escape):
sp = self._GetSplitter()
return sp.Split(line, allow_escape)
def SplitForRead(self, s, allow_escape):
# Does this give you back the exact number you need?
# Removes ignored ones
sp = WhitespaceSplitter(DEFAULT_IFS)
spans = sp.Split(s, allow_escape)
parts = ['TODO']
return parts
class _BaseSplitter(object):
def __init__(self, escape_chars):
# Backslash is always escaped
self.escape_chars = escape_chars + '\\'
# NOTE: This is pretty much the same as GlobEscape.
def Escape(self, s):
escaped = ''
for c in s:
if c in self.escape_chars:
escaped += '\\'
escaped += c
return escaped
# We detect state changes. WHITE is for whitespace, BLACK is for significant
# chars.
STATE_WHITE, STATE_BLACK = 0, 2
class WhitespaceSplitter(object):
# TODO: Used this when IFS='' or IFS isn't set? This is the fast path for Oil!
class NullSplitter(_BaseSplitter):
def __init__(self, ifs_whitespace):
_BaseSplitter.__init__(self, ifs_whitespace)
self.ifs_whitespace = ifs_whitespace
def Split(self, s, allow_escape):
ws_chars = self.ifs_whitespace
n = len(s)
spans = [] # NOTE: in C, could reserve() this to len(s)
if n == 0:
return spans # empty
state = STATE_WHITE if s[0] in ws_chars else STATE_BLACK
prev_state = state
i = 1
while i < n:
state = STATE_WHITE if s[i] in ws_chars else STATE_BLACK
if state != prev_state:
spans.append((prev_state == STATE_WHITE, i))
prev_state = state
i += 1
spans.append((prev_state == STATE_WHITE, i))
return spans
raise NotImplementedError
# IFS splitting is complicated in general. We handle it with three concepts:
@@ -355,14 +272,16 @@ def Split(self, s, allow_escape):
(ST_BACKSLASH, CH_DE_WHITE): (ST_BLACK, EMIT_ESCAPE), # '\ '
(ST_BACKSLASH, CH_DE_GRAY): (ST_BLACK, EMIT_ESCAPE), # '\_'
(ST_BACKSLASH, CH_BLACK): (ST_BLACK, EMIT_ESCAPE), # '\a'
(ST_BACKSLASH, CH_BACKSLASH): (ST_BACKSLASH, EMIT_ESCAPE), # '\\'
# NOTE: second character is a backslash, but new state is ST_BLACK!
(ST_BACKSLASH, CH_BACKSLASH): (ST_BLACK, EMIT_ESCAPE), # '\\'
}
class MixedSplitter(object):
class IfsSplitter(_BaseSplitter):
"""Split a string when IFS has non-whitespace characters."""
def __init__(self, ifs_whitespace, ifs_other):
_BaseSplitter.__init__(self, ifs_whitespace + ifs_other)
self.ifs_whitespace = ifs_whitespace
self.ifs_other = ifs_other
@@ -386,7 +305,7 @@ def Split(self, s, allow_escape):
# Append an ignored span.
if i != 0:
spans.append((True, i))
spans.append((span_e.Delim, i))
# String is ONLY whitespace. We want to skip the last span after the
# while loop.
@@ -400,6 +319,8 @@ def Split(self, s, allow_escape):
ch = CH_DE_WHITE
elif c in other_chars:
ch = CH_DE_GRAY
elif allow_escape and c == '\\':
ch = CH_BACKSLASH
else:
ch = CH_BLACK
@@ -409,21 +330,34 @@ def Split(self, s, allow_escape):
#log('i %d c %r ch %s state %s new_state %s action %s', i, c, ch, state, new_state, action)
if action == EMIT_PART:
spans.append((False, i))
spans.append((span_e.Black, i))
elif action == EMIT_DE:
spans.append((True, i)) # ignored delimiter
spans.append((span_e.Delim, i)) # ignored delimiter
elif action == EMIT_EMPTY:
spans.append((True, i)) # ignored delimiter
spans.append((False, i)) # EMPTY part that is NOT ignored
spans.append((span_e.Delim, i)) # ignored delimiter
spans.append((span_e.Black, i)) # EMPTY part that is NOT ignored
elif action == EMIT_ESCAPE:
spans.append((span_e.Backslash, i)) # \
else:
pass # Emit nothing
state = new_state
i += 1
# Last span
ignored = state in (ST_DE_WHITE1, ST_DE_GRAY, ST_DE_WHITE2)
spans.append((ignored, n))
# Last span. TODO: Put this in the state machine as the \0 char?
if state == ST_BLACK:
span_type = span_e.Black
elif state == ST_BACKSLASH:
span_type = span_e.Backslash
elif state in (ST_DE_WHITE1, ST_DE_GRAY, ST_DE_WHITE2):
span_type = span_e.Delim
else:
raise AssertionError(state) # shouldn't be in START state
spans.append((span_type, n))
return spans
View
@@ -12,33 +12,70 @@ def _RunSplitCases(test, sp, cases):
for expected_parts, s, allow_escape in cases:
spans = sp.Split(s, allow_escape)
print('%r: %s' % (s, spans))
if 0:
print('%r: %s' % (s, spans))
else:
# Verbose for debugging
print(repr(s))
for span in spans:
print(' %s %s' % span)
parts = []
start_index = 0
for ignored, end_index in spans:
if not ignored:
parts.append(s[start_index:end_index])
start_index = end_index
parts = legacy._SpansToParts(s, spans)
print('PARTS %s' % parts)
test.assertEqual(expected_parts, parts,
'%r: %s != %s' % (s, expected_parts, parts))
class SplitTest(unittest.TestCase):
def testSpansToParts(self):
sp = legacy.IfsSplitter(legacy.DEFAULT_IFS, '')
s = 'one\\ two'
spans = sp.Split(s, False)
print(spans)
parts = legacy._SpansToParts(s, spans)
self.assertEqual(['one\\', 'two'], parts)
spans = sp.Split(s, True) # allow_escape
parts = legacy._SpansToParts(s, spans)
self.assertEqual(['one two'], parts)
# NOTE: Only read builtin supports max_results
return
parts = legacy._SpansToParts(s, spans, max_results=1)
self.assertEqual(['one\\ two'], parts)
print(spans)
parts = legacy._SpansToParts(s, spans, max_results=1)
self.assertEqual(['one two'], parts)
def testDefaultIfs(self):
CASES = [
([], '', True),
(['a'], 'a', True),
(['a'], ' a ', True),
(['ab'], '\tab\n', True),
(['a', 'b'], 'a b\n', True),
(['a b'], r'a\ b', True),
(['a\\', 'b'], r'a\ b', False),
([r'\*.sh'], r'\\*.sh', True),
(['Aa', 'b', ' a b'], 'Aa b \\ a\\ b', True),
]
sp = legacy.WhitespaceSplitter(legacy.DEFAULT_IFS)
#sp = legacy.WhitespaceSplitter(legacy.DEFAULT_IFS)
sp = legacy.IfsSplitter(legacy.DEFAULT_IFS, '')
_RunSplitCases(self, sp, CASES)
self.assertEqual('a\ _b', sp.Escape('a _b'))
def testMixedIfs(self):
CASES = [
([], '', True),
@@ -52,81 +89,72 @@ def testMixedIfs(self):
(['a'], ' a _ ', True),
# Contrast with the case above.
# NOTES:
# - This cases REQUIRES ignoring leading whitespace. The state machine
# can't handle it.
# can't handle it. Contrast with the case above.
# - We get three spans with index 1 because of the initial rule to
# ignore whitespace, and then EMIT_EMPTY. Seems harmless for now?
(['', 'a'], ' _ a _ ', True),
# Backslash escape
(['a b'], r'a\ b', True),
(['a\\', 'b'], r'a\ b', False),
]
# IFS='_ '
sp = legacy.MixedSplitter(' ', '_')
sp = legacy.IfsSplitter(' ', '_')
_RunSplitCases(self, sp, CASES)
self.assertEqual('a\ \_b', sp.Escape('a _b'))
def testWhitespaceOnly(self):
CASES = [
([], '', True),
([], '\t', True),
(['a'], 'a\t', True),
(['a', 'b'], '\t\ta\tb\t', True),
# Backslash escape
(['a\tb'], 'a\\\tb', True),
(['a\\', 'b'], 'a\\\tb', False),
]
# IFS='_ '
sp = legacy.MixedSplitter('\t', '')
sp = legacy.IfsSplitter('\t', '')
_RunSplitCases(self, sp, CASES)
self.assertEqual('a b', sp.Escape('a b'))
self.assertEqual('a\\\tb', sp.Escape('a\tb'))
def testOtherOnly(self):
CASES = [
([], '', True),
([''], '_', True),
(['a'], 'a_', True),
(['', '', 'a', 'b'], '__a_b_', True),
# Backslash escape
(['a_b'], r'a\_b', True),
(['a\\', 'b'], r'a\_b', False),
]
# IFS='_ '
sp = legacy.MixedSplitter('', '_')
sp = legacy.IfsSplitter('', '_')
_RunSplitCases(self, sp, CASES)
def testTwoOther(self):
CASES = [
(['a', '', 'b', '', '', 'c', 'd'], 'a__b---c_d', True)
(['a', '', 'b', '', '', 'c', 'd'], 'a__b---c_d', True),
# Backslash escape
(['a_-b'], r'a\_\-b', True),
(['a\\', '\\', 'b'], r'a\_\-b', False),
]
# IFS='_ '
sp = legacy.MixedSplitter('', '_-')
sp = legacy.IfsSplitter('', '_-')
_RunSplitCases(self, sp, CASES)
class OldSplitTest(unittest.TestCase):
  """Tests for the old regex-based legacy.IfsSplit() (kept for comparison)."""

  def testIfsSplitEmpty(self):
    # Splitting the empty string yields a single empty field, whatever IFS is.
    self.assertEqual(
        [''], legacy.IfsSplit('', ' \t\n'))
    # A single whitespace char produces two empty fields (leading/trailing).
    self.assertEqual(
        ['', ''], legacy.IfsSplit(' ', ' \t\n'))
    self.assertEqual(
        [''], legacy.IfsSplit('', ' '))

    # No word splitting when no IFS.  Hm.
    self.assertEqual(
        [''], legacy.IfsSplit('', ''))

  def testIfsSplit(self):
    # Default IFS: leading/trailing whitespace yields empty edge fields.
    self.assertEqual(
        ['', 'foo', 'bar', ''],
        legacy.IfsSplit('\tfoo bar\n', ' \t\n'))
    # Empty IFS disables splitting entirely.
    self.assertEqual(
        ['\tfoo bar\n'],
        legacy.IfsSplit('\tfoo bar\n', ''))
    # Non-whitespace IFS chars each delimit a field (adjacent ones give '').
    self.assertEqual(
        ['a', '', 'd'],
        legacy.IfsSplit('abcd', 'bc'))
if __name__ == '__main__':
unittest.main()
View
@@ -8,6 +8,9 @@
module runtime
{
-- TODO: remove this after _Reframe is rewritten
fragment = (string s, bool do_elide, bool do_glob)
-- A static word_part from os.asdl is evaluated to a dynamic part_value.
part_value =
-- UndefPartValue is for internal processing only.
@@ -17,18 +20,7 @@ module runtime
| StringPartValue(string s, bool do_split_elide, bool do_glob)
-- "$@" or "${a[@]}" -- never split or globbed since double quoted.
| ArrayPartValue(string* strs)
-- part_values are split into fragments. Fragments may still be elided
-- and globbed.
fragment = (string s, bool do_elide, bool do_glob)
-- We reframe and join fragments into an array of arg_value. If any
-- fragment in an arg had do_glob set, the whole arg is globbed, with
-- quoted fragments being glob-escaped.
-- e.g. "my[]dir/"*.py -> my\[\]dir/*.py.
arg_value =
ConstArg(string s)
| GlobArg(string s) -- non-glob parts glob-escaped
| CompoundPartValue(part_value* children)
-- A static word from osh.asdl is evaluted to a dynamic value. value
-- instances are stored in memory.
@@ -66,5 +58,9 @@ module runtime
job_status =
ProcessStatus(int status)
| PipelineStatus(int* statuses)
-- Word splitting in legacy.py
span = Black | Delim | Backslash
}
View

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -0,0 +1,270 @@
Notes on Word Evaluation
========================
There are a few contexts for word evaluation.
EvalWordSequence:
echo $s "${a[@]}"
declare -a a=( $s "${a[@]}" )
EvalWordToAny
a="$s ${a[@]}" (Although bash decays this)
EvalWordToString
echo foo > "$s ${a[@]}" # error because it should be a string
Glossary
--------
Unevaluated:
word
word_part
Evaluated:
part_value
fragment
fragment_groups? groups have one-on-one correspondence with words?
Every word should be a flat list of fragments?
frame
arg
Schema Changes
| StringPartValue(string s, bool quoted)
| fragment(string s, bool quoted)
Or maybe it should just be a flat array of StringPartValue?
Simpler Way?
--------
If the goal is just to elide $empty and not $empty""
And you never elide ANYTHING in "${a[@]}" or "$@"
This is just all in one word.
Logic: if the word consists of all StringPartValue which are unquoted and
IFS splitting gives NOTHING, then omit the whole word? That can come first?
I still need _Reframe : fragment groups into frames
fragment groups are things not separated by a hard barrier. "${a[@]}" has
internal barriers that can never be broken (they will never be joined)
EvalWordSequence
----------------
Let's talk about the hard case first. EvalWordSequence gets a list of words
from the parser (and after brace expansion), and it calls _EvalSplitGlob in a
loop on each word.
So the processing of each word is separate. Each word results in ZERO or more
argv entries. I'll call each entry an "arg" from now on.
Each CompoundWord is composed of an array of word_part. But this is actually
a tree, because of cases like this:
$ argv x${a:-"1 2" "3 4"}x
The ${a} substitution is a word part, but it might expand into an ARRAY of
word_part:
[ (DQ '1 2') (LiteralPart ' ') (DQ '3 4') ]
### Step 1: _EvalParts
This evaluates part, and then flattens out the part_value.CompoundPartValue
instances. So we're left with a flat list of StringPartValue and
ArrayPartValue. (Could this be encoded in the type system?)
The only way to get an ArrayPartValue is "$@" or "${a[@]}". These are not
split or globbed, so we don't have to worry about them. We just have to "pass
them through" unchanged.
### Step 2: FrameFragments
A fragment is either a StringPartValue or one PIECE of an ArrayPartValue.
You need to do the reframing, but preserve whether each StringPartValue is
quoted. ArrayPartValues are always quoted.
So now we have an array of fragments. Should be
[ fragment(s Str, quoted Bool), ... ]
Should we call these Frames?
### Elide Frames if IFS has whitespace
These are elided:
empty=''
argv $empty
argv ${empty:-}
These are not:
argv "${empty}"
argv ${empty:-''}
argv ${empty:-""}
argv ""
argv $empty"" # joining two parts
### Step 3: Maybe Glob Escape Frames
Now go over each frame. If no fragment in the frame is quoted, it's like this:
"$s"
"${a[@]}"
We can just pass these through as-is.
If any fragment in the frame is not quoted, then we
need to both split it and glob it.
Splitting comes first, and globbing comes second.  So we have to escape in
the OPPOSITE order.
### Step 4: Maybe IFS Escape Frames
### Step 5: Split Frames with IFS
Rules: IFS is split into other. State machine is very complex!
### Step 6: Glob Frames with Globber
Respect options: noglob, failglob, etc.
NOTE: globs inside strings are respected!
a='*.py'
same as:
a=*.py # no globbing yet
echo $a
Moral of the Story
------------------
This algorithm is horrible! It's almost impossible to reason about, and the
syntax is bad too. Oil will have something much simpler.
PROBLEMS:
How to avoid eliding ""?
I think if everything is quoted, then we can just
These become fragments.
Another Algorithm
-----------------
FOR EACH WORD
1. Eval - word_part
2. Flatten - word_part but no CompoundWordPart
(note: could combine these two steps if
_EvalWordPart(part, quoted=False) had an accumulator argument.
3. MakeFrames(word_parts) -- handle StringPartValue and ArrayValue
a=(1 '2 3' 4)
$x"${a[@]}"$y
This has three frames.
The middle frame is just appended.
The first and last frame have to undergo splitting.
This has one frame:
$empty""
One frame:
${empty:-}
A frame is zero or more args. It will never be joined with anything else.
# fragment is the same as StringPartvalue though.
fragment = (string s, bool quoted)
frame = (fragment* frags)
4. Elide certain frames. IFS is whitespace && everything is unquoted and
everything is IFS
$a$b
CHOICE:
(A) 5. If everything in the frame is quoted, just join it end to end, and
emit it. Skip the next steps.
(B) 5. Join fragments in a frame, doing glob escaping and IFS escaping,
depending on "quoted" and depending on "noglob".
6. Split with IFS
7. Glob, appending to argv.
POSIX on "$@":
------------
Expands to the positional parameters, starting from one. When the expansion
occurs within double-quotes, and where field splitting (see Field Splitting)
is performed, each positional parameter shall expand as a separate field,
with the provision that the expansion of the first parameter shall still be
joined with the beginning part of the original word (assuming that the
expanded parameter was embedded within a word), and the expansion of the last
parameter shall still be joined with the last part of the original word. If
there are no positional parameters, the expansion of '@' shall generate zero
fields, even when '@' is double-quoted.
POSIX on Field Splitting
-------------------------
http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
https://www.gnu.org/software/bash/manual/bashref.html#Word-Splitting
Summary:
1. ' \t\n' is special. Whitespace is trimmed off the front and back.
2. if IFS is '', no field splitting is performed.
3. Otherwise, suppose IFS = ' ,\t'. Then IFS whitespace is space or comma.
a. IFS whitespace is ignored at beginning and end.
b. any other IFS char delimits the field, along with adjacent IFS
whitespace.
c. IFS whitespace shall delimit a field.
View
@@ -10,6 +10,40 @@ echo ---
# stdout-json: "-\n--\n---\n"
# BUG zsh stdout-json: "\n--\n---\n"
### echo backslashes
echo \\
echo '\'
echo '\\'
echo "\\"
## STDOUT:
\
\
\\
\
## BUG dash/mksh/zsh STDOUT:
\
\
\
\
## END
### echo -e backslashes
echo -e \\
echo -e '\'
echo -e '\\'
echo -e "\\"
## STDOUT:
\
\
\
\
## N-I dash STDOUT:
-e \
-e \
-e \
-e \
## END
### echo -en
echo -en 'abc\ndef\n'
# stdout-json: "abc\ndef\n"
@@ -130,6 +164,13 @@ echo "[$x]"
# stdout: [A B C D E]
# status: 0
### Read from empty file
echo -n '' > $TMP/empty.txt
read x < $TMP/empty.txt
argv "status=$?" "$x"
# stdout: ['status=1', '']
# status: 0
### Read builtin with no newline.
# This is odd because the variable is populated successfully. OSH/Oil might
# need a separate put reading feature that doesn't use IFS.
@@ -165,6 +206,13 @@ argv.py $x $REPLY
# stdout: ['1234', '12']
# N-I dash/zsh stdout: []
### Read uses $REPLY (without -n)
echo 123 > $TMP/readreply.txt
read < $TMP/readreply.txt
echo $REPLY
# stdout: 123
# N-I dash stdout:
### read -r ignores backslashes
echo 'one\ two' > $TMP/readr.txt
read escaped < $TMP/readr.txt
@@ -182,13 +230,24 @@ argv "$escaped" "$raw"
# BUG mksh/zsh stdout: ['one twoethree', 'one\\ twoethree']
### read with line continuation reads multiple physical lines
echo -e 'one\\\ntwo\n' > $TMP/readr.txt
read escaped < $TMP/readr.txt
read -r raw < $TMP/readr.txt
tmp=$TMP/$(basename $SH)-readr.txt
echo -e 'one\\\ntwo\n' > $tmp
read escaped < $tmp
read -r raw < $tmp
argv "$escaped" "$raw"
# stdout: ['onetwo', 'one\\']
# N-I dash stdout: ['-e onetwo', '-e one\\']
### read multiple vars spanning many lines
read x y << 'EOF'
one-\
two three-\
four five-\
six
EOF
argv "$x" "$y" "$z"
# stdout: ['one-two', 'three-four five-six', '']
### read -r with \n
echo '\nline' > $TMP/readr.txt
read escaped < $TMP/readr.txt
@@ -210,7 +269,7 @@ echo "[$var]"
# stdout: [ a b c]
# N-I dash stdout: [a b c]
### Read with IFS=:
### Read multiple lines with IFS=:
# The leading spaces are stripped if they appear in IFS.
# IFS chars are escaped with :.
IFS=:
View
@@ -24,6 +24,25 @@ empty=
argv.py ${empty:-}
# stdout: []
### array with empty values
declare -a A=('' x "" '')
argv.py "${A[@]}"
# stdout: ['', 'x', '', '']
# N-I dash stdout-json: ""
# N-I dash status: 2
# N-I mksh stdout-json: ""
# N-I mksh status: 1
### substitution of IFS character, quoted and unquoted
IFS=:
s=:
argv.py $s
argv.py "$s"
## STDOUT:
['']
[':']
## END
### :-
empty=''
argv.py ${empty:-a} ${Unset:-b}
@@ -85,6 +104,15 @@ argv.py ${Unset:-"a b" c}
argv.py "${Unset:-"a b" c}"
# stdout: ['a b c']
### part_value tree with multiple words
argv ${a:-${a:-"1 2" "3 4"}5 "6 7"}
# stdout: ['1 2', '3 45', '6 7']
### part_value tree on RHS
v=${a:-${a:-"1 2" "3 4"}5 "6 7"}
argv "${v}"
# stdout: ['1 2 3 45 6 7']
### Var with multiple words: no quotes
var='a b c'
argv.py ${Unset:-$var}
View
@@ -66,10 +66,18 @@ argv.py $s1
### Word elision with non-whitespace IFS
# Treated differently than the default IFS. What is the rule here?
IFS=_
s1='_'
argv.py $s1
# stdout: ['']
IFS='_'
char='_'
space=' '
empty=''
argv.py $char
argv.py $space
argv.py $empty
## STDOUT:
['']
[' ']
[]
## END
### Leading/trailing word elision with non-whitespace IFS
# This behavior is weird.
View
@@ -204,7 +204,7 @@ comments() {
}
word-split() {
sh-spec spec/word-split.test.sh --osh-failures-allowed 3 \
sh-spec spec/word-split.test.sh \
${REF_SHELLS[@]} $OSH "$@"
}
@@ -257,7 +257,7 @@ builtins() {
}
builtin-io() {
sh-spec spec/builtin-io.test.sh --osh-failures-allowed 3 \
sh-spec spec/builtin-io.test.sh --osh-failures-allowed 2 \
${REF_SHELLS[@]} $ZSH $OSH "$@"
}
@@ -417,7 +417,7 @@ sh-options() {
}
xtrace() {
sh-spec spec/xtrace.test.sh --osh-failures-allowed 3 \
sh-spec spec/xtrace.test.sh --osh-failures-allowed 5 \
${REF_SHELLS[@]} $OSH "$@"
}
@@ -497,7 +497,7 @@ regex() {
process-sub() {
# mksh and dash don't support it
sh-spec spec/process-sub.test.sh --osh-failures-allowed 2 \
sh-spec spec/process-sub.test.sh \
$BASH $ZSH $OSH "$@"
}