Permalink
Browse files

Improvements to string slicing.

- Handle out-of-bounds indices the same way bash and zsh do.
- Handle ${undef:3:1} properly (respecting nounset).
  • Loading branch information...
Andy Chu
Andy Chu committed Aug 17, 2018
1 parent cad1f8e commit 5bd633d4d1281ab3b1801833e996753f674a9b60
Showing with 39 additions and 12 deletions.
  1. +6 −2 core/libstr.py
  2. +14 −10 core/word_eval.py
  3. +19 −0 spec/var-op-other.test.sh
View
@@ -129,11 +129,15 @@ def AdvanceUtf8Chars(s, num_chars, byte_offset):
If we got past the end of the string
"""
num_bytes = len(s)
i = byte_offset # mutated
i = byte_offset # current byte position
for _ in xrange(num_chars):
# Neither bash nor zsh checks for out-of-bounds slicing, for either the begin
# index or the length.
if i >= num_bytes:
raise RuntimeError('Out of bounds')
return i
#raise RuntimeError('Out of bounds')
i = _NextUtf8Char(s, i)
return i
View
@@ -553,7 +553,7 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
val = self._ApplyUnarySuffixOp(val, part.suffix_op)
elif op.tag == suffix_op_e.PatSub: # PatSub, vectorized
val = self._EmptyStrOrError(val)
val = self._EmptyStrOrError(val) # ${undef//x/y}
pat_val = self.EvalWordToString(op.pat, do_fnmatch=True)
assert pat_val.tag == value_e.Str, pat_val
@@ -583,7 +583,8 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
raise AssertionError(val.__class__.__name__)
elif op.tag == suffix_op_e.Slice:
# TODO: Check out of bounds errors? begin could be past the beginning.
val = self._EmptyStrOrError(val) # ${undef:3:1}
if op.begin:
begin = self.arith_ev.Eval(op.begin)
else:
@@ -596,17 +597,20 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
if val.tag == value_e.Str: # Slice UTF-8 characters in a string.
s = val.s
if begin >= 0:
byte_begin = libstr.AdvanceUtf8Chars(s, begin, 0)
else:
if begin < 0:
# How do we count characters from the end? I guess we have to
# decode the whole thing.
raise NotImplementedError
if length is not None:
byte_end = libstr.AdvanceUtf8Chars(s, length, byte_begin)
else:
byte_begin = libstr.AdvanceUtf8Chars(s, begin, 0)
if length is None:
byte_end = len(s)
else:
if length < 0:
raise NotImplementedError
byte_end = libstr.AdvanceUtf8Chars(s, length, byte_begin)
val = runtime.Str(s[byte_begin : byte_end])
@@ -621,13 +625,13 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
val = runtime.StrArray(strs)
else:
raise AssertionError(val.__class__.__name__)
raise AssertionError(val.__class__.__name__) # Not possible
# After applying suffixes, process maybe_decay_array here.
if maybe_decay_array and val.tag == value_e.StrArray:
val = self._DecayArray(val)
# No prefix or suffix ops
# For the case where there are no prefix or suffix ops.
val = self._EmptyStrOrError(val)
# For example, ${a} evaluates to value_t.Str(), but we want a
View
@@ -339,6 +339,25 @@ echo ${foo: i-3-2 : i + 2}
## N-I dash status: 2
## N-I dash stdout-json: ""
#### Slice undefined
echo -${undef:1:2}-
set -o nounset
echo -${undef:1:2}-
echo -done-
## STDOUT:
--
## END
## status: 1
# mksh doesn't respect nounset!
## BUG mksh status: 0
## BUG mksh STDOUT:
--
--
-done-
## END
## N-I dash status: 2
## N-I dash stdout-json: ""
#### Slice UTF-8 String
# mksh slices by bytes.
foo='--μ--'

0 comments on commit 5bd633d

Please sign in to comment.