Permalink
Browse files

Handle arrays with partially unset elements (#126)

Unset array elements are represented by Python's None value, which distinguishes them from elements explicitly set to the empty string.
  • Loading branch information...
Yorwba authored and andychu committed Jun 1, 2018
1 parent f7fde14 commit acdfc71512db6d7ecfcac524846549e74cf3863d
Showing with 89 additions and 23 deletions.
  1. +3 −0 core/expr_eval.py
  2. +9 −5 core/state.py
  3. +5 −5 core/state_test.py
  4. +22 −13 core/word_eval.py
  5. +50 −0 spec/array.test.sh
View
@@ -185,6 +185,9 @@ def EvalLhs(node, arith_ev, mem, exec_opts):
try:
item = array[index]
except IndexError:
item = None
if item is None:
val = runtime.Str('')
else:
assert isinstance(item, str), item
View
@@ -676,13 +676,17 @@ def SetVar(self, lval, value, new_flags, lookup_mode):
# Then ${#a[@]} counts the entries that are not None.
#
# TODO: strict-array for Oil arrays won't auto-fill.
n = len(strs) - lval.index + 1
n = lval.index - len(strs) + 1
strs.extend([None] * n)
strs[lval.index] = value.s
else:
# TODO:
# - This is a bug, because a[2]=2 creates an array of length ONE, even
# though the index is two.
# When the array doesn't exist yet, it is created filled with None.
# Access to the array needs to explicitly filter those sentinel values.
# It also wastes memory. But indexed access is fast.
# What should be optimized for? Bash uses a linked list. Random access
# takes linear time, but iteration skips unset entries automatically.
# - Maybe represent as hash table? Then it's not an ASDL type?
# representations:
@@ -703,7 +707,7 @@ def SetVar(self, lval, value, new_flags, lookup_mode):
# ${!a[@]} - keys
# That seems pretty minimal.
items = [''] * lval.index
items = [None] * lval.index
items.append(value.s)
new_value = runtime.StrArray(items)
# arrays can't be exported
View
@@ -179,19 +179,19 @@ def testSetVarClearFlag(self):
mem.ClearFlag('PYTHONPATH', var_flags_e.Exported, scope_e.Dynamic)
self.assertEqual(False, mem.var_stack[0].vars['PYTHONPATH'].exported)
# a[2]=2
# a[1]=2
mem.SetVar(
runtime.LhsIndexedName('a', 1), runtime.Str('2'), (),
scope_e.Dynamic)
self.assertEqual(['', '2'], mem.var_stack[0].vars['a'].val.strs)
self.assertEqual([None, '2'], mem.var_stack[0].vars['a'].val.strs)
# a[2]=3
# a[1]=3
mem.SetVar(
runtime.LhsIndexedName('a', 1), runtime.Str('3'), (),
scope_e.Dynamic)
self.assertEqual(['', '3'], mem.var_stack[0].vars['a'].val.strs)
self.assertEqual([None, '3'], mem.var_stack[0].vars['a'].val.strs)
# a[2]=(x y z) # illegal
# a[1]=(x y z) # illegal
try:
mem.SetVar(
runtime.LhsIndexedName('a', 1), runtime.StrArray(['x', 'y', 'z']),
View
@@ -94,7 +94,7 @@ def _MakeWordFrames(part_vals):
current.append((p.s, p.do_split_glob))
elif p.tag == part_value_e.ArrayPartValue:
for i, s in enumerate(p.strs):
for i, s in enumerate(s for s in p.strs if s is not None):
if i == 0:
current.append((s, False)) # don't split or glob
else:
@@ -116,11 +116,7 @@ def _DecayPartValuesToString(part_vals, join_char):
if p.tag == part_value_e.StringPartValue:
out.append(p.s)
else:
last = len(p.strs) - 1
for i, s in enumerate(p.strs):
out.append(s)
if i != last:
out.append(join_char)
out.append(join_char.join(s for s in p.strs if s is not None))
return ''.join(out)
@@ -304,8 +300,8 @@ def _ApplyPrefixOp(self, val, op_id):
if val.tag == value_e.Str:
length = len(val.s)
elif val.tag == value_e.StrArray:
# TODO: There can be empty placeholder values in the array.
length = len(val.strs)
# There can be empty placeholder values in the array.
length = sum(1 for s in val.strs if s is not None)
return runtime.Str(str(length))
elif op_id == Id.VSub_Bang:
# NOTES:
@@ -337,7 +333,8 @@ def _ApplyUnarySuffixOp(self, val, op):
# ${a[@]#prefix} is VECTORIZED on arrays. Oil should have this too.
strs = []
for s in val.strs:
strs.append(libstr.DoUnarySuffixOp(s, op, arg_val.s))
if s is not None:
strs.append(libstr.DoUnarySuffixOp(s, op, arg_val.s))
new_val = runtime.StrArray(strs)
else:
@@ -375,7 +372,7 @@ def _EvalDoubleQuotedPart(self, part, part_vals):
def _DecayArray(self, val):
assert val.tag == value_e.StrArray, val
sep = self.splitter.GetJoinChar()
return runtime.Str(sep.join(val.strs))
return runtime.Str(sep.join(s for s in val.strs if s is not None))
def _EmptyStrOrError(self, val, token=None):
assert isinstance(val, runtime.value), val
@@ -481,6 +478,9 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
try:
s = val.strs[index]
except IndexError:
s = None
if s is None:
val = runtime.Undef()
else:
val = runtime.Str(s)
@@ -559,7 +559,8 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
elif val.tag == value_e.StrArray:
strs = []
for s in val.strs:
strs.append(libstr.PatSub(s, op, pat, replace_str))
if s is not None:
strs.append(libstr.PatSub(s, op, pat, replace_str))
val = runtime.StrArray(strs)
else:
@@ -578,14 +579,22 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
length = self.arith_ev.Eval(op.length)
end = begin + length
else:
length = None
end = None # Python supports None as the end
if val.tag == value_e.Str: # Slice characters in a string.
# TODO: Need to support unicode? Write spec tests.
val = runtime.Str(val.s[begin : end])
elif val.tag == value_e.StrArray: # Slice array entries.
val = runtime.StrArray(val.strs[begin : end])
# NOTE: unset elements don't count towards the length
strs = []
for s in val.strs[begin:]:
if s is not None:
strs.append(s)
if len(strs) == length: # never true for unspecified length
break
val = runtime.StrArray(strs)
else:
raise AssertionError(val.__class__.__name__)
@@ -746,7 +755,7 @@ def EvalWordToString(self, word, do_fnmatch=False, decay=False):
# 'To assign arrays, using b=( "${a[@]}" )')
else:
# It appears to not respect IFS
s = ' '.join(part_val.strs)
s = ' '.join(s for s in part_val.strs if s is not None)
strs.append(s)
View
@@ -390,3 +390,53 @@ f
['x']
## END
### Create sparse array
a=()
(( a[99]=1 )) # osh doesn't parse index assignment outside arithmetic yet
echo len=${#a[@]}
argv.py "${a[@]}"
echo "unset=${a[33]}"
echo len-of-unset=${#a[33]}
## STDOUT:
len=1
['1']
unset=
len-of-unset=0
## END
### Create sparse array implicitly
(( a[99]=1 ))
echo len=${#a[@]}
argv.py "${a[@]}"
echo "unset=${a[33]}"
echo len-of-unset=${#a[33]}
## STDOUT:
len=1
['1']
unset=
len-of-unset=0
## END
### Append sparse arrays
a=()
(( a[99]=1 ))
b=()
(( b[33]=2 ))
(( b[66]=3 ))
a+=( "${b[@]}" )
argv.py "${a[@]}"
argv.py "${a[99]}" "${a[100]}" "${a[101]}"
## STDOUT:
['1', '2', '3']
['1', '2', '3']
## END
### Slice of sparse array with [@]
# mksh doesn't support this syntax! It's a bash extension.
(( a[33]=1 ))
(( a[66]=2 ))
(( a[99]=2 ))
argv.py "${a[@]:15:2}"
# stdout: ['1', '2']
# N-I mksh status: 1
# N-I mksh stdout-json: ""

0 comments on commit acdfc71

Please sign in to comment.