/
word_eval.py
1866 lines (1548 loc) · 60.4 KB
/
word_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
word_eval.py - Evaluator for the word language.
"""
from _devbuild.gen.id_kind_asdl import Id, Kind, Id_str, Kind_str
from _devbuild.gen.syntax_asdl import (
braced_var_sub, Token,
word, word_e, word_t, compound_word,
bracket_op_e, bracket_op__ArrayIndex, bracket_op__WholeArray,
suffix_op_e, suffix_op__Nullary, suffix_op__PatSub, suffix_op__Slice,
suffix_op__Unary,
sh_array_literal, single_quoted, double_quoted, simple_var_sub,
command_sub,
word_part_e, word_part__ArithSub, word_part__EscapedLiteral,
word_part__AssocArrayLiteral, word_part__ExprSub, word_part__ExtGlob,
word_part__FuncCall, word_part__Splice, word_part__TildeSub,
)
from _devbuild.gen.runtime_asdl import (
builtin_e, effect_e,
part_value, part_value_e, part_value_t, part_value__String,
part_value__Array,
value, value_e, value_t, value__Str, value__AssocArray,
value__MaybeStrArray, value__Obj,
lvalue, assign_arg,
cmd_value_e, cmd_value_t, cmd_value, cmd_value__Assign, cmd_value__Argv,
quote_e, quote_t,
)
from core import builtin_def
from core import error
from core import passwd
from core import process
from core import state
from core.util import log, e_die, e_strict
from frontend import consts
from frontend import match
from mycpp.mylib import tagswitch
from mycpp import mylib
from osh import braces
from osh import glob_
from osh import string_ops
from osh import word_
from osh import word_compile
import posix_ as posix
from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
if TYPE_CHECKING:
from _devbuild.gen.id_kind_asdl import Id_t
from _devbuild.gen.syntax_asdl import (
command_t, speck, word_part_t
)
from _devbuild.gen.runtime_asdl import (
builtin_t, effect_t, lvalue__Named
)
from core.ui import ErrorFormatter
from core import optview
from osh import cmd_exec
from osh.split import SplitContext
from core.state import Mem
from osh import prompt
from osh import sh_expr_eval
from oil_lang import expr_eval
# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# This is the one variable name that _EvalBracedVarSub lets through when an
# array is referenced without a bracket op; any other array name used that way
# is a fatal error there.
_STRING_AND_ARRAY = 'BASH_SOURCE'
def EvalSingleQuoted(part):
    # type: (single_quoted) -> str
    """Return the string denoted by a single_quoted word part.

    The kind of quote is read from part.left.id:

    - Id.Left_SingleQuoteRaw: the token values are concatenated verbatim.
    - Id.Left_SingleQuoteC: every token is first passed through
      word_compile.EvalCStringToken, then the results are concatenated.

    Any other left token is a programming error (AssertionError).
    """
    left_id = part.left.id

    if left_id == Id.Left_SingleQuoteRaw:
        pieces = [tok.val for tok in part.tokens]
        return ''.join(pieces)

    if left_id == Id.Left_SingleQuoteC:
        # NOTE: This could be done at compile time
        # TODO: Add location info for invalid backslash
        pieces = [
            word_compile.EvalCStringToken(tok.id, tok.val)
            for tok in part.tokens
        ]
        return ''.join(pieces)

    raise AssertionError(Id_str(left_id))
# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
def _BackslashEscape(s):
# type: (str) -> str
"""Double up backslashes.
Useful for strings about to be globbed and strings about to be IFS escaped.
"""
return s.replace('\\', '\\\\')
def _ValueToPartValue(val, quoted):
    # type: (value_t, bool) -> part_value_t
    """Convert an evaluated value into the part_value a word is built from.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: the value to convert.  Undef is not accepted here; it should have
        been handled by _EmptyStrOrError() beforehand.
      quoted: whether the substitution appeared in a quoted context.  String
        results are marked for splitting only when it is false.

    Returns:
      part_value.String for Str (and, in Python only, Obj) values;
      part_value.Array for MaybeStrArray and AssocArray values.
    """
    UP_val = val
    do_split = not quoted

    with tagswitch(val) as case:
        if case(value_e.Str):
            str_val = cast(value__Str, UP_val)
            return part_value.String(str_val.s, quoted, do_split)

        elif case(value_e.MaybeStrArray):
            array_val = cast(value__MaybeStrArray, UP_val)
            return part_value.Array(array_val.strs)

        elif case(value_e.AssocArray):
            assoc_val = cast(value__AssocArray, UP_val)
            # TODO: Is this correct?
            return part_value.Array(assoc_val.d.values())

        elif case(value_e.Obj):
            if mylib.PYTHON:
                obj_val = cast(value__Obj, UP_val)
                return part_value.String(str(obj_val.obj), quoted, do_split)
            # Not in C++
            raise AssertionError()

        else:
            # Undef should be caught by _EmptyStrOrError().
            raise AssertionError(val.tag_())
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Tuple[str, bool, bool]]]
    """Group a word's part values into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args.  It can never be joined.
    This idea exists because of arrays like "$@" and "${a[@]}".

    Each frame is a list of (s, quoted, do_split) tuples.  Entries that come
    from an Array part are always (s, True, False).

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y
      $x"${a[@]}"$y

    Three frames (the x and y entries carry whatever quoted/do_split flags
    their String parts had):

      [ ('x', ...), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', ...) ]
    """
    frame = []  # type: List[Tuple[str, bool, bool]]
    all_frames = [frame]

    for pv in part_vals:
        UP_pv = pv

        with tagswitch(pv) as case:
            if case(part_value_e.String):
                str_part = cast(part_value__String, UP_pv)
                frame.append((str_part.s, str_part.quoted, str_part.do_split))

            elif case(part_value_e.Array):
                arr_part = cast(part_value__Array, UP_pv)

                started = False
                for elem in arr_part.strs:
                    if elem is None:
                        continue  # ignore undefined array entries

                    # Array parts are always quoted; otherwise they would have
                    # decayed to a string.
                    entry = (elem, True, False)
                    if started:
                        # Every later element opens its own frame.
                        frame = [entry]
                        all_frames.append(frame)
                    else:
                        # The first defined element joins the current frame.
                        frame.append(entry)
                        started = True

            else:
                raise AssertionError()

    return all_frames
# TODO: This could be _MakeWordFrames and then sep.join().  It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into one string, e.g. decay ${a=x"$@"x}.

    Args:
      part_vals: the part values to flatten.
      join_char: separator placed between the defined entries of each Array
        part.

    Returns:
      The String parts and the joined Array parts concatenated in order, with
      nothing inserted between parts.
    """
    pieces = []  # type: List[str]

    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                str_part = cast(part_value__String, UP_pv)
                pieces.append(str_part.s)
            else:
                arr_part = cast(part_value__Array, UP_pv)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for elem in arr_part.strs:
                    if elem is not None:
                        defined.append(elem)
                pieces.append(join_char.join(defined))

    return ''.join(pieces)
def _PerformSlice(val,  # type: value_t
                  begin,  # type: int
                  length,  # type: int
                  has_length,  # type: bool
                  part,  # type: braced_var_sub
                  ):
    # type: (...) -> value_t
    """Slice a string or an array value, as in ${x:begin:length}.

    Args:
      val: the value to slice.  Str and MaybeStrArray are supported; an
        AssocArray is a fatal error and any other tag raises
        NotImplementedError.
      begin: start offset.  For a string it is a count of UTF-8 characters
        handed to string_ops.AdvanceUtf8Chars; for an array it is used as the
        start of a Python slice over the entries.
      length: maximum number of characters (string) or defined entries (array)
        to keep.  Only consulted when has_length is true.
      has_length: whether a length was given at all.
      part: the braced_var_sub being evaluated; used to attribute errors.

    Returns:
      A new value.Str or value.MaybeStrArray holding the slice.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value__Str, UP_val)
            s = val.s

            if begin < 0:
                # It could be negative if we compute unicode length, but
                # that's confusing.

                # TODO: Instead of attributing it to the word part, it would
                # be better if we attributed it to arith_expr begin.
                e_strict(
                    "The start index of a string slice can't be negative: %d",
                    begin, part=part)

            byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:
                    # TODO: Instead of attributing it to the word part, it
                    # would be better if we attributed it to arith_expr begin.
                    e_strict(
                        "The length of a string slice can't be negative: %d",
                        length, part=part)

                byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin : byte_end]
            val = value.Str(substr)

        elif case(value_e.MaybeStrArray):  # Slice array entries.
            val = cast(value__MaybeStrArray, UP_val)
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent
            # with strings.
            if has_length and length < 0:
                e_die("The length index of a array slice can't be negative: %d",
                      length, part=part)

            # NOTE: unset elements don't count towards the length.
            strs = []  # type: List[str]
            for s in val.strs[begin:]:
                # Test the limit BEFORE appending.  Testing only after an
                # append never stops when length is 0, which made a
                # zero-length slice return every remaining entry.
                if has_length and len(strs) >= length:
                    break
                if s is not None:
                    strs.append(s)
            val = value.MaybeStrArray(strs)

        elif case(value_e.AssocArray):
            e_die("Can't slice associative arrays", part=part)

        else:
            raise NotImplementedError(val.tag_())

    return val
class StringWordEvaluator(object):
    """Minimal word-evaluator interface, for use by the _ExprEvaluator."""

    def EvalWordToString(self, w, quote_kind=quote_e.Default):
        # type: (word_t, quote_t) -> value__Str
        """Evaluate word w to a single string value.

        Not implemented at this level; subclasses are expected to override it.
        """
        raise NotImplementedError()
def _GetDollarHyphen(exec_opts):
# type: (optview.Exec) -> str
chars = [] # type: List[str]
if exec_opts.interactive():
chars.append('i')
if exec_opts.errexit():
chars.append('e')
if exec_opts.nounset():
chars.append('u')
# NO letter for pipefail?
if exec_opts.xtrace():
chars.append('x')
if exec_opts.noexec():
chars.append('n')
# bash has:
# - c for sh -c, i for sh -i (mksh also has this)
# - h for hashing (mksh also has this)
# - B for brace expansion
return ''.join(chars)
class AbstractWordEvaluator(StringWordEvaluator):
"""Abstract base class for word evaluators.
Public entry points:
EvalWordToString
EvalForPlugin
EvalRhsWord
EvalWordSequence
EvalWordSequence2
"""
def __init__(self, mem, exec_opts, splitter, errfmt):
    # type: (Mem, optview.Exec, SplitContext, ErrorFormatter) -> None
    """Store the collaborators used during word evaluation.

    Args:
      mem: variable and argument storage (for $HOME, $1, etc.).
      exec_opts: shell option view (for nounset and friends); also used to
        construct the Globber.
      splitter: provides the join character used when arrays decay.
      errfmt: used to print warnings.
    """
    # Collaborators handed in by the caller.
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.splitter = splitter
    self.errfmt = errfmt

    self.globber = glob_.Globber(exec_opts)

    # Other evaluators start out as None here.
    # NOTE(review): presumably they are assigned after construction because
    # of circular dependencies (see CheckCircularDeps) -- confirm with caller.
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.OilEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract hook; this base class only raises NotImplementedError."""
    raise NotImplementedError()
def _EvalCommandSub(self, part, quoted):
# type: (command_t, bool) -> part_value_t
"""Abstract since it has a side effect.
Args:
part: command_sub
Returns:
part_value
"""
raise NotImplementedError()
def _EvalProcessSub(self, part, id_):
# type: (command_t, int) -> part_value_t
"""Abstract since it has a side effect.
Args:
part: command_sub
Returns:
part_value
"""
raise NotImplementedError()
def _EvalTildeSub(self, token):
    # type: (Token) -> str
    """Evaluates ~ and ~user.

    Args:
      token: the tilde token.  A val of exactly '~' selects the current
        user's home directory; any other token is handed to
        passwd.GetHomeDir().

    Returns:
      For '~': the value of the HOME variable when it is a string, otherwise
      whatever process.GetHomeDir() returns.  For other tokens: the result of
      passwd.GetHomeDir(token).
    """
    if token.val != '~':
        return passwd.GetHomeDir(token)

    # First look up the HOME var, then ask the OS.  This is what bash does.
    home = self.mem.GetVar('HOME')
    UP_home = home
    if home.tag_() == value_e.Str:
        home_str = cast(value__Str, UP_home)
        return home_str.s
    return process.GetHomeDir()
def _EvalVarNum(self, var_num):
# type: (int) -> value_t
assert var_num >= 0
return self.mem.GetArgNum(var_num)
def _EvalSpecialVar(self, op_id, quoted):
    # type: (int, bool) -> Tuple[value_t, bool]
    """Evaluate a special variable such as $@, $* or $-.

    Returns (val, bool maybe_decay_array).

    $@ is special -- it needs to know whether it is in a double quoted
    context:

    - "$@" evaluates to an ARRAY that must not decay.
    - $@ in a normal context may decay to a STRING, which then will be
      subject to splitting.

    $* and "$*" may both decay.  Every other Id is delegated to
    self.mem.GetSpecialVar() and never decays.

    TODO: Should that boolean be part of the value?
    """
    if op_id == Id.VSub_At:
        # "$@" evaluates to an array, $@ should be decayed
        return value.MaybeStrArray(self.mem.GetArgv()), not quoted

    if op_id == Id.VSub_Star:
        # $* "$*" are both decayed
        return value.MaybeStrArray(self.mem.GetArgv()), True

    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts)), False

    return self.mem.GetSpecialVar(op_id), False
def _ApplyTestOp(self,
                 val,  # type: value_t
                 op,  # type: suffix_op__Unary
                 quoted,  # type: bool
                 part_vals,  # type: Optional[List[part_value_t]]
                 ):
    # type: (...) -> Tuple[List[part_value_t], effect_t]
    """Apply a test operator: ${a:-x} ${a:+x} ${a:=x} ${a:?x} and the
    colon-less variants.

    The colon variants treat Undef, the empty string and an empty array
    (indexed or associative) as "falsey"; the colon-less variants only treat
    Undef that way.

    Args:
      val: the value being tested.
      op: the operator, with its argument word in op.arg_word.
      quoted: whether the substitution is in a quoted context; forwarded when
        evaluating the argument word.
      part_vals: output param that spliced parts are appended to.

    Returns:
      (effect_part_vals, effect), where effect is one of:

      effect_e.SpliceParts: the argument word was evaluated and appended to
        part_vals.  effect_part_vals is None.
      effect_e.SpliceAndAssign: the argument word was evaluated, appended to
        part_vals AND returned as effect_part_vals so the caller can assign
        it.
      effect_e.Error: effect_part_vals holds the evaluated error message;
        part_vals is untouched.
      effect_e.NoOp: nothing was done.  effect_part_vals is None.

    Example of needing multiple part_value[]

      echo X-${a:-'def'"ault"}-X

    We return two part values from the BracedVarSub.  Also consider:

      echo ${a:-x"$@"x}
    """
    undefined = (val.tag_() == value_e.Undef)
    no = None  # type: List[part_value_t]

    # TODO: Change this to a bitwise test?
    if op.op_id in (
        Id.VTest_ColonHyphen, Id.VTest_ColonEquals, Id.VTest_ColonQMark,
        Id.VTest_ColonPlus):
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                is_falsey = True
            elif case(value_e.Str):
                val = cast(value__Str, UP_val)
                is_falsey = not val.s
            elif case(value_e.MaybeStrArray):
                val = cast(value__MaybeStrArray, UP_val)
                is_falsey = not val.strs
            elif case(value_e.AssocArray):
                # An associative array can arrive here from e.g.
                # ${assoc[@]:-x}; treat it like an indexed array, i.e. it is
                # falsey when it has no entries.
                val = cast(value__AssocArray, UP_val)
                is_falsey = len(val.d) == 0
            else:
                raise NotImplementedError(val.tag_())
    else:
        is_falsey = undefined

    if op.op_id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            assert op.arg_word
            self._EvalWordToParts(op.arg_word, quoted, part_vals,
                                  is_subst=True)
            return no, effect_e.SpliceParts
        else:
            return no, effect_e.NoOp

    elif op.op_id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        # Inverse of the above.
        if is_falsey:
            return no, effect_e.NoOp
        else:
            assert op.arg_word
            self._EvalWordToParts(op.arg_word, quoted, part_vals,
                                  is_subst=True)
            return no, effect_e.SpliceParts

    elif op.op_id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalWordToParts(op.arg_word, quoted, assign_part_vals,
                                  is_subst=True)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)
            return assign_part_vals, effect_e.SpliceAndAssign
        else:
            return no, effect_e.NoOp

    elif op.op_id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalWordToParts(op.arg_word, quoted, error_part_vals,
                                  is_subst=True)
            return error_part_vals, effect_e.Error
        else:
            return no, effect_e.NoOp

    else:
        raise NotImplementedError(Id_str(op.op_id))
def _EvalIndirectArrayExpansion(self, name, index):
    # type: (str, str) -> Optional[value_t]
    """Expands ${!ref} when $ref has the form `name[index]`.

    Args:
      name, index: arbitrary strings

    Returns:
      value, or None if invalid.  Specifically:

      - None when name is not a valid variable name, when it names a plain
        string, or when an indexed array is given a non-integer index.
      - value.Undef when the variable, the array slot or the key does not
        exist.  An array slot that exists but is unset counts as missing.
      - value.MaybeStrArray for name[@] / name[*] on an indexed array.
      - value.Str for a defined element.

    Raises:
      NotImplementedError: for [@] / [*] on an associative array.
    """
    if not match.IsValidVarName(name):
        return None
    val = self.mem.GetVar(name)
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            return value.Undef()

        elif case(value_e.Str):
            return None

        elif case(value_e.MaybeStrArray):
            val = cast(value__MaybeStrArray, UP_val)
            if index in ('@', '*'):
                # TODO: maybe_decay_array
                return value.MaybeStrArray(val.strs)
            try:
                index_num = int(index)
            except ValueError:
                return None
            try:
                s = val.strs[index_num]
            except IndexError:
                return value.Undef()
            # The array representation is sparse: an unset slot is stored as
            # None and must be reported as Undef, not wrapped in value.Str.
            if s is None:
                return value.Undef()
            return value.Str(s)

        elif case(value_e.AssocArray):
            val = cast(value__AssocArray, UP_val)
            if index in ('@', '*'):
                raise NotImplementedError()
            try:
                return value.Str(val.d[index])
            except KeyError:
                return value.Undef()

        else:
            raise AssertionError()
def _ApplyPrefixOp(self, val, prefix_op, token):
    # type: (value_t, speck, Token) -> value_t
    """Apply a prefix operator: ${#x} (length) or ${!x} (indirection).

    Args:
      val: the value the operator is applied to.  Must not be Undef.
      prefix_op: the operator; only its id is consulted.
      token: the token of the variable, used for error locations.

    Returns:
      For Id.VSub_Pound, a value.Str holding the length: UTF-8 characters of
      a string, number of defined entries of an indexed array, or number of
      entries of an associative array.  When the string can't be counted and
      strict_word_eval is off, a warning is printed and '-1' is returned.

      For Id.VSub_Bang, the value the string refers to (a variable name, a
      non-negative positional parameter number, @ or *, or name[index]); for
      an indexed array, the indices of its defined entries; for an
      associative array, its keys.
    """
    assert val.tag != value_e.Undef

    op_id = prefix_op.id

    if op_id == Id.VSub_Pound:  # LENGTH
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Str):
                val = cast(value__Str, UP_val)
                # NOTE: Whether bash counts bytes or chars is affected by LANG
                # environment variables.
                # Should we respect that, or another way to select?  set -o
                # count-bytes?

                # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
                try:
                    length = string_ops.CountUtf8Chars(val.s)
                except error.Strict as e:
                    # Add this here so we don't have to add it so far down the
                    # stack.
                    # TODO: It's better to show BOTH this CODE and the actual
                    # DATA somehow.
                    e.span_id = token.span_id

                    if self.exec_opts.strict_word_eval():
                        raise
                    else:
                        # NOTE: Doesn't make the command exit with 1; it just
                        # returns a length of -1.
                        self.errfmt.PrettyPrintError(e, prefix='warning: ')
                        return value.Str('-1')

            elif case(value_e.MaybeStrArray):
                val = cast(value__MaybeStrArray, UP_val)
                # There can be empty placeholder values in the array.
                length = 0
                for s in val.strs:
                    if s is not None:
                        length += 1

            elif case(value_e.AssocArray):
                val = cast(value__AssocArray, UP_val)
                length = len(val.d)

            else:
                raise AssertionError()

        return value.Str(str(length))

    elif op_id == Id.VSub_Bang:  # ${!foo}, "indirect expansion"
        # NOTES:
        # - Could translate to eval('$' + name) or eval("\$$name")
        # - ${!array[@]} means something completely different.  TODO:
        #   implement that.
        # - It might make sense to suggest implementing this with associative
        #   arrays?

        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Str):
                val = cast(value__Str, UP_val)
                # plain variable name, like 'foo'
                if match.IsValidVarName(val.s):
                    return self.mem.GetVar(val.s)

                # positional argument, like '1'
                try:
                    arg_num = int(val.s)
                except ValueError:
                    arg_num = -1  # not a number; try the other forms below
                # int() also accepts strings like '-1'.  A negative number is
                # not a positional parameter (_EvalVarNum asserts >= 0 before
                # calling GetArgNum), so let it fall through to the error.
                if arg_num >= 0:
                    return self.mem.GetArgNum(arg_num)

                if val.s in ('@', '*'):
                    # TODO: maybe_decay_array
                    return value.MaybeStrArray(self.mem.GetArgv())

                # otherwise an array reference, like 'arr[0]' or 'arr[xyz]' or
                # 'arr[@]'
                i = val.s.find('[')
                if i >= 0 and val.s[-1] == ']':
                    name = val.s[:i]
                    index = val.s[i+1:-1]
                    result = self._EvalIndirectArrayExpansion(name, index)
                    if result is not None:
                        return result

                # Note that bash doesn't consider this fatal.  It makes the
                # command exit with '1', but we don't have that ability yet?
                e_die('Bad indirect expansion: %r', val.s, token=token)

            elif case(value_e.MaybeStrArray):
                val = cast(value__MaybeStrArray, UP_val)
                # translation issue: tuple indices not supported in list
                # comprehensions
                #indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
                indices = []  # type: List[str]
                for i, s in enumerate(val.strs):
                    if s is not None:
                        indices.append(str(i))
                return value.MaybeStrArray(indices)

            elif case(value_e.AssocArray):
                val = cast(value__AssocArray, UP_val)
                assert val.d is not None  # for MyPy, so it's not Optional[]
                return value.MaybeStrArray(val.d.keys())

            else:
                raise NotImplementedError(val.tag_())

    else:
        raise AssertionError(op_id)
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op__Unary) -> value_t
    """Apply a unary suffix operator of kind VOp1 to a value.

    The operator's argument word is evaluated to a string with
    quote_e.FnMatch quoting and then handed, together with each string, to
    string_ops.DoUnarySuffixOp.

    Args:
      val: the value to transform.  Must not be Undef.
      op: the operator and its argument word.

    Returns:
      A value.Str for a string input.  For arrays the operation is applied to
      every element and a value.MaybeStrArray is returned: unset entries of
      an indexed array are dropped, and an associative array contributes its
      values.
    """
    assert val.tag != value_e.Undef

    op_kind = consts.GetKind(op.op_id)
    if op_kind != Kind.VOp1:
        raise AssertionError(Kind_str(op_kind))

    # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
    arg_val = self.EvalWordToString(op.arg_word, quote_kind=quote_e.FnMatch)
    assert arg_val.tag == value_e.Str
    arg_str = arg_val.s

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            str_val = cast(value__Str, UP_val)
            transformed = string_ops.DoUnarySuffixOp(str_val.s, op, arg_str)
            result = value.Str(transformed)  # type: value_t

        elif case(value_e.MaybeStrArray):
            array_val = cast(value__MaybeStrArray, UP_val)
            # ${a[@]#prefix} is VECTORIZED on arrays.  Oil should have this
            # too.
            out = []  # type: List[str]
            for elem in array_val.strs:
                if elem is None:
                    continue
                out.append(string_ops.DoUnarySuffixOp(elem, op, arg_str))
            result = value.MaybeStrArray(out)

        elif case(value_e.AssocArray):
            assoc_val = cast(value__AssocArray, UP_val)
            out = []
            for elem in assoc_val.d.values():
                out.append(string_ops.DoUnarySuffixOp(elem, op, arg_str))
            result = value.MaybeStrArray(out)

        else:
            raise AssertionError(val.tag_())

    return result
def _EvalDoubleQuoted(self,
parts, # type: List[word_part_t]
part_vals, # type: List[part_value_t]
):
# type: (...) -> None
"""DoubleQuoted -> part_value
Args:
part_vals: output param to append to.
"""
# Example of returning array:
# $ a=(1 2); b=(3); $ c=(4 5)
# $ argv "${a[@]}${b[@]}${c[@]}"
# ['1', '234', '5']
# Example of multiple parts
# $ argv "${a[@]}${undef[@]:-${c[@]}}"
# ['1', '24', '5']
#log('DQ part %s', part)
# Special case for "". The parser outputs (DoubleQuoted []), instead
# of (DoubleQuoted [Literal '']). This is better but it means we
# have to check for it.
if len(parts) == 0:
v = part_value.String('', True, False)
part_vals.append(v)
return
for p in parts:
self._EvalWordPart(p, part_vals, quoted=True)
def EvalDoubleQuotedToString(self, dq_part):
    # type: (double_quoted) -> str
    """Evaluate a double quoted string to a single str.

    For double quoted strings in Oil expressions.  Example:

      var x = "$foo-${foo}"

    The parts are evaluated with _EvalDoubleQuoted and then converted with
    _PartValsToString, which is given the span id of the opening quote.
    """
    collected = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, collected)
    left_span_id = dq_part.left.span_id
    return self._PartValsToString(collected, left_span_id)
def _DecayArray(self, val):
    # type: (value__MaybeStrArray) -> value__Str
    """Decay $* to a string.

    The defined entries of the array are joined with the splitter's join
    character; unset (None) entries are skipped.
    """
    assert val.tag == value_e.MaybeStrArray, val
    join_char = self.splitter.GetJoinChar()
    defined = []  # type: List[str]
    for elem in val.strs:
        if elem is not None:
            defined.append(elem)
    return value.Str(join_char.join(defined))
def _BashArrayCompat(self, val):
    # type: (value__MaybeStrArray) -> value__Str
    """Decay ${array} to ${array[0]}.

    Returns the empty string when the array has no entries or when entry 0
    is an unset slot.
    """
    assert val.tag == value_e.MaybeStrArray, val
    s = val.strs[0] if val.strs else ''
    # Arrays are sparse: slot 0 may be None.  value.Str must hold a real
    # string, so treat an unset first slot like an empty array.
    if s is None:
        s = ''
    return value.Str(s)
def _EmptyStrOrError(self, val, token=None):
    # type: (value_t, Optional[Token]) -> value_t
    """Replace Undef with an empty string, or die when nounset is on.

    Args:
      val: the value to check.  Anything other than Undef is returned as is.
      token: optional token of the variable.  When given, the error message
        names the variable (without a leading '$') and points at the token.

    Returns:
      val itself, or value.Str('') for Undef when nounset is off.
    """
    if val.tag_() != value_e.Undef:
        return val

    if not self.exec_opts.nounset():
        return value.Str('')

    if token is None:
        e_die('Undefined variable')
    else:
        name = token.val[1:] if token.val.startswith('$') else token.val
        e_die('Undefined variable %r', name, token=token)
def _EmptyMaybeStrArrayOrError(self, token):
    # type: (Token) -> value_t
    """Return an empty array for an undefined one, or die when nounset is on.

    Args:
      token: token of the array variable (required); its val is used in the
        error message.
    """
    assert token is not None
    if not self.exec_opts.nounset():
        return value.MaybeStrArray([])
    e_die('Undefined array %r', token.val, token=token)
def _EvalBracedVarSub(self, part, part_vals, quoted):
# type: (braced_var_sub, List[part_value_t], bool) -> None
"""
Args:
part_vals: output param to append to.
"""
# We have four types of operator that interact.
#
# 1. Bracket: value -> (value, bool maybe_decay_array)
#
# 2. Then these four cases are mutually exclusive:
#
# a. Prefix length: value -> value
# b. Test: value -> part_value[]
# c. Other Suffix: value -> value
# d. no operator: you have a value
#
# That is, we don't have both prefix and suffix operators.
#
# 3. Process maybe_decay_array here before returning.
maybe_decay_array = False # for $*, ${a[*]}, etc.
bash_array_compat = False # for ${BASH_SOURCE}
var_name = None # type: str # For ${foo=default}
# 1. Evaluate from (var_name, var_num, token Id) -> value
if part.token.id == Id.VSub_Name:
var_name = part.token.val
# TODO: LINENO can use its own span_id!
val = self.mem.GetVar(var_name)
elif part.token.id == Id.VSub_Number:
var_num = int(part.token.val)
val = self._EvalVarNum(var_num)
else:
# $* decays
val, maybe_decay_array = self._EvalSpecialVar(part.token.id, quoted)
# 2. Bracket: value -> (value v, bool maybe_decay_array)
# maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER
# suffix ops are applied. If we take the length with a prefix op, the
# distinction is ignored.
if part.bracket_op:
bracket_op = part.bracket_op
UP_bracket_op = bracket_op
with tagswitch(bracket_op) as case:
if case(bracket_op_e.WholeArray):
bracket_op = cast(bracket_op__WholeArray, UP_bracket_op)
op_id = bracket_op.op_id
if op_id == Id.Lit_At:
maybe_decay_array = not quoted # ${a[@]} decays but "${a[@]}" doesn't
UP_val = val
with tagswitch(val) as case2:
if case2(value_e.Undef):
val = self._EmptyMaybeStrArrayOrError(part.token)
elif case2(value_e.Str):
val = cast(value__Str, UP_val)
e_die("Can't index string with @: %r", val, part=part)
elif case2(value_e.MaybeStrArray):
val = cast(value__MaybeStrArray, UP_val)
# TODO: Is this a no-op? Just leave 'val' alone.
val = value.MaybeStrArray(val.strs)
elif op_id == Id.Arith_Star:
maybe_decay_array = True # both ${a[*]} and "${a[*]}" decay
UP_val = val
with tagswitch(val) as case2:
if case2(value_e.Undef):
val = self._EmptyMaybeStrArrayOrError(part.token)
elif case2(value_e.Str):
val = cast(value__Str, UP_val)
e_die("Can't index string with *: %r", val, part=part)
elif case2(value_e.MaybeStrArray):
val = cast(value__MaybeStrArray, UP_val)
# TODO: Is this a no-op? Just leave 'val' alone.
# ${a[*]} or "${a[*]}" : maybe_decay_array is always true
val = value.MaybeStrArray(val.strs)
else:
raise AssertionError(op_id) # unknown
elif case(bracket_op_e.ArrayIndex):
bracket_op = cast(bracket_op__ArrayIndex, UP_bracket_op)
anode = bracket_op.expr
UP_val = val
with tagswitch(val) as case2:
if case2(value_e.Undef):
pass # it will be checked later
elif case2(value_e.Str):
# Bash treats any string as an array, so we can't add our own
# behavior here without making valid OSH invalid bash.
e_die("Can't index string %r with integer", part.token.val,
token=part.token)
elif case2(value_e.MaybeStrArray):
array_val = cast(value__MaybeStrArray, UP_val)
index = self.arith_ev.EvalToInt(anode)
try:
# could be None because representation is sparse
s = array_val.strs[index]
except IndexError:
s = None
if s is None:
val = value.Undef()
else:
val = value.Str(s)
elif case2(value_e.AssocArray):
assoc_val = cast(value__AssocArray, UP_val)
key = self.arith_ev.EvalWordToString(anode)
s = assoc_val.d.get(key)
if s is None:
val = value.Undef()
else:
val = value.Str(s)
else:
raise AssertionError(val.tag_())
else:
raise AssertionError(bracket_op.tag_())
else: # no bracket op
# When the array is "$@", var_name is None
if var_name and val.tag_() in (value_e.MaybeStrArray, value_e.AssocArray):
if var_name == _STRING_AND_ARRAY:
bash_array_compat = True
else:
e_die("Array %r can't be referred to as a scalar (without @ or *)",
var_name, part=part)
if part.prefix_op:
val = self._EmptyStrOrError(val) # maybe error
if part.suffix_op:
# Must be ${!prefix@}
assert part.prefix_op.id == Id.VSub_Bang
names = self.mem.VarNamesStartingWith(part.token.val)
names.sort()
val = value.MaybeStrArray(names)
# Test for maybe_decay_array
UP_suffix_op = part.suffix_op
if UP_suffix_op.tag_() == suffix_op_e.Nullary:
suffix_op = cast(suffix_op__Nullary, UP_suffix_op)
# "${!prefix@}" is the only one that doesn't decay
maybe_decay_array = not (quoted and suffix_op.op_id == Id.VOp3_At)
else:
raise AssertionError()
else:
# TODO: maybe_decay_array for "${!assoc[@]}" vs. ${!assoc[*]}
val = self._ApplyPrefixOp(val, part.prefix_op, part.token)
# NOTE: When applying the length operator, we can't have a test or
# suffix afterward. And we don't want to decay the array
elif part.suffix_op:
op = part.suffix_op
UP_op = op
with tagswitch(op) as case:
if case(suffix_op_e.Nullary):
op = cast(suffix_op__Nullary, UP_op)
if op.op_id == Id.VOp0_P:
prompt = self.prompt_ev.EvalPrompt(val)
# readline gets rid of these, so we should too.
p = prompt.replace('\x01', '').replace('\x02', '')
val = value.Str(p)
elif op.op_id == Id.VOp0_Q:
assert val.tag_() == value_e.Str, val
val = cast(value__Str, val)
val = value.Str(string_ops.ShellQuote(val.s))
else:
raise NotImplementedError(op.op_id)
elif case(suffix_op_e.Unary):
op = cast(suffix_op__Unary, UP_op)
if consts.GetKind(op.op_id) == Kind.VTest:
# TODO: Change style to:
# if self._ApplyTestOp(...)
# return
# It should return whether anything was done. If not, we continue to
# the end, where we might throw an error.
effect_part_vals, effect = self._ApplyTestOp(val, op, quoted, part_vals)
# NOTE: Splicing part_values is necessary because of code like
# ${undef:-'a b' c 'd # e'}. Each part_value can have a different
# do_glob/do_elide setting.
if effect == effect_e.SpliceParts:
return # EARLY RETURN, part_vals mutated
elif effect == effect_e.SpliceAndAssign:
if var_name is None:
# TODO: error context
e_die("Can't assign to special variable")
else:
# NOTE: This decays arrays too! 'set -o strict_array' could
# avoid it.
rhs_str = _DecayPartValuesToString(effect_part_vals,
self.splitter.GetJoinChar())
state.SetLocalString(self.mem, var_name, rhs_str)
return # EARLY RETURN, part_vals mutated
elif effect == effect_e.Error:
error_str = _DecayPartValuesToString(effect_part_vals,
self.splitter.GetJoinChar())
e_die("unset variable %r", error_str, token=part.token)
else:
pass # do nothing, may still be undefined
else:
val = self._EmptyStrOrError(val) # maybe error