/
word_eval.py
1188 lines (967 loc) · 36.6 KB
/
word_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
word_eval.py - Evaluator for the word language.
"""
import pwd
from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.syntax_asdl import (
word_e, bracket_op_e, suffix_op_e, word_part_e,
)
from _devbuild.gen.runtime_asdl import (
part_value, part_value_e, value, value_e, value_t, effect_e, arg_vector
)
from core import process
from core.meta import LookupKind
from core import util
from core.util import log, e_die
from frontend import match
from osh import braces
from osh import glob_
from osh import string_ops
from osh import state
from osh import word
from osh import word_compile
import posix_ as posix
# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
def _BackslashEscape(s):
"""Double up backslashes.
Useful for strings about to be globbed and strings about to be IFS escaped.
"""
return s.replace('\\', '\\\\')
def _ValueToPartValue(val, quoted):
  """Convert a value into the part_value that a variable substitution yields.

  Called by _EvalBracedVarSub, and by _EvalWordPart for SimpleVarSub.

  Args:
    val: a value_t.  Undef is not accepted; callers resolve it beforehand
      with _EmptyStrOrError().
    quoted: passed through as the String's quoted flag; its negation becomes
      the do_split flag.

  Returns:
    part_value.String for Str and Obj values, part_value.Array for StrArray
    and AssocArray values.
  """
  assert isinstance(val, value_t), val

  tag = val.tag
  if tag == value_e.StrArray:
    return part_value.Array(val.strs)
  if tag == value_e.AssocArray:
    # TODO: Is this correct?
    return part_value.Array(val.d.values())

  do_split = not quoted
  if tag == value_e.Str:
    return part_value.String(val.s, quoted, do_split)
  if tag == value_e.Obj:
    return part_value.String(repr(val.obj), quoted, do_split)

  # Undef should be caught by _EmptyStrOrError().
  raise AssertionError(val.__class__.__name__)
def _MakeWordFrames(part_vals):
  """Group a flat list of part_values into frames.

  A word evaluates to a flat list of part_value (String or Array).  A frame
  is a portion that results in zero or more args.  Frames can never be joined
  with each other.  This idea exists because of arrays like "$@" and
  "${a[@]}".

  Args:
    part_vals: list of part_value.

  Returns:
    A list of frames.  Each frame is a list of (str, quoted, do_split)
    tuples.  There is always at least one frame, which may be empty.

  Example:

    a=(1 '2 3' 4)
    x=x
    y=y
    $x"${a[@]}"$y

    Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]
  """
  frame = []
  frames = [frame]

  for pv in part_vals:
    if pv.tag == part_value_e.String:
      frame.append((pv.s, pv.quoted, pv.do_split))
      continue

    if pv.tag != part_value_e.Array:
      raise AssertionError(pv.__class__.__name__)

    # Unset (None) entries are skipped.  Array parts are always quoted;
    # otherwise they would have decayed to a string.
    pieces = [(s, True, False) for s in pv.strs if s is not None]
    if pieces:
      # The first element extends the frame that is currently open ...
      frame.append(pieces[0])
      # ... and every later element opens a new frame of its own.
      for piece in pieces[1:]:
        frame = [piece]
        frames.append(frame)

  return frames
# TODO: This could be _MakeWordFrames and then sep.join().  It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
  """Flatten part_values into one string, e.g. to decay ${a=x"$@"x}.

  Args:
    part_vals: list of part_value.  Anything that is not a String is treated
      as an Array.
    join_char: separator placed between the elements of each Array.

  Returns:
    A str: all the pieces concatenated with nothing between them.
  """
  def _Flatten(pv):
    if pv.tag == part_value_e.String:
      return pv.s
    # Unset (None) array entries are dropped before joining.
    return join_char.join(s for s in pv.strs if s is not None)

  return ''.join([_Flatten(pv) for pv in part_vals])
def _PerformSlice(val, begin, length, part):
  """Apply a ${var:begin:length} slice to a string or array value.

  Args:
    val: value.Str or value.StrArray to slice.
    begin: integer start.  For a string it counts UTF-8 characters and may not
      be negative; for an array it is used as a Python slice start.
    length: integer count, or None when no length was given.  For arrays,
      unset elements don't count towards the length.
    part: the word part being evaluated; used to attribute errors.

  Returns:
    A new value.Str or value.StrArray.

  Raises:
    util.InvalidSlice: negative begin or length on a string.
    NotImplementedError: val is not a Str, StrArray or AssocArray.
    A negative array length and slicing an associative array are reported
    through e_die().  NOTE(review): the caller also handles util.InvalidUtf8,
    presumably raised by string_ops.AdvanceUtf8Chars -- confirm.
  """
  if val.tag == value_e.Str:  # Slice UTF-8 characters in a string.
    s = val.s

    if begin < 0:
      # It could be negative if we compute unicode length, but that's
      # confusing.

      # TODO: Instead of attributing it to the word part, it would be
      # better if we attributed it to arith_expr begin.
      raise util.InvalidSlice(
          "The start index of a string slice can't be negative: %d",
          begin, part=part)

    byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

    if length is None:
      byte_end = len(s)
    else:
      if length < 0:
        # TODO: Instead of attributing it to the word part, it would be
        # better if we attributed it to arith_expr begin.
        raise util.InvalidSlice(
            "The length of a string slice can't be negative: %d",
            length, part=part)

      byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin)

    val = value.Str(s[byte_begin : byte_end])

  elif val.tag == value_e.StrArray:  # Slice array entries.
    # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
    # strings.
    if length is not None and length < 0:
      e_die("The length index of a array slice can't be negative: %d",
            length, part=part)

    # NOTE: unset elements don't count towards the length.
    strs = []
    # A length of 0 selects nothing.  Without this guard the first element
    # was appended before the length was checked, so len(strs) == 0 never
    # held and the whole tail of the array was returned.
    if length != 0:
      for s in val.strs[begin:]:
        if s is not None:
          strs.append(s)
          if len(strs) == length:  # never true for unspecified length
            break
    val = value.StrArray(strs)

  elif val.tag == value_e.AssocArray:
    e_die("Can't slice associative arrays", part=part)

  else:
    raise NotImplementedError(val.__class__.__name__)

  return val
class _WordEvaluator(object):
"""Abstract base class for word evaluators.
Public entry points:
EvalWordToString
EvalForPlugin
EvalRhsWord
EvalWordSequence
EvalWordSequence2
"""
def __init__(self, mem, exec_opts, exec_deps, arena):
  """Store the collaborators used during word evaluation.

  Args:
    mem: variable state, e.g. for $HOME, $1, etc.
    exec_opts: shell options, e.g. for nounset.
    exec_deps: provides splitter, prompt_ev, arith_ev and errfmt.
    arena: accepted but not used by this constructor.
  """
  self.mem = mem
  self.exec_opts = exec_opts

  # Collaborators that arrive bundled in exec_deps.
  deps = exec_deps
  self.splitter, self.prompt_ev = deps.splitter, deps.prompt_ev
  self.arith_ev, self.errfmt = deps.arith_ev, deps.errfmt

  # TODO: Consolidate into exec_deps.  Executor also instantiates one.
  self.globber = glob_.Globber(exec_opts)
def _EvalCommandSub(self, part, quoted):
"""Abstract since it has a side effect.
Args:
part: CommandSubPart
Returns:
part_value
"""
raise NotImplementedError
def _EvalProcessSub(self, part, id_):
"""Abstract since it has a side effect.
Args:
part: CommandSubPart
Returns:
part_value
"""
raise NotImplementedError
def _EvalTildeSub(self, token):
"""Evaluates ~ and ~user.
Args:
prefix: The tilde prefix (possibly empty)
"""
if token.val == '~':
# First look up the HOME var, then ask the OS. This is what bash does.
val = self.mem.GetVar('HOME')
if val.tag == value_e.Str:
return val.s
return process.GetHomeDir()
# For ~otheruser/src. TODO: Should this be cached?
# http://linux.die.net/man/3/getpwnam
name = token.val[1:]
try:
e = pwd.getpwnam(name)
except KeyError:
# If not found, it's ~nonexistente. TODO: In strict mode, this should be
# an error, kind of like failglob and nounset. Perhaps strict-tilde or
# even strict-word-eval.
result = token.val
else:
result = e.pw_dir
return result
def _EvalVarNum(self, var_num):
assert var_num >= 0
return self.mem.GetArgNum(var_num)
def _EvalSpecialVar(self, op_id, quoted):
  """Evaluate $@, $*, $- or another special variable.

  Args:
    op_id: Id of the variable token.
    quoted: whether the variable is in a double quoted context.

  Returns:
    (val, bool maybe_decay_array).

  TODO: Should that boolean be part of the value?
  """
  # $@ is special -- it needs to know whether it is in a double quoted
  # context.
  #
  # - "$@" in a double quoted context stays an ARRAY.
  # - $@ in a normal context is flagged to decay to a STRING, which then will
  #   be subject to splitting.
  if op_id == Id.VSub_At:
    return value.StrArray(self.mem.GetArgv()), not quoted

  # $* and "$*" are always flagged to decay.
  if op_id == Id.VSub_Star:
    return value.StrArray(self.mem.GetArgv()), True

  if op_id == Id.VSub_Hyphen:
    return value.Str(self.exec_opts.GetDollarHyphen()), False

  return self.mem.GetSpecialVar(op_id), False  # don't decay
def _ApplyTestOp(self, val, op, quoted, part_vals):
  """Apply a test operator such as ${a:-x}, ${a:+x}, ${a:=x} or ${a:?x}.

  The colon forms treat an undefined value, an empty string and an empty
  array as "falsey"; the forms without a colon only treat an undefined value
  that way.

  Args:
    val: the value being tested.
    op: the suffix op; op.op_id selects the operator and op.arg_word is the
      word to evaluate when the operator fires.
    quoted: passed through to _EvalWordToParts.
    part_vals: output param.  The evaluated arg word is appended to it for
      SpliceParts and SpliceAndAssign.

  Returns:
    (effect_part_vals, effect_e):
      (None, NoOp):  nothing was done.
      (None, SpliceParts):  the arg word was appended to part_vals.
      (part_value[], SpliceAndAssign):  the arg word was appended to
        part_vals and is also returned so the caller can assign it.
      (part_value[], Error):  the returned parts are the error message;
        part_vals is untouched.

  Raises:
    NotImplementedError: op.op_id is not one of the handled test operators.

  Example of needing multiple part_value[]

    echo X-${a:-'def'"ault"}-X

  We return two part values from the BracedVarSub.  Also consider:

    echo ${a:-x"$@"x}
  """
  undefined = (val.tag == value_e.Undef)

  # TODO: Change this to a bitwise test?
  if op.op_id in (
      Id.VTest_ColonHyphen, Id.VTest_ColonEquals, Id.VTest_ColonQMark,
      Id.VTest_ColonPlus):
    is_falsey = (
        undefined or
        (val.tag == value_e.Str and not val.s) or
        (val.tag == value_e.StrArray and not val.strs)
    )
  else:
    is_falsey = undefined

  if op.op_id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
    if is_falsey:
      self._EvalWordToParts(op.arg_word, quoted, part_vals, is_subst=True)
      return None, effect_e.SpliceParts
    else:
      return None, effect_e.NoOp

  elif op.op_id in (Id.VTest_ColonPlus, Id.VTest_Plus):
    # Inverse of the above.
    if is_falsey:
      return None, effect_e.NoOp
    else:
      self._EvalWordToParts(op.arg_word, quoted, part_vals, is_subst=True)
      return None, effect_e.SpliceParts

  elif op.op_id in (Id.VTest_ColonEquals, Id.VTest_Equals):
    if is_falsey:
      # Collect new part vals.
      assign_part_vals = []
      self._EvalWordToParts(op.arg_word, quoted, assign_part_vals,
                            is_subst=True)

      # Append them to out param AND return them.
      part_vals.extend(assign_part_vals)
      return assign_part_vals, effect_e.SpliceAndAssign
    else:
      return None, effect_e.NoOp

  elif op.op_id in (Id.VTest_ColonQMark, Id.VTest_QMark):
    if is_falsey:
      # The arg is the error message
      error_part_vals = []
      self._EvalWordToParts(op.arg_word, quoted, error_part_vals,
                            is_subst=True)
      return error_part_vals, effect_e.Error
    else:
      return None, effect_e.NoOp

  else:
    # Report the offending operator.  This used to pass `id`, which is the
    # Python builtin function, not the operator id.
    raise NotImplementedError(op.op_id)
def _EvalIndirectArrayExpansion(self, name, index):
  """Expands ${!ref} when $ref has the form `name[index]`.

  Args:
    name, index: arbitrary strings

  Returns:
    value, or None if invalid.  An array index that is out of range or that
    refers to an unset slot, a missing associative array key, and an
    undefined variable all give value.Undef.

  Raises:
    NotImplementedError: `index` is '@' or '*' on an associative array.
    AssertionError: the variable holds an unexpected kind of value.
  """
  if not match.IsValidVarName(name):
    return None
  val = self.mem.GetVar(name)

  if val.tag == value_e.StrArray:
    if index in ('@', '*'):
      # TODO: maybe_decay_array
      return value.StrArray(val.strs)

    try:
      index_num = int(index)
    except ValueError:
      return None

    try:
      s = val.strs[index_num]
    except IndexError:
      s = None
    # The array representation is sparse: an unset slot holds None.  Treat it
    # like an out-of-range index instead of wrapping None in a value.Str.
    if s is None:
      return value.Undef()
    return value.Str(s)

  elif val.tag == value_e.AssocArray:
    if index in ('@', '*'):
      raise NotImplementedError
    try:
      return value.Str(val.d[index])
    except KeyError:
      return value.Undef()

  elif val.tag == value_e.Undef:
    return value.Undef()

  elif val.tag == value_e.Str:
    return None

  else:
    raise AssertionError(val.__class__.__name__)
def _ApplyPrefixOp(self, val, op_id, token):
  """Apply a prefix operator: ${#x} (length) or ${!x} (indirect expansion).

  Args:
    val: the value to operate on.  Must not be Undef.
    op_id: Id.VSub_Pound or Id.VSub_Bang.
    token: token used to attribute a bad indirect expansion.

  Returns:
    value

  Raises:
    util.InvalidUtf8: re-raised from the length computation when
      strict_word_eval is on.
    AssertionError: unknown op_id, or a value the operator can't handle.
    A bad indirect expansion is reported through e_die().
  """
  assert val.tag != value_e.Undef

  if op_id == Id.VSub_Pound:  # LENGTH
    if val.tag == value_e.Str:
      # NOTE: Whether bash counts bytes or chars is affected by LANG
      # environment variables.
      # Should we respect that, or another way to select?  set -o
      # count-bytes?

      # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
      try:
        length = string_ops.CountUtf8Chars(val.s)
      except util.InvalidUtf8 as e:
        # TODO: Add location info from 'part'?  Only the caller has it.
        if self.exec_opts.strict_word_eval:
          raise
        else:
          # NOTE: Doesn't make the command exit with 1; it just returns a
          # length of -1.
          self.errfmt.PrettyPrintError(e, prefix='warning: ')
          return value.Str('-1')

    elif val.tag == value_e.StrArray:
      # There can be empty placeholder values in the array.
      length = sum(1 for s in val.strs if s is not None)

    elif val.tag == value_e.AssocArray:
      # Number of entries.  Without this branch `length` was never bound and
      # the return below failed with UnboundLocalError.
      length = len(val.d)

    else:
      raise AssertionError(val.__class__.__name__)

    return value.Str(str(length))

  elif op_id == Id.VSub_Bang:  # ${!foo}, "indirect expansion"
    # NOTES:
    # - Could translate to eval('$' + name) or eval("\$$name")
    # - ${!array[@]} means something completely different.  TODO: implement
    #   that.
    # - It might make sense to suggest implementing this with associative
    #   arrays?

    if val.tag == value_e.Str:
      # plain variable name, like 'foo'
      if match.IsValidVarName(val.s):
        return self.mem.GetVar(val.s)

      # positional argument, like '1'
      try:
        return self.mem.GetArgNum(int(val.s))
      except ValueError:
        pass

      if val.s in ('@', '*'):
        # TODO maybe_decay_array
        return value.StrArray(self.mem.GetArgv())

      # otherwise an array reference, like 'arr[0]' or 'arr[xyz]' or 'arr[@]'
      i = val.s.find('[')
      if i >= 0 and val.s[-1] == ']':
        name, index = val.s[:i], val.s[i+1:-1]
        result = self._EvalIndirectArrayExpansion(name, index)
        if result is not None:
          return result

      # Note that bash doesn't consider this fatal.  It makes the
      # command exit with '1', but we don't have that ability yet?
      e_die('Bad indirect expansion: %r', val.s, token=token)

    elif val.tag == value_e.StrArray:
      # The indices of the slots that are set.
      indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
      return value.StrArray(indices)

    else:
      raise AssertionError

  else:
    raise AssertionError(op_id)
def _ApplyUnarySuffixOp(self, val, op):
  """Apply a suffix operator that takes one word argument.

  Args:
    val: the value to transform.  Must not be Undef.
    op: the suffix op.  Its op_id must be of Kind.VOp1; op.arg_word is
      evaluated as an fnmatch pattern.

  Returns:
    value.Str for a Str input.  Any other input is treated as an array and
    gives a value.StrArray with the op applied to each set element.
  """
  assert val.tag != value_e.Undef

  kind = LookupKind(op.op_id)
  if kind != Kind.VOp1:
    raise AssertionError(kind)

  # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
  arg_val = self.EvalWordToString(op.arg_word, do_fnmatch=True)
  assert arg_val.tag == value_e.Str

  if val.tag == value_e.Str:
    return value.Str(string_ops.DoUnarySuffixOp(val.s, op, arg_val.s))

  # ${a[@]#prefix} is VECTORIZED on arrays.  Oil should have this too.
  transformed = [
      string_ops.DoUnarySuffixOp(elem, op, arg_val.s)
      for elem in val.strs if elem is not None
  ]
  return value.StrArray(transformed)
def _EvalDoubleQuotedPart(self, part, part_vals):
"""DoubleQuotedPart -> part_value
Args:
part_vals: output param to append to.
"""
# Example of returning array:
# $ a=(1 2); b=(3); $ c=(4 5)
# $ argv "${a[@]}${b[@]}${c[@]}"
# ['1', '234', '5']
# Example of multiple parts
# $ argv "${a[@]}${undef[@]:-${c[@]}}"
# ['1', '24', '5']
#log('DQ part %s', part)
# Special case for "". The parser outputs (DoubleQuotedPart []), instead
# of (DoubleQuotedPart [LiteralPart '']). This is better but it means we
# have to check for it.
if not part.parts:
v = part_value.String('', True, False)
part_vals.append(v)
return
for p in part.parts:
self._EvalWordPart(p, part_vals, quoted=True)
def _DecayArray(self, val):
  """Join a StrArray into one Str with the splitter's join char.

  Unset (None) entries are left out.
  """
  assert val.tag == value_e.StrArray, val
  join_char = self.splitter.GetJoinChar()
  present = [s for s in val.strs if s is not None]
  return value.Str(join_char.join(present))
def _EmptyStrOrError(self, val, token=None):
  """Resolve an Undef value: an empty string, or an error under nounset.

  Args:
    val: a value_t.
    token: optional token of the variable, used to name it in the error and
      to attribute the error.

  Returns:
    `val` itself unless it is Undef; value.Str('') for Undef when nounset is
    off.  With nounset on, Undef is reported through e_die().
  """
  assert isinstance(val, value_t), val

  if val.tag != value_e.Undef:
    return val
  if not self.exec_opts.nounset:
    return value.Str('')

  if token is None:
    e_die('Undefined variable')
  else:
    tok_text = token.val
    # Drop the leading $ of a token like $foo.
    name = tok_text[1:] if tok_text.startswith('$') else tok_text
    e_die('Undefined variable %r', name, token=token)
def _EmptyStrArrayOrError(self, token):
  """Resolve an undefined array: an empty StrArray, or an error under nounset.

  Args:
    token: token of the array variable; required.
  """
  assert token is not None
  if not self.exec_opts.nounset:
    return value.StrArray([])
  e_die('Undefined array %r', token.val, token=token)
def _EvalBracedVarSub(self, part, part_vals, quoted):
  """Evaluate a ${...} substitution and append the result to part_vals.

  Args:
    part: the BracedVarSub word part.  Its token, bracket_op, prefix_op and
      suffix_op are read here.
    part_vals: output param to append to.
    quoted: passed on to the helpers; also decides whether ${a[@]} decays.

  Returns:
    None.  Either one part_value is appended at the end, or a test operator
    has already spliced its own part_values and the method returns early.
  """
  # We have four types of operator that interact.
  #
  # 1. Bracket: value -> (value, bool maybe_decay_array)
  #
  # 2. Then these four cases are mutually exclusive:
  #
  #   a. Prefix length: value -> value
  #   b. Test: value -> part_value[]
  #   c. Other Suffix: value -> value
  #   d. no operator: you have a value
  #
  # That is, we don't have both prefix and suffix operators.
  #
  # 3. Process maybe_decay_array here before returning.

  maybe_decay_array = False  # for $*, ${a[*]}, etc.
  var_name = None  # For ${foo=default}

  # 1. Evaluate from (var_name, var_num, token Id) -> value
  if part.token.id == Id.VSub_Name:
    var_name = part.token.val
    val = self.mem.GetVar(var_name)
    #log('EVAL NAME %s -> %s', var_name, val)

  elif part.token.id == Id.VSub_Number:
    var_num = int(part.token.val)
    val = self._EvalVarNum(var_num)
  else:
    # $* decays
    val, maybe_decay_array = self._EvalSpecialVar(part.token.id, quoted)

  # 2. Bracket: value -> (value v, bool maybe_decay_array)
  # maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER
  # suffix ops are applied.  If we take the length with a prefix op, the
  # distinction is ignored.
  if part.bracket_op:
    if part.bracket_op.tag == bracket_op_e.WholeArray:
      op_id = part.bracket_op.op_id

      if op_id == Id.Lit_At:
        if not quoted:
          maybe_decay_array = True  # ${a[@]} decays but "${a[@]}" doesn't
        if val.tag == value_e.Undef:
          val = self._EmptyStrArrayOrError(part.token)
        elif val.tag == value_e.Str:
          e_die("Can't index string with @: %r", val, part=part)
        elif val.tag == value_e.StrArray:
          # TODO: Is this a no-op?  Just leave 'val' alone.
          val = value.StrArray(val.strs)
        # NOTE: any other kind of value falls through unchanged.

      elif op_id == Id.Arith_Star:
        maybe_decay_array = True  # both ${a[*]} and "${a[*]}" decay
        if val.tag == value_e.Undef:
          val = self._EmptyStrArrayOrError(part.token)
        elif val.tag == value_e.Str:
          e_die("Can't index string with *: %r", val, part=part)
        elif val.tag == value_e.StrArray:
          # TODO: Is this a no-op?  Just leave 'val' alone.
          # ${a[*]} or "${a[*]}" :  maybe_decay_array is always true
          val = value.StrArray(val.strs)
        # NOTE: any other kind of value falls through unchanged.

      else:
        raise AssertionError(op_id)  # unknown

    elif part.bracket_op.tag == bracket_op_e.ArrayIndex:
      anode = part.bracket_op.expr

      if val.tag == value_e.Undef:
        pass  # it will be checked later

      elif val.tag == value_e.Str:
        # Bash treats any string as an array, so we can't add our own
        # behavior here without making valid OSH invalid bash.
        e_die("Can't index string %r with integer", part.token.val,
              token=part.token)

      elif val.tag == value_e.StrArray:
        index = self.arith_ev.Eval(anode)
        try:
          # could be None because representation is sparse
          s = val.strs[index]
        except IndexError:
          s = None

        # Out of range and unset slots both become Undef.
        if s is None:
          val = value.Undef()
        else:
          val = value.Str(s)

      elif val.tag == value_e.AssocArray:
        # The key is evaluated with int_coerce=False.
        key = self.arith_ev.Eval(anode, int_coerce=False)
        try:
          val = value.Str(val.d[key])
        except KeyError:
          val = value.Undef()

      else:
        raise AssertionError(val.__class__.__name__)

    else:
      raise AssertionError(part.bracket_op.tag)

  if part.prefix_op:
    val = self._EmptyStrOrError(val)  # maybe error
    val = self._ApplyPrefixOp(val, part.prefix_op, token=part.token)
    # NOTE: When applying the length operator, we can't have a test or
    # suffix afterward.  And we don't want to decay the array

  elif part.suffix_op:
    op = part.suffix_op
    if op.tag == suffix_op_e.StringNullary:
      if op.op_id == Id.VOp0_P:
        prompt = self.prompt_ev.EvalPrompt(val)
        # readline gets rid of these, so we should too.
        p = prompt.replace('\x01', '').replace('\x02', '')
        val = value.Str(p)
      elif op.op_id == Id.VOp0_Q:
        # NOTE(review): reads val.s without checking the tag, so this
        # assumes a Str value -- confirm.
        val = value.Str(string_ops.ShellQuote(val.s))
      else:
        raise NotImplementedError(op.op_id)

    elif op.tag == suffix_op_e.StringUnary:
      if LookupKind(part.suffix_op.op_id) == Kind.VTest:
        # TODO: Change style to:
        # if self._ApplyTestOp(...)
        #   return
        # It should return whether anything was done.  If not, we continue to
        # the end, where we might throw an error.

        effect_part_vals, effect = self._ApplyTestOp(val, part.suffix_op,
                                                     quoted, part_vals)

        # NOTE: Splicing part_values is necessary because of code like
        # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
        # do_glob/do_elide setting.
        if effect == effect_e.SpliceParts:
          return  # EARLY RETURN, part_vals mutated

        elif effect == effect_e.SpliceAndAssign:
          if var_name is None:
            # TODO: error context
            e_die("Can't assign to special variable")
          else:
            # NOTE: This decays arrays too!  'set -o strict_array' could
            # avoid it.
            rhs_str = _DecayPartValuesToString(effect_part_vals,
                                               self.splitter.GetJoinChar())
            state.SetLocalString(self.mem, var_name, rhs_str)
          return  # EARLY RETURN, part_vals mutated

        elif effect == effect_e.Error:
          # The evaluated arg word is the error message.
          error_str = _DecayPartValuesToString(effect_part_vals,
                                               self.splitter.GetJoinChar())
          e_die("unset variable %r", error_str, token=part.token)

        else:
          pass  # do nothing, may still be undefined

      else:
        val = self._EmptyStrOrError(val)  # maybe error
        # Other suffix: value -> value
        val = self._ApplyUnarySuffixOp(val, part.suffix_op)

    elif op.tag == suffix_op_e.PatSub:  # PatSub, vectorized
      val = self._EmptyStrOrError(val)  # ${undef//x/y}

      # globs are supported in the pattern
      pat_val = self.EvalWordToString(op.pat, do_fnmatch=True)
      assert pat_val.tag == value_e.Str, pat_val

      if op.replace:
        replace_val = self.EvalWordToString(op.replace)
        assert replace_val.tag == value_e.Str, replace_val
        replace_str = replace_val.s
      else:
        replace_str = ''

      regex, warnings = glob_.GlobToERE(pat_val.s)
      if warnings:
        # TODO:
        # - Add 'set -o strict-glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
      replacer = string_ops.GlobReplacer(regex, replace_str, op.spids[0])

      if val.tag == value_e.Str:
        s = replacer.Replace(val.s, op)
        val = value.Str(s)

      elif val.tag == value_e.StrArray:
        # Applied to each set element; unset (None) slots are dropped.
        strs = []
        for s in val.strs:
          if s is not None:
            strs.append(replacer.Replace(s, op))
        val = value.StrArray(strs)

      else:
        raise AssertionError(val.__class__.__name__)

    elif op.tag == suffix_op_e.Slice:
      val = self._EmptyStrOrError(val)  # ${undef:3:1}

      if op.begin:
        begin = self.arith_ev.Eval(op.begin)
      else:
        begin = 0

      if op.length:
        length = self.arith_ev.Eval(op.length)
      else:
        length = None

      try:
        val = _PerformSlice(val, begin, length, part)
      except (util.InvalidSlice, util.InvalidUtf8) as e:
        if self.exec_opts.strict_word_eval:
          raise
        else:
          # Non-strict: warn and substitute an empty value of the same kind.
          self.errfmt.PrettyPrintError(e, prefix='warning: ')
          if val.tag == value_e.Str:
            val = value.Str('')
          elif val.tag == value_e.StrArray:
            val = value.StrArray([])
          else:
            raise NotImplementedError

  # After applying suffixes, process maybe_decay_array here.
  if maybe_decay_array and val.tag == value_e.StrArray:
    val = self._DecayArray(val)

  # For the case where there are no prefix or suffix ops.
  val = self._EmptyStrOrError(val)

  # For example, ${a} evaluates to value.Str(), but we want a
  # part_value.String().
  part_val = _ValueToPartValue(val, quoted)
  part_vals.append(part_val)
def _EvalWordPart(self, part, part_vals, quoted=False, is_subst=False):
  """Evaluate a word part.

  Dispatches on part.tag and appends the resulting part_value(s) to
  part_vals.

  Args:
    part: the word part to evaluate.
    part_vals: Output parameter.
    quoted: quoted flag of the enclosing context; passed on to the
      substitution helpers.
    is_subst: used as the do_split flag of a LiteralPart.

  Returns:
    None

  Raises:
    AssertionError: for an ArrayLiteralPart or an unknown kind of part.
  """
  if part.tag == word_part_e.ArrayLiteralPart:
    raise AssertionError(
        'Array literal should have been handled at word level')

  elif part.tag == word_part_e.LiteralPart:
    # Split if it's in a substitution.
    # That is: echo is not split, but ${foo:-echo} is split
    v = part_value.String(part.token.val, quoted, is_subst)
    part_vals.append(v)

  elif part.tag == word_part_e.EscapedLiteralPart:
    val = part.token.val
    assert len(val) == 2, val  # e.g. \*
    assert val[0] == '\\'
    # Keep only the escaped character, marked quoted and not split.
    s = val[1]
    v = part_value.String(s, True, False)
    part_vals.append(v)

  elif part.tag == word_part_e.SingleQuotedPart:
    if part.left.id == Id.Left_SingleQuote:
      s = ''.join(t.val for t in part.tokens)
    elif part.left.id == Id.Left_DollarSingleQuote:
      # NOTE: This could be done at compile time
      # TODO: Add location info for invalid backslash
      s = ''.join(word_compile.EvalCStringToken(t.id, t.val)
                  for t in part.tokens)
    else:
      raise AssertionError(part.left.id)

    v = part_value.String(s, True, False)
    part_vals.append(v)

  elif part.tag == word_part_e.DoubleQuotedPart:
    self._EvalDoubleQuotedPart(part, part_vals)

  elif part.tag == word_part_e.CommandSubPart:
    id_ = part.left_token.id
    # The left token decides between command and process substitution.
    if id_ in (Id.Left_DollarParen, Id.Left_Backtick):
      v = self._EvalCommandSub(part.command_list, quoted)

    elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
      v = self._EvalProcessSub(part.command_list, id_)

    else:
      raise AssertionError(id_)

    part_vals.append(v)

  elif part.tag == word_part_e.SimpleVarSub:
    maybe_decay_array = False
    # 1. Evaluate from (var_name, var_num, token) -> defined, value
    if part.token.id == Id.VSub_DollarName:
      var_name = part.token.val[1:]  # strip the first character of the token
      val = self.mem.GetVar(var_name)
    elif part.token.id == Id.VSub_Number:
      var_num = int(part.token.val[1:])
      val = self._EvalVarNum(var_num)
    else:
      val, maybe_decay_array = self._EvalSpecialVar(part.token.id, quoted)

    #log('SIMPLE %s', part)
    val = self._EmptyStrOrError(val, token=part.token)
    if maybe_decay_array and val.tag == value_e.StrArray:
      val = self._DecayArray(val)
    v = _ValueToPartValue(val, quoted)
    part_vals.append(v)

  elif part.tag == word_part_e.BracedVarSub:
    self._EvalBracedVarSub(part, part_vals, quoted)

  elif part.tag == word_part_e.TildeSubPart:
    # We never parse a quoted string into a TildeSubPart.
    assert not quoted
    s = self._EvalTildeSub(part.token)
    v = part_value.String(s, True, False)  # NOT split even when unquoted!
    part_vals.append(v)

  elif part.tag == word_part_e.ArithSubPart:
    num = self.arith_ev.Eval(part.anode)
    v = part_value.String(str(num), quoted, not quoted)
    part_vals.append(v)

  elif part.tag == word_part_e.ExtGlobPart:
    # Do NOT split these.
    # Emitted as: the operator token, the arms separated by '|', then ')'.
    part_vals.append(part_value.String(part.op.val, False, False))
    for i, w in enumerate(part.arms):
      if i != 0:
        part_vals.append(part_value.String('|', False, False))  # separator
      # This flattens the tree!
      self._EvalWordToParts(w, False, part_vals)  # eval like not quoted?
    part_vals.append(part_value.String(')', False, False))  # closing )

  else:
    raise AssertionError(part.__class__.__name__)
def _EvalWordToParts(self, word, quoted, part_vals, is_subst=False):
  """Helper for EvalRhsWord, EvalWordSequence, etc.

  Appends the part_values of `word` to part_vals.  Nothing is returned.

  Args:
    word: a CompoundWord or an EmptyWord.
    quoted, is_subst: passed on to _EvalWordPart for every part.
    part_vals: output param to append to.
  """
  tag = word.tag

  if tag == word_e.EmptyWord:
    part_vals.append(part_value.String('', quoted, not quoted))
    return

  if tag != word_e.CompoundWord:
    raise AssertionError(word.__class__.__name__)

  for word_part in word.parts:
    self._EvalWordPart(word_part, part_vals, quoted=quoted,
                       is_subst=is_subst)
# Do we need this?
def EvalWordToPattern(self, word):
  """Not implemented yet: currently does nothing and returns None.

  The intent is that, given a word, it returns pattern.ERE if the word has an
  ExtGlobPart, or pattern.Fnmatch otherwise.

  NOTE: Have to handle nested extglob like: [[ foo == ${empty:-@(foo|bar)} ]]
  """
  return None
def EvalWordToString(self, word, do_fnmatch=False, do_ere=False):
  """Evaluate a word to a single value.Str.

  Used for redirect arg, ControlFlow arg, ArithWord, BoolWord, etc.

  Args:
    word: CompoundWord or EmptyWord.
    do_fnmatch: glob-escape the quoted pieces.  True for case $pat and RHS
      of [[ == ]].
    do_ere: escape the quoted pieces for an extended regex instead.  Only
      consulted when do_fnmatch doesn't apply.

  Returns:
    value.Str

  Example of why quoted pieces are escaped:

    pat="*.py"
    case $x in
      $pat) echo 'matches glob pattern' ;;
      "$pat") echo 'equal to glob string' ;;  # must be glob escaped
    esac

  TODO: Raise AssertionError if it has ExtGlobPart.
  """
  if word.tag == word_e.EmptyWord:
    return value.Str('')

  evaluated = []
  for word_part in word.parts:
    self._EvalWordPart(word_part, evaluated, quoted=False)

  pieces = []
  for pv in evaluated:
    if pv.tag != part_value_e.String:
      # An array ended up inside a word that must be a string.
      if self.exec_opts.strict_array:
        # Examples: echo f > "$@"; local foo="$@"

        # TODO: This attributes too coarsely, to the word rather than the
        # parts.  Problem: the word is a TREE of parts, but we only have a
        # flat list of part_vals.  The only case where we really get arrays
        # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
        e_die("This word should evaluate to a string, but part of it was an "
              "array", word=word)

        # TODO: Maybe add detail like this.
        #e_die('RHS of assignment should only have strings.  '
        #      'To assign arrays, use b=( "${a[@]}" )')
      else:
        # It appears to not respect IFS
        piece = ' '.join(elem for elem in pv.strs if elem is not None)
    elif do_fnmatch and pv.quoted:
      # [[ foo == */"*".py ]] or case *.py) ... esac
      piece = glob_.GlobEscape(pv.s)
    elif do_ere and pv.quoted:
      piece = glob_.ExtendedRegexEscape(pv.s)
    else:
      piece = pv.s

    pieces.append(piece)

  return value.Str(''.join(pieces))
def EvalForPlugin(self, w):
"""Wrapper around EvalWordToString that prevents errors.
Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42) are
handled here.
"""
self.mem.PushStatusFrame() # to "sandbox" $? and $PIPESTATUS
try:
val = self.EvalWordToString(w)
except util.FatalRuntimeError as e:
val = value.Str('<Runtime error: %s>' % e.UserErrorString())
except (OSError, IOError) as e:
# This is like the catch-all in Executor.ExecuteAndCatch().
val = value.Str('<I/O error: %s>' % posix.strerror(e.errno))