/
cmd_parse.py
2273 lines (1841 loc) · 69.3 KB
/
cmd_parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright 2016 Andy Chu. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
"""
cmd_parse.py - Parse high level shell commands.
"""
from __future__ import print_function
from _devbuild.gen import grammar_nt
from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind
from _devbuild.gen.types_asdl import lex_mode_t, lex_mode_e
from _devbuild.gen.syntax_asdl import (
condition, condition_t,
command, command_t,
command__Simple, command__DoGroup, command__ForExpr, command__ForEach,
command__WhileUntil, command__Case, command__If, command__ShFunction,
command__Subshell, command__DBracket, command__DParen,
command__CommandList, command__Proc,
BraceGroup,
case_arm,
sh_lhs_expr, sh_lhs_expr_t,
redir, redir_param, redir_param__HereDoc,
redir_loc, redir_loc_t,
word, word_e, word_t, compound_word, Token,
word_part_e, word_part_t,
assign_pair, env_pair,
assign_op_e,
source, parse_result, parse_result_t,
speck, name_type,
proc_sig_e, proc_sig__Closed,
)
from _devbuild.gen import syntax_asdl # token, etc.
from asdl import runtime
from core import alloc
from core import error
from core import ui
from core.pyerror import log, p_die
from frontend import consts
from frontend import match
from frontend import reader
from osh import braces
from osh import bool_parse
from osh import word_
from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
if TYPE_CHECKING:
from core.alloc import Arena
from frontend.lexer import Lexer
from frontend.parse_lib import ParseContext, AliasesInFlight
from frontend.reader import _Reader
from osh.word_parse import WordParser
def _KeywordSpid(w):
  # type: (word_t) -> int
  """Return the leftmost span ID of a keyword word, for error locations.

  TODO: Can we optimize this?
  'while', 'case', etc. are assumed to be a specific type of compound_word.
  Turning LeftMostSpanForWord into a no-op showed no observable difference
  on a ~500 ms parse of testdata/osh-runtime/abuild, so maybe this doesn't
  make sense.
  """
  return word_.LeftMostSpanForWord(w)
def _KeywordToken(UP_w):
  # type: (word_t) -> Token
  """Return the single token at the start of a word that IS A keyword.

  In C++, this casts without checking, so BE CAREFUL to call it in the right
  context.
  """
  assert UP_w.tag_() == word_e.Compound, UP_w
  keyword_w = cast(compound_word, UP_w)

  first_part = keyword_w.parts[0]
  assert first_part.tag_() == word_part_e.Literal, first_part
  return cast(Token, first_part)
def _ReadHereLines(line_reader,  # type: _Reader
                   h,  # type: redir
                   delimiter,  # type: str
                   ):
  # type: (...) -> Tuple[List[Tuple[int, str, int]], Tuple[int, str, int]]
  """Collect a here doc's body lines, up to and including its terminator.

  All lines are read at once rather than parsed line-by-line, because of
  cases like:

      cat <<EOF
      1 $(echo 2
      echo 3) 4
      EOF

  Returns:
    (body lines, terminator line), each as (line_id, line, start_offset).
  """
  here_lines = []  # type: List[Tuple[int, str, int]]
  last_line = None  # type: Tuple[int, str, int]
  # <<- strips off ALL leading tabs -- not spaces, and not just the first tab.
  strip_leading_tabs = (h.op.id == Id.Redir_DLessDash)

  while True:
    line_id, line, unused_offset = line_reader.GetLine()

    if line is None:  # EOF
      # An unterminated here doc is just a warning in bash.  We make it fatal
      # because we want to be strict, and because it causes problems reporting
      # other errors.  Attribute it to the << in <<EOF for now.
      p_die("Couldn't find terminator for here doc that starts here",
            token=h.op)

    assert len(line) != 0  # None should be the empty line

    # Count leading tabs to strip (only for <<-).
    start_offset = 0
    if strip_leading_tabs:
      for ch in line:
        if ch != '\t':
          break
        start_offset += 1

    if line[start_offset:].rstrip() == delimiter:
      last_line = (line_id, line, start_offset)
      break

    here_lines.append((line_id, line, start_offset))

  return here_lines, last_line
def _MakeLiteralHereLines(here_lines,  # type: List[Tuple[int, str, int]]
                          arena,  # type: Arena
                          ):
  # type: (...) -> List[word_part_t]  # less precise because List is invariant
  """Create a line_span and a Lit_Chars token for each here doc line."""
  parts = []  # type: List[word_part_t]
  for line_id, line, start_offset in here_lines:
    span_id = arena.AddLineSpan(line_id, start_offset, len(line))
    tok = Token(Id.Lit_Chars, span_id, line[start_offset:])
    parts.append(cast(word_part_t, tok))
  return parts
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
  # type: (ParseContext, redir, _Reader, Arena) -> None
  """Fill in attributes of a pending here doc node.

  Args:
    parse_ctx: used to make a word parser for the unquoted-delimiter case
    r: the redir node whose HereDoc arg is filled in
    line_reader: source of the here doc's body lines
    arena: for adding line spans
  """
  h = cast(redir_param__HereDoc, r.arg)

  # "If any character in word is quoted, the delimiter shall be formed by
  # performing quote removal on word, and the here-document lines shall not
  # be expanded. Otherwise, the delimiter shall be the word itself."
  # NOTE: \EOF counts, or even E\OF
  ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
  if not ok:
    p_die('Invalid here doc delimiter', word=h.here_begin)

  here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

  if delim_quoted:  # << 'EOF' -- body is literal, one token per line
    h.stdin_parts = _MakeLiteralHereLines(here_lines, arena)
  else:
    # Unquoted delimiter: parse the body so $var, $(...), etc. are expanded.
    line_reader = reader.VirtualLineReader(here_lines, arena)
    w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
    w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

  end_line_id, end_line, end_pos = last_line

  # Create a span with the end terminator.  Maintains the invariant that
  # the spans "add up".
  h.here_end_span_id = arena.AddLineSpan(end_line_id, end_pos, len(end_line))
def _MakeAssignPair(parse_ctx, preparsed, arena):
  # type: (ParseContext, PreParsedItem, Arena) -> assign_pair
  """Create an assign_pair from a 4-tuple from DetectShAssignment.

  Handles three LHS shapes: a plain name (s=1), an array index kept as an
  UNPARSED string (one-pass parse mode), and an array index parsed as an
  arithmetic expression (a[x++]=1).
  """
  left_token, close_token, part_offset, w = preparsed

  if left_token.id == Id.Lit_VarLike:  # s=1
    # The char before the final '=' distinguishes += from plain =.
    if left_token.val[-2] == '+':
      var_name = left_token.val[:-2]
      op = assign_op_e.PlusEqual
    else:
      var_name = left_token.val[:-1]
      op = assign_op_e.Equal

    tmp = sh_lhs_expr.Name(var_name)
    tmp.spids.append(left_token.span_id)

    lhs = cast(sh_lhs_expr_t, tmp)

  elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.one_pass_parse:
    # One-pass mode: keep the index text unparsed, recovered from the line.
    var_name = left_token.val[:-1]  # strip trailing '['
    if close_token.val[-2] == '+':
      op = assign_op_e.PlusEqual
    else:
      op = assign_op_e.Equal

    # Slice the source line between the span after 'a[' and the ']=' span.
    left_spid = left_token.span_id + 1
    right_spid = close_token.span_id
    left_span = parse_ctx.arena.GetLineSpan(left_spid)
    right_span = parse_ctx.arena.GetLineSpan(right_spid)
    assert left_span.line_id == right_span.line_id, \
        '%s and %s not on same line' % (left_span, right_span)
    line = parse_ctx.arena.GetLine(left_span.line_id)
    index_str = line[left_span.col : right_span.col]
    lhs = sh_lhs_expr.UnparsedIndex(var_name, index_str)

  elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
    var_name = left_token.val[:-1]  # strip trailing '['
    if close_token.val[-2] == '+':
      op = assign_op_e.PlusEqual
    else:
      op = assign_op_e.Equal

    spid1 = left_token.span_id
    spid2 = close_token.span_id
    span1 = arena.GetLineSpan(spid1)
    span2 = arena.GetLineSpan(spid2)
    if span1.line_id == span2.line_id:
      line = arena.GetLine(span1.line_id)
      # extract the index text between the brackets
      code_str = line[span1.col + span1.length : span2.col]
    else:
      # Multi-line array index assignments aren't supported.
      raise NotImplementedError('%d != %d' % (span1.line_id, span2.line_id))

    # Parse the index as an arithmetic expression, blaming the LValue span.
    a_parser = parse_ctx.MakeArithParser(code_str)
    src = source.LValue(left_token.span_id, close_token.span_id)
    with alloc.ctx_Location(arena, src):
      index_node = a_parser.Parse()  # may raise error.Parse

    tmp3 = sh_lhs_expr.IndexedName(var_name, index_node)
    tmp3.spids.append(left_token.span_id)

    lhs = cast(sh_lhs_expr_t, tmp3)

  else:
    raise AssertionError()

  # TODO: Should we also create a rhs_expr.ArrayLiteral here?
  n = len(w.parts)
  if part_offset == n:
    rhs = word.Empty()  # type: word_t
  else:
    # tmp2 is for intersection of C++/MyPy type systems
    tmp2 = compound_word(w.parts[part_offset:])
    word_.TildeDetectAssign(tmp2)
    rhs = tmp2

  pair = syntax_asdl.assign_pair(lhs, op, rhs, [left_token.span_id])
  return pair
def _AppendMoreEnv(preparsed_list, more_env):
  # type: (PreParsedList, List[env_pair]) -> None
  """Helper to modify a SimpleCommand node.

  Args:
    preparsed_list: a list of 4-tuples from DetectShAssignment
    more_env: a list to append env_pairs to
  """
  for left_token, _, part_offset, w in preparsed_list:
    if left_token.id != Id.Lit_VarLike:  # can't be a[x]=1
      p_die("Environment binding shouldn't look like an array assignment",
            token=left_token)

    if left_token.val[-2] == '+':
      p_die('Expected = in environment binding, got +=', token=left_token)

    var_name = left_token.val[:-1]
    if part_offset == len(w.parts):
      val = word.Empty()  # type: word_t
    else:
      val = compound_word(w.parts[part_offset:])

    more_env.append(syntax_asdl.env_pair(var_name, val, [left_token.span_id]))
if TYPE_CHECKING:
  # A 4-tuple from DetectShAssignment: (token for 'name=' or 'name[', token
  # for the closing ']=' if any, index of the first word part after '=', and
  # the whole word).
  PreParsedItem = Tuple[Token, Optional[Token], int, compound_word]
  PreParsedList = List[PreParsedItem]
def _SplitSimpleCommandPrefix(words):
  # type: (List[compound_word]) -> Tuple[PreParsedList, List[compound_word]]
  """Second pass of SimpleCommand parsing: look for assignment words."""
  preparsed_list = []  # type: PreParsedList
  suffix_words = []  # type: List[compound_word]

  done_prefix = False
  for w in words:
    if not done_prefix:
      left_token, close_token, part_offset = word_.DetectShAssignment(w)
      if left_token:
        # Still in the leading run of assignment words.
        preparsed_list.append((left_token, close_token, part_offset, w))
        continue
      done_prefix = True  # first non-assignment word ends the prefix
    suffix_words.append(w)
  return preparsed_list, suffix_words
def _MakeSimpleCommand(preparsed_list, suffix_words, redirects, block):
  # type: (PreParsedList, List[compound_word], List[redir], Optional[BraceGroup]) -> command__Simple
  """Create a command.Simple node."""

  # FOO=(1 2 3) ls is not allowed.
  for _, _, _, bind_word in preparsed_list:
    if word_.HasArrayPart(bind_word):
      p_die("Environment bindings can't contain array literals",
            word=bind_word)

  # NOTE: It would be possible to add this check back.  But it already
  # happens at runtime in EvalWordSequence2.
  # echo FOO=(1 2 3) is not allowed (but we should NOT fail on
  # echo FOO[x]=1).
  if 0:
    for w in suffix_words:
      if word_.HasArrayPart(w):
        p_die("Commands can't contain array literals", word=w)

  # NOTE: We only do brace DETECTION here, not brace EXPANSION.  Therefore we
  # can't implement bash's behavior of having say {~bob,~jane}/src work,
  # because we only have a BracedTree.  This is documented in
  # spec/brace-expansion.  Technically we could do expansion outside of
  # 'oshc translate', but it doesn't seem worth it.
  detected_words = word_.TildeDetectAll(braces.BraceDetectAll(suffix_words))

  more_env = []  # type: List[env_pair]
  _AppendMoreEnv(preparsed_list, more_env)

  # do_fork by default
  return command.Simple(detected_words, redirects, more_env, block, True)
class VarChecker(object):
  """Statically check for proc and variable usage errors."""

  def __init__(self):
    # type: () -> None
    # Stack of declaration tokens: 'proc' or some other token, one per
    # enclosing scope (parallel to self.names).
    self.tokens = []  # type: List[Token]
    # Stack of scopes; each maps a variable name to the keyword Id that
    # declared it.  Starts with one entry for the global scope.
    self.names = [{}]  # type: List[Dict[str, Id_t]]

  def Push(self, blame_tok):
    # type: (Token) -> None
    """Enter a new proc/function scope, rejecting nested procs.

    Bash allows this, but it's confusing because it's the same as two
    functions at the top level:

    f() {
      g() {
        echo 'top level function defined in another one'
      }
    }

    Oil disallows nested procs.
    """
    if len(self.tokens) != 0:
      if self.tokens[0].id == Id.KW_Proc or blame_tok.id == Id.KW_Proc:
        p_die("procs can't contain other procs or functions", token=blame_tok)

    self.tokens.append(blame_tok)
    entry = {}  # type: Dict[str, Id_t]
    self.names.append(entry)

  def Pop(self):
    # type: () -> None
    """Leave the scope entered by the matching Push()."""
    self.names.pop()
    self.tokens.pop()

  def Check(self, keyword_id, name_tok):
    # type: (Id_t, Token) -> None
    """Check for errors in declaration and mutation errors.

    var x, const x:
      x already declared
    setlocal x: (must be var)
      x is not declared
      x is constant
    setvar x:
      x is constant (only for locals)
    setglobal x:
      I don't think any errors are possible.
      We would have to have many conditions to statically know the names:
      - no 'source'
      - shopt -u copy_env.
      - AND use lib has to be static

    LATER:
    setref x:
      Should only mutate out params
    """
    top = self.names[-1]
    name = name_tok.val
    if keyword_id in (Id.KW_Const, Id.KW_Var):
      if name in top:
        p_die('%r was already declared', name, token=name_tok)
      else:
        top[name] = keyword_id

    if keyword_id in (Id.KW_Set, Id.KW_SetLocal):
      if name not in top:
        p_die("%r hasn't been declared", name, token=name_tok)

    if keyword_id in (Id.KW_Set, Id.KW_SetLocal, Id.KW_SetVar):
      if name in top and top[name] == Id.KW_Const:
        p_die("Can't modify constant %r", name, token=name_tok)

    # TODO: setref.
class ctx_VarChecker(object):
  """Context manager that pushes a VarChecker scope and pops it on exit.

  Note: the scope is pushed at construction time, not in __enter__.
  """

  def __init__(self, var_checker, blame_tok):
    # type: (VarChecker, Token) -> None
    self.var_checker = var_checker
    var_checker.Push(blame_tok)

  def __enter__(self):
    # type: () -> None
    pass

  def __exit__(self, type, value, traceback):
    # type: (Any, Any, Any) -> None
    self.var_checker.Pop()
# Keywords that can only appear INSIDE a compound command, e.g. 'do' within
# 'for ... do ... done'.  Seeing one where a command is expected means the
# enclosing construct should stop consuming commands.
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
class CommandParser(object):
  """Recursive descent parser for high-level shell commands.

  Args:
    word_parse: to get a stream of words
    lexer: for lookahead in function def, PushHint of ()
    line_reader: for here doc
  """
  def __init__(self, parse_ctx, w_parser, lexer, line_reader):
    # type: (ParseContext, WordParser, Lexer, _Reader) -> None
    self.parse_ctx = parse_ctx
    self.aliases = parse_ctx.aliases  # aliases to expand at parse time

    self.w_parser = w_parser  # type: WordParser  # for normal parsing
    self.lexer = lexer  # for pushing hints, lookahead to (
    self.line_reader = line_reader  # for here docs
    self.arena = parse_ctx.arena  # for adding here doc and alias spans
    self.eof_id = Id.Eof_Real  # can be overridden with Init_EofId()
    self.aliases_in_flight = []  # type: AliasesInFlight

    # A hacky boolean to remove 'if cd / {' ambiguity.
    self.allow_block = True
    self.parse_opts = parse_ctx.parse_opts
    # Statically checks proc/var usage (see VarChecker).
    self.var_checker = VarChecker()

    self.Reset()
  # These two Init_() functions simulate "keyword args" in C++.

  def Init_EofId(self, eof_id):
    # type: (Id_t) -> None
    """Set the token Id that terminates parsing (default: Id.Eof_Real)."""
    self.eof_id = eof_id
  def Init_AliasesInFlight(self, aliases_in_flight):
    # type: (AliasesInFlight) -> None
    """Share the parent parser's aliases-in-flight list.

    Used when recursively parsing an alias expansion, so the same alias
    isn't expanded forever (see _MaybeExpandAliases).
    """
    self.aliases_in_flight = aliases_in_flight
  def Reset(self):
    # type: () -> None
    """Reset our own internal state.

    Called by the interactive loop.
    """
    # Cursor state set by _Peek()
    self.next_lex_mode = lex_mode_e.ShCommand
    self.cur_word = None  # type: word_t  # current word
    self.c_kind = Kind.Undefined
    self.c_id = Id.Undefined_Tok

    # Here docs whose bodies haven't been read yet; should have HereLiteral
    # arg.  Filled in by _Peek() when it sees a newline.
    self.pending_here_docs = []  # type: List[redir]
  def ResetInputObjects(self):
    # type: () -> None
    """Reset the internal state of our inputs (word parser, lexer, reader).

    Called by the interactive loop.
    """
    self.w_parser.Reset()
    self.lexer.ResetInputObjects()
    self.line_reader.Reset()
  def _Next(self, lex_mode=lex_mode_e.ShCommand):
    # type: (lex_mode_t) -> None
    """Advance the cursor: schedule the next word to be read lazily by _Peek()."""
    self.next_lex_mode = lex_mode
  def _Peek(self):
    # type: () -> None
    """Ensure cur_word, c_kind, and c_id reflect the next word.

    Only reads a new word if _Next() scheduled one, so repeated calls are
    no-ops.  Errors (e.g. bad command sub word, unterminated quoted string)
    propagate as exceptions from the word parser.
    """
    if self.next_lex_mode != lex_mode_e.Undefined:
      w = self.w_parser.ReadWord(self.next_lex_mode)

      # Here docs only happen in command mode, so other kinds of newlines don't
      # count.
      if w.tag_() == word_e.Token:
        tok = cast(Token, w)
        if tok.id == Id.Op_Newline:
          # A newline makes pending here doc bodies due: parse them now.
          for h in self.pending_here_docs:
            _ParseHereDocBody(self.parse_ctx, h, self.line_reader, self.arena)
          del self.pending_here_docs[:]  # No .clear() until Python 3.3.

      self.cur_word = w

      self.c_kind = word_.CommandKind(self.cur_word)
      self.c_id = word_.CommandId(self.cur_word)
      self.next_lex_mode = lex_mode_e.Undefined
def _Eat(self, c_id):
# type: (Id_t) -> None
actual_id = word_.CommandId(self.cur_word)
msg = 'Expected word type %s, got %s' % (
ui.PrettyId(c_id), ui.PrettyId(actual_id)
)
self._Eat2(c_id, msg)
  def _Eat2(self, c_id, msg):
    # type: (Id_t, str) -> None
    """Consume a word of a type, or die with the given message if it doesn't
    match.

    Args:
      c_id: the Id we expected
      msg: improved error message
    """
    self._Peek()
    # TODO: Printing something like KW_Do is not friendly.  We can map
    # backwards using the _KEYWORDS list in frontend/lexer_def.py.
    if self.c_id != c_id:
      p_die(msg, word=self.cur_word)

    self._Next()
  def _NewlineOk(self):
    # type: () -> None
    """Check for an optional newline and consume it."""
    self._Peek()
    if self.c_id == Id.Op_Newline:
      self._Next()
      self._Peek()
def _AtSecondaryKeyword(self):
# type: () -> bool
if self.c_id in SECONDARY_KEYWORDS:
return True
return False
  def ParseRedirect(self):
    # type: () -> redir
    """Parse one redirect: its location (fd number or {var}), then its arg.

    Here docs (<< and <<-) are returned with an empty body; the node is also
    queued on pending_here_docs, and _Peek() fills it in at the next newline.
    """
    self._Peek()
    assert self.c_kind == Kind.Redir, self.cur_word
    op_tok = cast(Token, self.cur_word)  # for MyPy

    op_val = op_tok.val
    if op_val[0] == '{':
      # {fd}> style: the target is named by a variable.
      pos = op_val.find('}')
      assert pos != -1  # lexer ensures this
      loc = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

    elif op_val[0].isdigit():
      # Explicit descriptor number, at most two digits (e.g. 2> or 10>).
      pos = 1
      if op_val[1].isdigit():
        pos = 2
      loc = redir_loc.Fd(int(op_val[:pos]))

    else:
      # No prefix: use the operator's default fd.
      loc = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

    self._Next()
    self._Peek()

    # Here doc
    if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
      #log('after here doc %s', self.cur_word)
      # TODO: if the arg_word is TripleQuoted, then we don't have a pending
      # here doc.  We read the whole thing!
      #
      # Idea:
      # cmd_token = Token %Token | Word %compound_word | TripleQuoted %triple_quoted
      #
      # To store in tree: remove one, and add two
      # word = Token %Token | Word %compound_word | Empty | BracedTree
      #
      # bool_token = Word %compound_word | Token %Token | String ?
      # word_compile can have 2 routines:
      #   HereDoc -> compound_word
      #   TripleQuoted -> compound_word
      # These are the same syntax
      #
      # Another option:
      # mutate word_part.Literal %Token -> word_part.Stripped(int dedent, Token tok)
      # then we may not need triple_quoted
      arg = redir_param.HereDoc()
      arg.here_begin = self.cur_word
      r = redir(op_tok, loc, arg)
      self.pending_here_docs.append(r)  # will be filled on next newline.
      self._Next()
      return r

    # Other redirect
    if self.c_kind != Kind.Word:
      p_die('Invalid token after redirect operator', word=self.cur_word)

    arg_word = self.cur_word
    tilde = word_.TildeDetect(arg_word)
    if tilde:
      arg_word = tilde
    self._Next()

    # We should never get Empty, Token, etc.
    assert arg_word.tag_() == word_e.Compound, arg_word
    return redir(op_tok, loc, cast(compound_word, arg_word))
  def _ParseRedirectList(self):
    # type: () -> List[redir]
    """Try parsing any redirects at the cursor.

    This is used for blocks only, not commands.  Stops at the first
    non-redirect word; parse errors propagate as exceptions.
    """
    redirects = []  # type: List[redir]
    while True:
      self._Peek()

      # This prediction needs to ONLY accept redirect operators.  Should we
      # make them a separate Kind?
      if self.c_kind != Kind.Redir:
        break

      node = self.ParseRedirect()
      redirects.append(node)
      self._Next()
    return redirects
  def _ScanSimpleCommand(self):
    # type: () -> Tuple[List[redir], List[compound_word], Optional[BraceGroup]]
    """First pass of simple command parsing: split into redirects and words.

    Also detects a trailing brace group (Oil block arg) when parse_brace is
    on; self.allow_block disables that for if/while conditions, etc.
    """
    redirects = []  # type: List[redir]
    words = []  # type: List[compound_word]
    block = None  # type: Optional[BraceGroup]
    while True:
      self._Peek()
      if self.c_kind == Kind.Redir:
        node = self.ParseRedirect()
        redirects.append(node)

      elif self.c_kind == Kind.Word:
        if self.parse_opts.parse_brace():
          # Treat { and } more like operators
          if self.c_id == Id.Lit_LBrace:
            if self.allow_block:  # Disabled for if/while condition, etc.
              blame_tok = _KeywordToken(self.cur_word)
              # Our own scope for 'var'
              with ctx_VarChecker(self.var_checker, blame_tok):
                block = self.ParseBraceGroup()
              if 0:
                print('--')
                block.PrettyPrint()
                print('\n--')
            break  # { always ends the simple command
          elif self.c_id == Id.Lit_RBrace:
            # Another thing: { echo hi }
            # We're DONE!!!
            break

        if self.parse_opts.parse_triple_dots():
          # TODO: If the first word is ... , invoke ParsePipeline()
          # But call:
          #   self.w_parser.PushTripleDots()
          #   self.w_parser.PopTripleDots()
          #
          # Which changes the behavior of ReadWord?
          #   \n -> space
          #   \n comment \n -> space
          #   \n space \n -> \n
          # IDs involved: Op_Newline, WS_Space, and maybe Id.Lit_Comment
          pass

        w = cast(compound_word, self.cur_word)  # Kind.Word ensures this
        words.append(w)

      elif self.parse_opts.parse_amp() and self.c_id == Id.Op_Amp:
        # TODO:
        #   myprog &2 > &1 should be parsed
        p_die('TODO: Parse Redirect', word=self.cur_word)

      else:
        break

      self._Next()
    return redirects, words, block
  def _MaybeExpandAliases(self, words):
    # type: (List[compound_word]) -> Optional[command_t]
    """Try to expand aliases.

    Args:
      words: A list of Compound

    Returns:
      A command node if any aliases were expanded, or None otherwise.

    Our implementation of alias has two design choices:
    - Where to insert it in parsing.  We do it at the end of
      ParseSimpleCommand.
    - What grammar rule to parse the expanded alias buffer with.  In our
      case it's ParseCommand().

    This doesn't quite match what other shells do, but I can't figure out a
    better place.

    Most test cases pass, except for ones like:

    alias LBRACE='{'
    LBRACE echo one; echo two; }

    alias MULTILINE='echo 1
    echo 2
    echo 3'
    MULTILINE

    NOTE: dash handles aliases in a totally different way.  It has a global
    variable checkkwd in parser.c.  It assigns it all over the grammar, like
    this:

    checkkwd = CHKNL | CHKKWD | CHKALIAS;

    The readtoken() function checks (checkkwd & CHKALIAS) and then calls
    lookupalias().  This seems to provide a consistent behavior among
    shells, but it's less modular and testable.

    Bash also uses a global 'parser_state & PST_ALEXPNEXT'.
    """
    # Start a new list if there aren't any.  This will be passed recursively
    # through CommandParser instances.
    aliases_in_flight = (
        self.aliases_in_flight if len(self.aliases_in_flight) else []
    )

    # for error message
    first_word_str = None  # type: Optional[str]
    argv0_spid = word_.LeftMostSpanForWord(words[0])

    expanded = []  # type: List[str]
    i = 0
    n = len(words)

    while i < n:
      w = words[i]

      # Quoted words are never alias-expanded.
      ok, word_str, quoted = word_.StaticEval(w)
      if not ok or quoted:
        break

      alias_exp = self.aliases.get(word_str)
      if alias_exp is None:
        break

      # Prevent infinite loops.  This is subtle: we want to prevent infinite
      # expansion of alias echo='echo x'.  But we don't want to prevent
      # expansion of the second word in 'echo echo', so we add 'i' to
      # "aliases_in_flight".
      if (word_str, i) in aliases_in_flight:
        break

      if i == 0:
        first_word_str = word_str  # for error message

      #log('%r -> %r', word_str, alias_exp)
      aliases_in_flight.append((word_str, i))
      expanded.append(alias_exp)
      i += 1

      if not alias_exp.endswith(' '):
        # alias e='echo [ ' is the same expansion as
        # alias e='echo ['
        # The trailing space indicates whether we should continue to expand
        # aliases; it's not part of it.
        expanded.append(' ')
        break  # No more expansions

    if len(expanded) == 0:  # No expansions; caller does parsing.
      return None

    # We got some expansion.  Now copy the rest of the words.
    # We need each NON-REDIRECT word separately!  For example:
    #   $ echo one >out two
    # dash/mksh/zsh go beyond the first redirect!
    while i < n:
      w = words[i]
      spid1 = word_.LeftMostSpanForWord(w)
      spid2 = word_.RightMostSpanForWord(w)

      span1 = self.arena.GetLineSpan(spid1)
      span2 = self.arena.GetLineSpan(spid2)

      if 0:
        log('spid1 = %d, spid2 = %d', spid1, spid2)
        n1 = self.arena.GetLineNumber(span1.line_id)
        n2 = self.arena.GetLineNumber(span2.line_id)
        log('span1 %s line %d %r', span1, n1,
            self.arena.GetLine(span1.line_id))
        log('span2 %s line %d %r', span2, n2,
            self.arena.GetLine(span2.line_id))

      if span1.line_id == span2.line_id:
        # Copy the word's original source text verbatim.
        line = self.arena.GetLine(span1.line_id)
        piece = line[span1.col : span2.col + span2.length]
        expanded.append(piece)
      else:
        # NOTE: The xrange(left_spid, right_spid) algorithm won't work for
        # commands like this:
        #
        # myalias foo`echo hi`bar
        #
        # That is why we only support words over 1 or 2 lines.
        raise NotImplementedError(
            'line IDs %d != %d' % (span1.line_id, span2.line_id))

      expanded.append(' ')  # Put space back between words.
      i += 1

    code_str = ''.join(expanded)

    # NOTE: self.arena isn't correct here.  Breaks line invariant.
    line_reader = reader.StringLineReader(code_str, self.arena)
    cp = self.parse_ctx.MakeOshParser(line_reader)
    cp.Init_AliasesInFlight(aliases_in_flight)

    # break circular dep
    from frontend import parse_lib

    # The interaction between COMPLETION and ALIASES requires special care.
    # See docstring of BeginAliasExpansion() in parse_lib.py.
    src = source.Alias(first_word_str, argv0_spid)
    with alloc.ctx_Location(self.arena, src):
      with parse_lib.ctx_Alias(self.parse_ctx.trail):
        try:
          # _ParseCommandTerm() handles multiline commands, compound
          # commands, etc. as opposed to ParseLogicalLine()
          node = cp._ParseCommandTerm()
        except error.Parse as e:
          # Failure to parse alias expansion is a fatal error;
          # we don't need more handling here.
          raise

    if 0:
      log('AFTER expansion:')
      node.PrettyPrint()

    return node
def ParseSimpleCommand(self):
# type: () -> command_t
"""
Fixed transcription of the POSIX grammar (TODO: port to grammar/Shell.g)
io_file : '<' filename
| LESSAND filename
...
io_here : DLESS here_end
| DLESSDASH here_end
redirect : IO_NUMBER (io_redirect | io_here)
prefix_part : ASSIGNMENT_WORD | redirect
cmd_part : WORD | redirect
assign_kw : Declare | Export | Local | Readonly
# Without any words it is parsed as a command, not an assigment
assign_listing : assign_kw
# Now we have something to do (might be changing assignment flags too)
# NOTE: any prefixes should be a warning, but they are allowed in shell.
assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+
# an external command, a function call, or a builtin -- a "word_command"
word_command : prefix_part* cmd_part+
simple_command : assign_listing
| assignment
| proc_command
Simple imperative algorithm:
1) Read a list of words and redirects. Append them to separate lists.
2) Look for the first non-assignment word. If it's declare, etc., then
keep parsing words AND assign words. Otherwise, just parse words.
3) If there are no non-assignment words, then it's a global assignment.
{ redirects, global assignments } OR
{ redirects, prefix_bindings, words } OR
{ redirects, ERROR_prefix_bindings, keyword, assignments, words }
THEN CHECK that prefix bindings don't have any array literal parts!
global assignment and keyword assignments can have the of course.
well actually EXPORT shouldn't have them either -- WARNING
3 cases we want to warn: prefix_bindings for assignment, and array literal
in prefix bindings, or export
A command can be an assignment word, word, or redirect on its own.
ls
>out.txt
>out.txt FOO=bar # this touches the file, and hten
Or any sequence:
ls foo bar
<in.txt ls foo bar >out.txt
<in.txt ls >out.txt foo bar
Or add one or more environment bindings:
VAR=val env
>out.txt VAR=val env
here_end vs filename is a matter of whether we test that it's quoted. e.g.
<<EOF vs <<'EOF'.
"""
redirects, words, block = self._ScanSimpleCommand()
block_spid = block.spids[0] if block else runtime.NO_SPID
if len(words) == 0: # e.g. >out.txt # redirect without words
if block:
p_die("Unexpected block", span_id=block_spid)
simple = command.Simple() # no words, more_env, or block,
simple.redirects = redirects
return simple
# Disallow =a because it's confusing
part0 = words[0].parts[0]
if part0.tag_() == word_part_e.Literal:
tok = cast(Token, part0)
if tok.id == Id.Lit_Equals:
p_die("=word isn't allowed when shopt 'parse_equals' is on.\n"
"Hint: add a space after = to pretty print an expression", token=tok)
preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
if self.parse_opts.parse_equals() and len(preparsed_list):
left_token, _, _, _ = preparsed_list[0]
p_die("name=val isn't allowed when shopt 'parse_equals' is on.\n"
"Hint: add 'env' before it, or spaces around =", token=left_token)
# Set a reference to words and redirects for completion. We want to
# inspect this state after a failed parse.
self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)
if len(suffix_words) == 0:
if block:
p_die("Unexpected block", span_id=block_spid)
# ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
pairs = [] # type: List[assign_pair]
for preparsed in preparsed_list:
pairs.append(_MakeAssignPair(self.parse_ctx, preparsed, self.arena))
assign = command.ShAssignment(pairs, redirects)
left_spid = word_.LeftMostSpanForWord(words[0])
assign.spids.append(left_spid) # no keyword spid to skip past
return assign
kind, kw_token = word_.KeywordToken(suffix_words[0])