From 5d56b35512cbcbdd3bd77b84c580ec8c76b92603 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Thu, 8 Jun 2023 18:04:50 -0500 Subject: [PATCH] Redo trailing \ handling, overhaul HERE document plumbing, handle undelimited redirects (I.E. cat /dev/full\n' E E"$P" X1 shxpect "wait for <(exit)" I$'cat <(echo hello 1>&2)\n' E$'hello\n' E"$P" -# Test the sh -c stuff before changing EVAL +# Test shell command line (-c and how scripts run) before changing EVAL testing '-c "" exit status 0' '$SH -c "" && echo $?' '0\n' '' '' testing '-c args' "\$SH -c 'echo \$0,\$1,\$2,\$3' one two three four five" \ "one,two,three,four\n" "" "" @@ -72,6 +39,12 @@ testing '-c arg split2' \ "$SH -c 'for i in a\"\$* \$@\"b; do echo =\$i=;done' one two three four five"\ "=atwo three four five two=\n=three=\n=four=\n=fiveb=\n" "" "" testing '-c arg count' "$SH -c 'echo \$#' 9 8 7 6 1 2 3 4" "7\n" "" "" +testing 'trailing \' "$SH -c 'echo \'" '\\\n' '' '' +testing "trailing \\ in ''" "$SH -c \$'echo \\'one\\\\\\ntwo\\''" \ + 'one\\\ntwo\n' '' '' +testing 'trailing \ in ""' "$SH -c \$'echo \"one\\\\\\ntwo\"'" 'onetwo\n' \ + '' '' +testing 'vanishing \' "$SH -c \$'echo \\\\\\n a'" 'a\n' '' '' testing "exec3" '$C -c "{ exec readlink /proc/self/fd/0;} < /proc/self/exe"' \ "$(readlink -f $C)\n" "" "" testing 'arg shift' "$SH -c '"'for i in "" 2 1 1 1; do echo $? $1; shift $i; done'"' one two three four five" \ @@ -121,6 +94,8 @@ testing "eval0" "$SH -c 'eval echo \$*' one two three" "two three\n" "" "" # Change EVAL to call sh -c for us, using "bash" explicitly for the host. export EVAL="timeout 10 $SH -c" +# From here on, tests run within the new shell by default. + testing 'return code' 'if false; then echo true; fi; echo $?' '0\n' '' '' testing 'return code 2' 'if true; then false; fi; echo $?' '1\n' '' '' testing 'return code 3' 'x=0; while [ $((x++)) -lt 2 ]; do echo $x; done; echo $?' 
'1\n2\n0\n' '' '' @@ -130,8 +105,12 @@ testing 'local var +whiteout' \ testing 'escape passthrough' 'echo -e "a\nb\nc\td"' 'a\nb\nc\td\n' '' '' testing 'trailing $ is literal' 'echo $' '$\n' '' '' +testing 'work after HERE' $'cat<<0;echo hello\npotato\n0' 'potato\nhello\n' '' '' +testing '<<""' $'cat<<"";echo hello\npotato\n\necho huh' 'potato\nhello\nhuh\n'\ + '' '' # TODO testing 'empty +() is literal' 'echo +()' '+()\n' '' '' +# shxpect "EOF" I$'< " I$'0\n' O$'hello\n' shxpect '$_ preserved on assignment error' I$'true hello; a=1 b=2 c=${}\n' \ E E"$P" I$'echo $_\n' O$'hello\n' @@ -471,6 +450,7 @@ testing 'sequence check' 'IFS=x; X=abxcd; echo ${X/bxc/g}' 'agd\n' '' '' shxpect '${ with newline' I$'HELLO=abc; echo ${HELLO/b/\n' E"> " I$'}\n' O$'a c\n' +testing 'here0' 'cat<< " \ I$'$POTATO\n' E"> " I$'EOF\n' O$'123\n' shxpect 'here2' I$'POTATO=123; cat << E"O"F\n' E"> " \ @@ -739,3 +719,36 @@ testing '[[~]]' '[[ ~ == $HOME ]] && echo yes' 'yes\n' '' '' #+ *) SKIPNEXT=1 ;; #+ esac +# TODO: categorize tests + +# TODO https://mywiki.wooledge.org/BashFAQ +# http://tiswww.case.edu/php/chet/bash/FAQ +# https://mywiki.wooledge.org/BashPitfalls#set_-euo_pipefail + +# // ${#} ${#x} ${#@} ${#x[@]} ${#!} ${!#} +# // ${!} ${!@} ${!@Q} ${!x} ${!x@} ${!x@Q} ${!x#} ${!x[} ${!x[*]} + +# Looked like a prefix but wasn't: three chars (@ # -) are both parameter name +# and slice operator. When immediately followed by } it's parameter, otherwise +# we did NOT have a prefix and it's an operator. +# +# ${#-} ${#-abc} +# ${##} ${##0} +# ${#@} ${#@Q} +# +# backslash not discarded: echo "abc\"def" + +# ${x:-y} use default +# ${x:=y} assign default (error if positional) +# ${x:?y} err if null +# ${x:+y} alt value +# ${x:off} ${x:off:len} off<0 from end (must ": -"), len<0 also from end must +# 0-based indexing +# ${@:off:len} positional parameters, off -1 = len, -len is error +# 1-based indexing + +# [] wins over +() +# touch 'AB[DEF]'; echo AB[+(DEF]) AB[+(DEF)? 
+# AB[+(DEF]) AB[DEF] + +# Testing shell corner cases _within_ a shell script is kind of hard. diff --git a/toys/pending/sh.c b/toys/pending/sh.c index 36f183b87..93706e843 100644 --- a/toys/pending/sh.c +++ b/toys/pending/sh.c @@ -1069,10 +1069,9 @@ static char *skip_redir_prefix(char *word) // parse next word from command line. Returns end, or 0 if need continuation // caller eats leading spaces. early = skip one quote block (or return start) -// quote is depth of existing quote stack in toybuf (usually 0) -static char *parse_word(char *start, int early, int quote) +static char *parse_word(char *start, int early) { - int ii, qq, qc = 0; + int ii, qq, qc = 0, quote = 0; char *end = start, *ss; // Handle redirections, <(), (( )) that only count at the start of word @@ -1115,6 +1114,7 @@ static char *parse_word(char *start, int early, int quote) if (isspace(*end)) break; ss = end + anystart(end, (char *[]){";;&", ";;", ";&", ";", "||", "|&", "|", "&&", "&", "(", ")", 0}); + if (ss==end) ss += anystart(end, (void *)redirectors); if (ss!=end) return (end==start) ? ss : end; } @@ -1123,16 +1123,15 @@ static char *parse_word(char *start, int early, int quote) // \? $() ${} $[] ?() *() +() @() !() else { - if (ii=='\\') { - if (!*end || (*end=='\n' && !end[1])) return early ? end : 0; - } else if (ii=='$' && -1!=(qq = stridx("({[", *end))) { + if (ii=='$' && -1!=(qq = stridx("({[", *end))) { if (strstart(&end, "((")) { end--; toybuf[quote++] = 255; } else toybuf[quote++] = ")}]"[qq]; } else if (*end=='(' && strchr("?*+@!", ii)) toybuf[quote++] = ')'; else { - end--; + if (ii!='\\') end--; + else if (!end[*end=='\n']) return *end ? 
0 : end; if (early && !quote) return end; } end++; @@ -1854,7 +1853,7 @@ static int expand_arg_nobrace(struct sh_arg *arg, char *str, unsigned flags, off_t pp = 0; s = str+ii-1; - kk = parse_word(s, 1, 0)-s; + kk = parse_word(s, 1)-s; if (str[ii] == '[' || *toybuf == 255) { // (( parsed together, not (( ) ) struct sh_arg aa = {0}; long long ll; @@ -1884,7 +1883,7 @@ static int expand_arg_nobrace(struct sh_arg *arg, char *str, unsigned flags, if (*ss != '<') ss = 0; else { while (isspace(*++ss)); - if (!(ll = parse_word(ss, 0, 0)-ss)) ss = 0; + if (!(ll = parse_word(ss, 0)-ss)) ss = 0; else { jj = ll+(ss-s); while (isspace(s[jj])) jj++; @@ -1904,8 +1903,10 @@ static int expand_arg_nobrace(struct sh_arg *arg, char *str, unsigned flags, close(jj); } } else if (!str[ii]) new[oo++] = cc; - else if (cc=='\\') - new[oo++] = (!(qq&1) || strchr("\"\\$`", str[ii])) ? str[ii++] : cc; + else if (cc=='\\') { + if (str[ii]=='\n') ii++; + else new[oo++] = (!(qq&1) || strchr("\"\\$`", str[ii])) ? str[ii++] : cc; + } // $VARIABLE expansions @@ -2292,7 +2293,7 @@ static int expand_arg(struct sh_arg *arg, char *old, unsigned flags, // collect brace spans if ((TT.options&OPT_B) && !(flags&NO_BRACE)) for (i = 0; ; i++) { // skip quoted/escaped text - while ((s = parse_word(old+i, 1, 0)) != old+i) i += s-(old+i); + while ((s = parse_word(old+i, 1)) != old+i) i += s-(old+i); // start a new span if (old[i] == '{') { @@ -2492,7 +2493,6 @@ static struct sh_process *expand_redir(struct sh_arg *arg, int skip, int *urd) pp->raw = arg; // When redirecting, copy each displaced filehandle to restore it later. 
- // Expand arguments and perform redirections for (j = skip; jc; j++) { int saveclose = 0, bad = 0; @@ -2591,7 +2591,8 @@ static struct sh_process *expand_redir(struct sh_arg *arg, int skip, int *urd) break; } len = strlen(ss); - if (len != writeall(from, ss, len)) bad++; + if (len != writeall(from, ss, len) || 1 != writeall(from, "\n", 1)) + bad++; if (ss != sss) free(ss); } else { struct sh_arg *hh = arg+ ++here; @@ -2909,7 +2910,7 @@ static int free_process(struct sh_process *pp) static void free_pipeline(void *pipeline) { struct sh_pipeline *pl = pipeline; - int i, j; + int i, j, k; if (!pl) return; @@ -2920,7 +2921,8 @@ static void free_pipeline(void *pipeline) } for (j=0; j<=pl->count; j++) { if (!pl->arg[j].v) continue; - for (i = 0; i<=pl->arg[j].c; i++) free(pl->arg[j].v[i]); + k = pl->arg[j].c-!!pl->count; + for (i = 0; i<=k; i++) free(pl->arg[j].v[i]); free(pl->arg[j].v); } free(pl); @@ -2955,34 +2957,47 @@ static int parse_line(char *line, struct sh_pipeline **ppl, // Extend/resume quoted block if (arg->c<0) { - delete = start = xmprintf("%s%s", arg->v[arg->c = (-arg->c)-1], start); - free(arg->v[arg->c]); + arg->c = (-arg->c)-1; + if (start) { + delete = start = xmprintf("%s%s", arg->v[arg->c], start); + free(arg->v[arg->c]); + } else start = arg->v[arg->c]; arg->v[arg->c] = 0; // is a HERE document in progress? } else if (pl->count != pl->here) { +here_loop: // Back up to oldest unfinished pipeline segment. while (pl != *ppl && pl->prev->count != pl->prev->here) pl = pl->prev; arg = pl->arg+1+pl->here; // Match unquoted EOF. 
+ if (!line) { + error_msg("%u: <<%s EOF", TT.LINENO, arg->v[arg->c]); + goto here_end; + } for (s = line, end = arg->v[arg->c]; *end; s++, end++) { - s += strspn(s, "\\\"'"); + end += strspn(end, "\\\"'\n"); if (!*s || *s != *end) break; } + // Add this line, else EOF hit so end HERE document - if (*s || *end) { + if ((*s && *s!='\n') || *end) { end = arg->v[arg->c]; arg_add(arg, xstrdup(line)); arg->v[arg->c] = end; } else { +here_end: // End segment and advance/consume bridge segments arg->v[arg->c] = 0; if (pl->count == ++pl->here) while (pl->next != *ppl && (pl = pl->next)->here == -1) pl->here = pl->count; } - if (pl->here != pl->count) return 1; + if (pl->here != pl->count) { + if (!line) goto here_loop; + else return 1; + } start = 0; // Nope, new segment if not self-managing type @@ -3014,11 +3029,12 @@ static int parse_line(char *line, struct sh_pipeline **ppl, } // queue up HERE EOF so input loop asks for more lines. - *(arg[pl->count].v = xzalloc(2*sizeof(void *))) = arg->v[++i]; - arg[pl->count].c = 0; + memset(arg+pl->count, 0, sizeof(*arg)); + arg_add(arg+pl->count, arg->v[++i]); + arg[pl->count].c--; } // Mark "bridge" segment when previous pl had HERE but this doesn't - if (!pl->count && pl->prev->count != pl->prev->here) pl->prev->here = -1; + if (!pl->count && pl->prev->count != pl->prev->here) pl->here = -1; pl = 0; } if (done) break; @@ -3026,10 +3042,10 @@ static int parse_line(char *line, struct sh_pipeline **ppl, // skip leading whitespace/comment here to know where next word starts while (isspace(*start)) ++start; - if (*start=='#') while (*start && *start != '\n') ++start; + if (*start=='#') while (*start) ++start; // Parse next word and detect overflow (too many nested quotes). - if ((end = parse_word(start, 0, 0)) == (void *)1) goto flush; + if ((end = parse_word(start, 0)) == (void *)1) goto flush; //dprintf(2, "%d %p(%d) %s word=%.*s\n", getpid(), pl, pl ? pl->type : -1, ex, (int)(end-start), end ? 
start : ""); // End function declaration? @@ -3331,7 +3347,7 @@ static int parse_line(char *line, struct sh_pipeline **ppl, } free(delete); - // ignore blank and comment lines + // Return now if line didn't tell us to DO anything. if (!*ppl) return 0; pl = (*ppl)->prev; @@ -3578,8 +3594,7 @@ static char *get_next_line(FILE *ff, int prompt) break; } if (!(len&63)) new = xrealloc(new, len+65); - if (cc == '\n') break; - new[len++] = cc; + if ((new[len++] = cc) == '\n') break; } if (new) new[len] = 0; @@ -4057,7 +4072,7 @@ int do_source(char *name, FILE *ff) if (name) TT.ff->omnom = name; -// TODO fix/catch NONBLOCK on input? +// TODO fix/catch O_NONBLOCK on input? // TODO when DO we reset lineno? (!LINENO means \0 returns 1) // when do we NOT reset lineno? Inherit but preserve perhaps? newline in $()? if (!name) TT.LINENO = 0; @@ -4081,7 +4096,7 @@ int do_source(char *name, FILE *ff) // prints "hello" vs "hello\" // returns 0 if line consumed, command if it needs more data - more = parse_line(new ? : " ", &pl, &expect); + more = parse_line(new, &pl, &expect); free(new); if (more==1) { if (!new) syntax_err("unexpected end of file"); @@ -4238,7 +4253,7 @@ void sh_main(void) char *cc = 0; FILE *ff; -//unsigned uu; dprintf(2, "%d main", getpid()); for (uu = 0; toys.argv[uu]; uu++) dprintf(2, " %s", toys.argv[uu]); dprintf(2, "\n"); +//dprintf(2, "%d main", getpid()); for (unsigned uu = 0; toys.argv[uu]; uu++) dprintf(2, " %s", toys.argv[uu]); dprintf(2, "\n"); signal(SIGPIPE, SIG_IGN); TT.options = OPT_B;