[test/count-procs] Count the processes started by different shells.

Add comments in the code where I think extra processes are invoked. Addresses issue #254. Also: - Minor translation fixes. Down to 89 errors. - Failing spec tests for 'proc return' problem. Which is also causing a type error.
oilshell · Mar 27, 2020 · b1d6de9 · b1d6de9
1 parent fc71385
commit b1d6de9
Show file tree

Hide file tree

Showing 11 changed files with 300 additions and 51 deletions.
diff --git a/bin/osh_eval.py b/bin/osh_eval.py
@@ -34,7 +34,7 @@
 from osh import word_eval
 
 _ = log
-_ = cmd_exec
+_2 = cmd_exec  # lint ignore
 
 from typing import List, Dict, Tuple, Optional, cast, TYPE_CHECKING
 if TYPE_CHECKING:

diff --git a/core/ansi.py b/core/ansi.py
@@ -9,6 +9,7 @@
 UNDERLINE = '\x1b[4m'
 REVERSE = '\x1b[7m'  # reverse video
 
+RED = '\x1b[31m'
 GREEN = '\x1b[32m'
 YELLOW = '\x1b[33m'
 BLUE = '\x1b[34m'

diff --git a/cpp/libc.h b/cpp/libc.h
@@ -30,6 +30,10 @@ inline Tuple2<int, int>* regex_first_group_match(Str* pattern, Str* str, int pos
   assert(0);
 }
 
+inline void print_time(double real, double user, double sys) {
+  assert(0);
+}
+
 }  // namespace libc
 
 #endif  // LIBC_H
diff --git a/cpp/osh_eval_stubs.h b/cpp/osh_eval_stubs.h
@@ -60,6 +60,7 @@ namespace pyutil {
 
 namespace builtin_misc {
   class _Builtin {
+   public:
     int Run(runtime_asdl::cmd_value_t* cmd_val) {
       assert(0);
     }

diff --git a/osh/builtin_meta.py b/osh/builtin_meta.py
@@ -11,7 +11,7 @@
 from frontend import args
 from frontend import arg_def
 from frontend import reader
-from osh.builtin_misc import _Builtin
+#from osh.builtin_misc import _Builtin
 from mycpp import mylib
 
 from typing import TYPE_CHECKING
@@ -40,7 +40,8 @@ def _EvalHelper(arena, ex, c_parser, src):
     arena.PopSource()
 
 
-class Eval(_Builtin):
+class Eval(object):
+
   def __init__(self, parse_ctx, exec_opts, ex):
     # type: (ParseContext, optview.Exec, Executor) -> None
     self.parse_ctx = parse_ctx
@@ -72,7 +73,8 @@ def Run(self, cmd_val):
     return _EvalHelper(self.arena, self.ex, c_parser, src)
 
 
-class Source(_Builtin):
+class Source(object):
+
   def __init__(self, parse_ctx, search_path, ex, errfmt):
     # type: (ParseContext, state.SearchPath, Executor, ui.ErrorFormatter) -> None
     self.parse_ctx = parse_ctx

diff --git a/osh/builtin_pure.py b/osh/builtin_pure.py
@@ -24,6 +24,7 @@
 from core import optview
 from core import state
 from core import ui
+from core.util import log
 from frontend import args
 from frontend import arg_def
 from frontend import consts
@@ -43,6 +44,8 @@
   from osh.cmd_exec import Executor
   from core.state import MutableOpts, Mem, SearchPath
 
+_ = log
+
 
 class Boolean(_Builtin):
   """For :, true, false."""
@@ -326,6 +329,15 @@ def Run(self, cmd_val, fork_external):
     # shift by one
     cmd_val = cmd_value.Argv(cmd_val.argv[1:], cmd_val.arg_spids[1:])
     # 'command ls' suppresses function lookup.
+
+    # BUG: fork_external doesn't appear to have any effect here!
+    #log('cmd fork_external %s', fork_external)
+
+    # For example in 'command ls / |  wc -l', we don't need to fork a process
+    # for 'ls /' -- it can just exec.
+
+    # This makes it fail!
+    #fork_external = True
     return self.ex.RunSimpleCommand(cmd_val, fork_external, funcs=False)
 
 

diff --git a/osh/cmd_exec.py b/osh/cmd_exec.py
@@ -59,7 +59,8 @@
 from _devbuild.gen.runtime_asdl import (
     quote_e,
     lvalue, lvalue_e, lvalue__ObjIndex, lvalue__ObjAttr,
-    value, value_e, value_t, value__Str, value__MaybeStrArray, value__Obj,
+    value, value_e, value_t, value__Int, value__Str, value__MaybeStrArray,
+    value__Obj,
     redirect, redirect_arg, scope_e,
     cmd_value__Argv, cmd_value, cmd_value_e,
     cmd_value__Argv, cmd_value__Assign,
@@ -273,17 +274,6 @@ def _RunBuiltinAndRaise(self, builtin_id, cmd_val, fork_external):
     Raises:
       args.UsageError
     """
-    # Shift one arg.  Builtins don't need to know their own name.
-    argv = cmd_val.argv[1:]
-
-    # TODO: For now, hard-code the builtins that take a block, and pass them
-    # cmd_val.
-    # Later, we should give builtins signatures like this and check them:
-    #
-    # proc cd(argv Array[Str], b Block) {
-    #   do evaluate(b, locals, globals)
-    # }
-
     # Most builtins dispatch with a dictionary
     builtin_func = self.builtins.get(builtin_id)
     if builtin_func is not None:
@@ -297,7 +287,7 @@ def _RunBuiltinAndRaise(self, builtin_id, cmd_val, fork_external):
       status = b.Run(cmd_val, fork_external)
 
     elif builtin_id == builtin_i.builtin:  # NOTE: uses early return style
-      if len(argv) == 0:
+      if len(cmd_val.argv) == 1:
         return 0  # this could be an error in strict mode?
 
       name = cmd_val.argv[1]
@@ -721,12 +711,18 @@ def RunSimpleCommand(self, cmd_val, fork_external, funcs=True):
       self.errfmt.Print('%r not found', arg0, span_id=span_id)
       return 127
 
+    # SubProgramThunk sets this to false
+    #log('[PID %d] fork_external: %s', posix.getpid(), fork_external)
+
+    # Normal case: ls /
     if fork_external:
       thunk = process.ExternalThunk(self.ext_prog, argv0_path, cmd_val, environ)
       p = process.Process(thunk, self.job_state)
       status = p.Run(self.waiter)
       return status
 
+    # Already forked for pipeline: ls / | wc -l
+    # TODO: count subshell?  ( ls / ) vs. ( ls /; ls / )
     self.ext_prog.Exec(argv0_path, cmd_val, environ)  # NEVER RETURNS
     assert False, "This line should never be reached" # makes mypy happy
 
@@ -1173,9 +1169,28 @@ def _Dispatch(self, node, fork_external):
 
       elif case(command_e.Return):
         node = cast(command__Return, UP_node)
-        val = self.expr_ev.EvalExpr(node.e)
-        # TODO: Does this have to be an integer?
-        raise _ControlFlow(node.keyword, val)
+        obj = self.expr_ev.EvalExpr(node.e)
+
+        if 0:
+          val = _PyObjectToVal(obj)
+
+          status_code = -1
+          UP_val = val
+          with tagswitch(val) as case:
+            if case(value_e.Int):
+              val = cast(value__Int, UP_val)
+              status_code = val.i
+            elif case(value_e.Str):
+              val = cast(value__Str, UP_val)
+              try:
+                status_code = int(val.s)
+              except ValueError:
+                e_die('Invalid return value %r', val.s, token=node.keyword)
+            else:
+              e_die('Expected integer return value, got %r', val,
+                    token=node.keyword)
+
+        raise _ControlFlow(node.keyword, obj)
 
       elif case(command_e.Expr):
         node = cast(command__Expr, UP_node)
@@ -1548,6 +1563,7 @@ def _Dispatch(self, node, fork_external):
         s_real, s_user, s_sys = passwd.Time()
         status = self._Execute(node.pipeline)
         e_real, e_user, e_sys = passwd.Time()
+        # note: mycpp doesn't support %.3f
         libc.print_time(e_real - s_real, e_user - s_user, e_sys - s_sys)
 
       else:

diff --git a/spec/oil-func-proc.test.sh b/spec/oil-func-proc.test.sh
@@ -349,3 +349,24 @@ echo $f(42)
 43
 ## END
 
+#### proc returning wrong type
+
+# this should print an error message
+proc f {
+  var a = @(one two)
+  return a
+}
+f
+## STDOUT:
+## END
+
+#### proc returning invalid string
+
+# this should print an error message
+proc f {
+  var s = 'not an integer status'
+  return s
+}
+f
+## STDOUT:
+## END
diff --git a/test/count-procs.sh b/test/count-procs.sh
@@ -7,44 +7,149 @@ set -o nounset
 set -o pipefail
 set -o errexit
 
+# Run it against the dev version of OSH
+REPO_ROOT=$(cd $(dirname $(dirname $0)) && pwd)
+
 count-procs() {
-  local sh=$1
-  local code=$2
+  local out_prefix=$1
+  local sh=$2
+  local code=$3
+
+  case $sh in 
+    # avoid the extra processes that bin/osh starts!
+    osh)
+      sh="env PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor $REPO_ROOT/bin/oil.py osh"
+      ;;
+  esac
+
+  strace -ff -o $out_prefix -- $sh -c "$code"
+}
+
+readonly -a SHELLS=(dash bash mksh zsh osh)
+readonly BASE_DIR='_tmp/count-procs'
+
+run-case() {
+  ### Run a test case with many shells
+
+  local num=$1
+  local code_str=$2
+
+  echo
+  echo "==="
+  echo "$code_str"
+  echo "==="
+
+  local base_dir=$BASE_DIR
+
+  for sh in "${SHELLS[@]}"; do
+    local out_prefix=$base_dir/$num-$sh
+    echo "--- $sh ---"
+    count-procs $out_prefix $sh "$code_str"
+  done
+
+  return
+  echo "Process counts"
+
+  for sh in "${SHELLS[@]}"; do
+    echo "--- $sh ---"
+    ls $base_dir/$sh | wc -l
+  done
+}
+
+print-cases() {
+  # format:  number, whitespace, then an arbitrary code string
+  egrep -v '^[[:space:]]*(#|$)' <<EOF
+
+# 1 process of course
+echo hi
+
+# dash and zsh somehow optimize this to 1
+(echo hi)
+
+# command sub
+echo \$(ls)
+
+# command sub with builtin
+echo \$(echo hi)
 
-  # Hm I didn't know -e is like a mini-language.  '-e open' is thes same as '-e
-  # trace=open'.  signal=none turns off the SIGCHLD lines.
-  #
-  # NOTE we grep for [pid and rely on the code itself to echo [pid-of-sh $$].
+# 2 processes for all shells
+( echo hi ); echo done
 
-  code='echo "[pid-of-sh $$]";'" $code"
-  strace -e 'trace=fork,execve' -e 'signal=none' -e 'verbose=none' -ff -- \
-    $sh -c "$code" 2>&1 | fgrep '[pid' || true
+# 3 processes
+ls | wc -l
+
+# every shell does 3
+echo a | wc -l
+
+# every shell does 3
+command echo a | wc -l
+
+# bash does 4 here!
+command ls / | wc -l
+
+# 3 processes for all?
+# osh gives FIVE???  But others give 3.  That's bad.
+( ls ) | wc -l
+
+# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
+ls | read x
+
+# osh has 3, but should be 2 like zsh?
+# hm how can zsh do 2 here?  That seems impossible.
+# oh it's lastpipe turns the shell process into wc -l ???  wow.
+{ echo a; echo b; } | wc -l
+
+# zsh behaves normally here.  That is a crazy optimization.  I guess it's
+# nice when you have SH -c 'mypipeline | wc-l'
+{ echo a; echo b; } | wc -l; echo done
+
+# this is all over the map too.  3 4 4 2.
+{ echo a; ls /; } | wc -l
+
+# osh does 4 when others do 3.  So every shell optimizes this extra pipeline.
+( echo a; echo b ) | wc -l
+
+# osh does 5 when others do 3.
+( echo a; echo b ) | ( wc -l )
+EOF
 }
 
-test-many() {
-  for code in "$@"; do
-    echo
-    echo
-    echo "--- $code ---"
-    echo
-
-    for sh in dash bash mksh zsh; do
-      echo
-      echo
-      echo "--- $sh ---"
-      echo
-      count-procs $sh "$code"
-    done
+number-cases() {
+  # Right justified, leading zeros, with 2
+  # Wish this was %02d
+  print-cases | nl --number-format rz --number-width 2
+}
+
+readonly MAX_CASES=100
+
+run-cases() {
+  mkdir -p $BASE_DIR
+
+  shopt -s nullglob
+  rm -f -v $BASE_DIR/*
+
+  number-cases > $BASE_DIR/cases.txt
+  cat $BASE_DIR/cases.txt | head -n $MAX_CASES | while read -r num code_str; do
+    echo $num
+    echo "[$code_str]"
+
+    run-case $num "$code_str"
   done
+
+  ls -1 $BASE_DIR | tee $BASE_DIR/listing.txt
+  summarize
 }
 
-t1() {
-  test-many \
-    'echo hi' \
-    '/bin/echo one; /bin/echo two' \
-    '{ /bin/echo one; /bin/echo two; }' \
-    '{ echo one; echo two; } | wc -l' \
-    '( echo one; echo two ) | wc -l' 
+print-table() {
+  PYTHONPATH=. test/count_procs.py "$@"
 }
 
+summarize() {
+  cat $BASE_DIR/listing.txt \
+    | egrep -o '^[0-9]+-[a-z]+' \
+    | sed 's/-/ /g' \
+    | print-table $BASE_DIR/cases.txt | tee $BASE_DIR/table.txt
+}
+
+
 "$@"