diff --git a/build/doc.sh b/build/doc.sh
index 3e66ac508c..51d167c9d1 100755
--- a/build/doc.sh
+++ b/build/doc.sh
@@ -107,6 +107,7 @@ readonly MARKDOWN_DOCS=(
   io-builtins
   unicode
   framing
+  xtrace
 
   # Internal stuff
   variable-scope
diff --git a/core/dev.py b/core/dev.py
index 8c89498851..128cdc613f 100644
--- a/core/dev.py
+++ b/core/dev.py
@@ -217,7 +217,10 @@ def __init__(self,
 
     self.word_ev = None  # type: NormalWordEvaluator
 
-    self.indent = 0  # changed by process, proc, source, eval
+    self.ind  = 0  # changed by process, proc, source, eval
+    self.indents = ['']  # "pooled" to avoid allocations
+
+    self.pid = -1
 
     # PS4 value -> compound_word.  PS4 is scoped.
     self.parse_cache = {}  # type: Dict[str, compound_word]
@@ -275,51 +278,93 @@ def _EvalPS4(self):
       self.mutable_opts.set_xtrace(True)
     return prefix.s
 
+  def _TraceBegin(self):
+    # type: () -> Optional[mylib.BufWriter]
+    if not self.exec_opts.xtrace():
+      return None
+
+    # TODO: Using a member variable and then clear() would probably save
+    # pressure.  Tracing is in the inner loop.
+    prefix = self._EvalPS4()
+
+    buf = mylib.BufWriter()
+    if self.exec_opts.xtrace_rich():
+      buf.write(self.indents[self.ind])
+
+    # Note: bash repeats the + for command sub, eval, source.  Other shells
+    # don't do it.  Leave this out for now.
+    buf.write(prefix)
+
+    if self.pid != -1:
+      buf.write(str(self.pid))
+      buf.write(' ')
+
+    return buf
+
+  def _TraceBegin2(self, char):
+    # type: (str) -> Optional[mylib.BufWriter]
+    """For the stack printed by xtrace_rich"""
+    if not self.exec_opts.xtrace() or not self.exec_opts.xtrace_rich():
+      return None
+
+    # TODO: change to _EvalPS4
+    buf = mylib.BufWriter()
+    buf.write(self.indents[self.ind])
+    buf.write(char)
+    if self.pid != -1:
+      buf.write(str(self.pid))
+    buf.write(' ')
+    return buf
+
   def OnProcessStart(self, pid):
     # type: (int) -> None
-    """Print > and the description."""
-    self.f.write('> process %d\n' % pid)
+    buf = self._TraceBegin2('|')
+    if not buf:
+      return
 
-  def OnProcessEnd(self):
-    # type: () -> None
-    """Print < and the description."""
-    #self.f.write('< process %s\n' % pid)
-    # TODO: Need pid and status
-    pass
+    buf.write('process %d\n' % pid)
+    self.f.write(buf.getvalue())
+
+  def OnProcessEnd(self, pid, status):
+    # type: (int, int) -> None
+    buf = self._TraceBegin2('.')
+    if not buf:
+      return
+
+    buf.write('process %s (status %d)\n' % (pid, status))
+    self.f.write(buf.getvalue())
 
   def SetProcess(self, pid):
     # type: (int) -> None
-    pass
+    self.pid = pid
 
   def Push(self, desc):
     # type: (str) -> None
     """Print > and the description."""
-    if not self.exec_opts.xtrace():
+    self.ind += 1
+    if self.ind >= len(self.indents):  # make sure there are enough
+      self.indents.append(self.ind * '  ')
+
+    buf = self._TraceBegin2('>')
+    if not buf:
       return
-    #self.f.write('> %s\n' % desc)
+
+    buf.write(desc)
+    buf.write('\n')
+    self.f.write(buf.getvalue())
 
   def Pop(self, desc):
     # type: (str) -> None
     """Print < and the description."""
-    if not self.exec_opts.xtrace():
+    buf = self._TraceBegin2('<')
+    if not buf:
       return
-    #self.f.write('< %s\n' % desc)
 
-  def _TraceBegin(self):
-    # type: () -> Optional[mylib.BufWriter]
-    if not self.exec_opts.xtrace():
-      return None
-
-    # TODO: Using a member variable and then clear() would probably save
-    # pressure.  Tracing is in the inner loop.
-    self.buf = mylib.BufWriter()
-    prefix = self._EvalPS4()
+    buf.write(desc)
+    buf.write('\n')
+    self.f.write(buf.getvalue())
 
-    buf = mylib.BufWriter()
-    # Note: bash repeats the + for command sub, eval, source.  Other shells
-    # don't do it.  Leave this out for now.
-    buf.write(prefix)
-    return buf
+    self.ind -= 1
 
   def _Value(self, val, buf):
     # type: (value_t, mylib.BufWriter) -> None
diff --git a/core/process.py b/core/process.py
index cc822e335d..8d7f6660ff 100644
--- a/core/process.py
+++ b/core/process.py
@@ -867,10 +867,12 @@ def Start(self):
       for st in self.state_changes:
         st.Apply()
 
+      self.tracer.SetProcess(posix.getpid())
       self.thunk.Run()
       # Never returns
 
     #log('STARTED process %s, pid = %d', self, pid)
+    self.tracer.OnProcessStart(pid)
 
     # Class invariant: after the process is started, it stores its PID.
     self.pid = pid
@@ -888,6 +890,7 @@ def Wait(self, waiter):
         break
       if self.state != job_state_e.Running:
         break
+    self.tracer.OnProcessEnd(self.pid, self.status)
     return self.status
 
   def JobWait(self, waiter):
diff --git a/core/process_test.py b/core/process_test.py
index 712d0bdd08..66e2161e14 100755
--- a/core/process_test.py
+++ b/core/process_test.py
@@ -11,6 +11,8 @@
     redirect, redirect_arg, cmd_value
 )
 from _devbuild.gen.syntax_asdl import redir_loc
+from asdl import runtime
+from core import dev
 from core import process  # module under test
 from core import test_lib
 from core import ui
@@ -18,7 +20,7 @@
 from core.pyerror import log
 from core import state
 from osh import builtin_misc
-from asdl import runtime
+from mycpp import mylib
 
 Process = process.Process
 ExternalThunk = process.ExternalThunk
@@ -29,47 +31,44 @@ def Banner(msg):
   print(msg)
 
 
-# TODO: Put these all in a function.
-_ARENA = test_lib.MakeArena('process_test.py')
-
-_MEM = state.Mem('', [], _ARENA, [])
-_PARSE_OPTS, _EXEC_OPTS, _MUTABLE_OPTS = state.MakeOpts(_MEM, None)
-_MEM.exec_opts = _EXEC_OPTS
-
-state.InitMem(_MEM, {}, '0.1')
-
-_JOB_STATE = process.JobState()
-_WAITER = process.Waiter(_JOB_STATE, _EXEC_OPTS)
-_ERRFMT = ui.ErrorFormatter(_ARENA)
-_FD_STATE = process.FdState(_ERRFMT, _JOB_STATE, None, None)
-_EXT_PROG = process.ExternalProgram('', _FD_STATE, _ERRFMT,
-                                    util.NullDebugFile())
-
-
 def _CommandNode(code_str, arena):
   c_parser = test_lib.InitCommandParser(code_str, arena=arena)
   return c_parser.ParseLogicalLine()
 
 
-def _ExtProc(argv):
-  arg_vec = cmd_value.Argv(argv, [0] * len(argv))
-  argv0_path = None
-  for path_entry in ['/bin', '/usr/bin']:
-    full_path = os.path.join(path_entry, argv[0])
-    if os.path.exists(full_path):
-      argv0_path = full_path
-      break
-  if not argv0_path:
-    argv0_path = argv[0]  # fallback that tests failure case
-  return Process(ExternalThunk(_EXT_PROG, argv0_path, arg_vec, {}), _JOB_STATE, None)
-
-
 class ProcessTest(unittest.TestCase):
 
-  def testStdinRedirect(self):
-    waiter = process.Waiter(_JOB_STATE, _EXEC_OPTS)
-    fd_state = process.FdState(_ERRFMT, _JOB_STATE, None, None)
+  def setUp(self):
+    self.arena = test_lib.MakeArena('process_test.py')
+
+    mem = state.Mem('', [], self.arena, [])
+    parse_opts, exec_opts, mutable_opts = state.MakeOpts(mem, None)
+    mem.exec_opts = exec_opts
+
+    state.InitMem(mem, {}, '0.1')
+
+    self.job_state = process.JobState()
+    self.waiter = process.Waiter(self.job_state, exec_opts)
+    errfmt = ui.ErrorFormatter(self.arena)
+    self.tracer = dev.Tracer(None, exec_opts, mutable_opts, mem, mylib.Stderr())
+    self.fd_state = process.FdState(errfmt, self.job_state, None, self.tracer)
+    self.ext_prog = process.ExternalProgram('', self.fd_state, errfmt,
+                                            util.NullDebugFile())
+
+  def _ExtProc(self, argv):
+    arg_vec = cmd_value.Argv(argv, [0] * len(argv))
+    argv0_path = None
+    for path_entry in ['/bin', '/usr/bin']:
+      full_path = os.path.join(path_entry, argv[0])
+      if os.path.exists(full_path):
+        argv0_path = full_path
+        break
+    if not argv0_path:
+      argv0_path = argv[0]  # fallback that tests failure case
+    return Process(ExternalThunk(self.ext_prog, argv0_path, arg_vec, {}),
+                   self.job_state, self.tracer)
 
+  def testStdinRedirect(self):
     PATH = '_tmp/one-two.txt'
     # Write two lines
     with open(PATH, 'w') as f:
@@ -80,13 +79,13 @@ def testStdinRedirect(self):
     r = redirect(Id.Redir_Less, runtime.NO_SPID, redir_loc.Fd(0),
                  redirect_arg.Path(PATH))
 
-    fd_state.Push([r], waiter)
+    self.fd_state.Push([r], self.waiter)
     line1, _ = builtin_misc._ReadUntilDelim('\n')
-    fd_state.Pop()
+    self.fd_state.Pop()
 
-    fd_state.Push([r], waiter)
+    self.fd_state.Push([r], self.waiter)
     line2, _ = builtin_misc._ReadUntilDelim('\n')
-    fd_state.Pop()
+    self.fd_state.Pop()
 
     # sys.stdin.readline() would erroneously return 'two' because of buffering.
     self.assertEqual('one', line1)
@@ -99,63 +98,62 @@ def testProcess(self):
     print('FDS BEFORE', os.listdir('/dev/fd'))
 
     Banner('date')
-    p = _ExtProc(['date'])
-    status = p.Run(_WAITER)
+    p = self._ExtProc(['date'])
+    status = p.Run(self.waiter)
     log('date returned %d', status)
     self.assertEqual(0, status)
 
     Banner('does-not-exist')
-    p = _ExtProc(['does-not-exist'])
-    print(p.Run(_WAITER))
+    p = self._ExtProc(['does-not-exist'])
+    print(p.Run(self.waiter))
 
     # 12 file descriptors open!
     print('FDS AFTER', os.listdir('/dev/fd'))
 
   def testPipeline(self):
-    node = _CommandNode('uniq -c', _ARENA)
-    cmd_ev = test_lib.InitCommandEvaluator(arena=_ARENA, ext_prog=_EXT_PROG)
+    node = _CommandNode('uniq -c', self.arena)
+    cmd_ev = test_lib.InitCommandEvaluator(arena=self.arena, ext_prog=self.ext_prog)
     print('BEFORE', os.listdir('/dev/fd'))
 
     p = process.Pipeline()
-    p.Add(_ExtProc(['ls']))
-    p.Add(_ExtProc(['cut', '-d', '.', '-f', '2']))
-    p.Add(_ExtProc(['sort']))
+    p.Add(self._ExtProc(['ls']))
+    p.Add(self._ExtProc(['cut', '-d', '.', '-f', '2']))
+    p.Add(self._ExtProc(['sort']))
 
     p.AddLast((cmd_ev, node))
 
-    pipe_status = p.Run(_WAITER, _FD_STATE)
+    pipe_status = p.Run(self.waiter, self.fd_state)
     log('pipe_status: %s', pipe_status)
 
     print('AFTER', os.listdir('/dev/fd'))
 
   def testPipeline2(self):
-    cmd_ev = test_lib.InitCommandEvaluator(arena=_ARENA, ext_prog=_EXT_PROG)
+    cmd_ev = test_lib.InitCommandEvaluator(arena=self.arena, ext_prog=self.ext_prog)
 
     Banner('ls | cut -d . -f 1 | head')
     p = process.Pipeline()
-    p.Add(_ExtProc(['ls']))
-    p.Add(_ExtProc(['cut', '-d', '.', '-f', '1']))
+    p.Add(self._ExtProc(['ls']))
+    p.Add(self._ExtProc(['cut', '-d', '.', '-f', '1']))
 
-    node = _CommandNode('head', _ARENA)
+    node = _CommandNode('head', self.arena)
     p.AddLast((cmd_ev, node))
 
-    fd_state = process.FdState(_ERRFMT, _JOB_STATE, None, None)
-    print(p.Run(_WAITER, _FD_STATE))
+    print(p.Run(self.waiter, self.fd_state))
 
     # Simulating subshell for each command
-    node1 = _CommandNode('ls', _ARENA)
-    node2 = _CommandNode('head', _ARENA)
-    node3 = _CommandNode('sort --reverse', _ARENA)
+    node1 = _CommandNode('ls', self.arena)
+    node2 = _CommandNode('head', self.arena)
+    node3 = _CommandNode('sort --reverse', self.arena)
 
     p = process.Pipeline()
-    p.Add(Process(process.SubProgramThunk(cmd_ev, node1), _JOB_STATE, None))
-    p.Add(Process(process.SubProgramThunk(cmd_ev, node2), _JOB_STATE, None))
-    p.Add(Process(process.SubProgramThunk(cmd_ev, node3), _JOB_STATE, None))
+    p.Add(Process(process.SubProgramThunk(cmd_ev, node1), self.job_state, self.tracer)) 
+    p.Add(Process(process.SubProgramThunk(cmd_ev, node2), self.job_state, self.tracer))
+    p.Add(Process(process.SubProgramThunk(cmd_ev, node3), self.job_state, self.tracer))
 
-    last_thunk = (cmd_ev, _CommandNode('cat', _ARENA))
+    last_thunk = (cmd_ev, _CommandNode('cat', self.arena))
     p.AddLast(last_thunk)
 
-    print(p.Run(_WAITER, _FD_STATE))
+    print(p.Run(self.waiter, self.fd_state))
 
     # TODO: Combine pipelines for other things:
 
@@ -173,8 +171,6 @@ def testPipeline2(self):
     # capture stdout of that interpreter.
 
   def testOpen(self):
-    fd_state = process.FdState(_ERRFMT, _JOB_STATE, None, None)
-
     # Disabled because mycpp translation can't handle it.  We do this at a
     # higher layer.
     return
@@ -183,8 +179,8 @@ def testOpen(self):
     # inconsistent.
     # We follow Python 3 in preferring OSError.
     # https://stackoverflow.com/questions/29347790/difference-between-ioerror-and-oserror
-    self.assertRaises(OSError, fd_state.Open, '_nonexistent_')
-    self.assertRaises(OSError, fd_state.Open, 'metrics/')
+    self.assertRaises(OSError, self.fd_state.Open, '_nonexistent_')
+    self.assertRaises(OSError, self.fd_state.Open, 'metrics/')
 
 
 if __name__ == '__main__':
diff --git a/demo/xtrace1.sh b/demo/xtrace1.sh
index 8896df1dc5..8bb6d4b7c4 100755
--- a/demo/xtrace1.sh
+++ b/demo/xtrace1.sh
@@ -154,16 +154,16 @@ main2() {
 }
 
 slowfunc() {
-  echo $1
-  sleep 0.1
-  echo $2
+  for i in "$@"; do
+    sleep 0.$i
+  done
 }
 
 concurrency() {
   set -x
 
   # PID prefix would be nice here
-  slowfunc 1 2 | slowfunc 3 4
+  slowfunc 1 3 | slowfunc 2 4 5 | slowfunc 6
 }
 
 my_ps4() {
diff --git a/doc/xtrace.md b/doc/xtrace.md
new file mode 100644
index 0000000000..9435e45014
--- /dev/null
+++ b/doc/xtrace.md
@@ -0,0 +1,31 @@
+---
+default_highlighter: oil-sh
+---
+
+Tracing Execution in Oil (xtrace)
+==================================
+
+Examples:
+
+
+    shopt -s xtrace_rich   # stack and processes?  part of oil:basic
+
+    PS4='${X_indent}${X_prefix} '
+
+
+- X_indent is empty unless xtrace_pretty
+- X_prefix contains PID when it's pretty
+  - -+
+  - or it can be: ${X_punct}${X_prefix} so you can add BASH_SOURCE, FUNCNAME,
+    LINENO.
+    - TODO: Add OIL_SOURCE  as alias?
+      and OIL_PID?  or maybe OIL_CURRENT_PID.  or maybe getpid() is better
+      - distinguish between functions and values
+    - Also #$SECONDS for timing
+
+
+## Parsing It
+
+- I think you can specify a regular language
+
+
diff --git a/frontend/option_def.py b/frontend/option_def.py
index 386bcfed9f..559a397575 100644
--- a/frontend/option_def.py
+++ b/frontend/option_def.py
@@ -122,7 +122,7 @@ def DoneWithImplementedOptions(self):
 
     ('command_sub_errexit', False),  # check after command sub
     ('process_sub_fail', False),     # like pipefail, but for <(sort foo.txt)
-
+    ('xtrace_rich', False),          # Hierarchical trace with PIDs
 ]
 
 # TODO: Add strict_arg_parse?  For example, 'trap 1 2 3' shouldn't be
diff --git a/spec/oil-xtrace.test.sh b/spec/oil-xtrace.test.sh
new file mode 100644
index 0000000000..15af6b7863
--- /dev/null
+++ b/spec/oil-xtrace.test.sh
@@ -0,0 +1,21 @@
+# Oil xtrace
+
+#### proc
+shopt --set xtrace xtrace_rich
+
+proc f {
+  : $1
+}
+f hi
+## STDOUT:
+## END
+
+# TODO: consider not quoting ':' and FOO=bar?  And set '+x' doesn't need to be
+# quoted?  Only space does
+
+## STDERR:
++ f hi
+  > proc f
+  + ':' hi
+  < proc f
+## END
diff --git a/test/spec.sh b/test/spec.sh
index ad6af62069..2ad35101b6 100755
--- a/test/spec.sh
+++ b/test/spec.sh
@@ -964,6 +964,11 @@ oil-scope() {
     $OSH_LIST "$@"
 }
 
+oil-xtrace() {
+  sh-spec spec/oil-xtrace.test.sh --osh-failures-allowed 0 \
+    $OSH_LIST "$@"
+}
+
 # Use bin/oil
 
 oil-keywords() {