Small tweaks to benchmarks; minor code cleanup.
Looking for places to optimize the code in Python.
Andy Chu committed Mar 4, 2019
1 parent 34c608b commit 2837925
Showing 3 changed files with 18 additions and 21 deletions.
15 changes: 14 additions & 1 deletion benchmarks/pytrace.sh
@@ -11,8 +11,13 @@ set -o nounset
set -o pipefail
set -o errexit

export PYTHONPATH='.:vendor'

readonly BIGGEST=benchmarks/testdata/configure-coreutils
readonly GIT_COMPLETION=testdata/completion/git
readonly OSH_COMPLETION=../bash-completion/osh_completion
readonly ABUILD=benchmarks/testdata/abuild

readonly -a RUN_ABUILD=(bin/oil.py osh $ABUILD -h)
# Slightly faster but not significantly.
#readonly -a RUN_ABUILD=(_bin/osh $ABUILD -h)
@@ -27,8 +32,16 @@ time-bash-run-abuild() { time bash $ABUILD -h; }
# Old: ~2.7 seconds (no tracing)
# 2017/11/27, After ASDL optimization: 0.72 seconds.
time-run-abuild() { time "${RUN_ABUILD[@]}"; }

# ~250 ms
time-parse-abuild() { time "${OSH_PARSE[@]}" $ABUILD; }

# ~160 ms
time-parse-git-completion() { time "${OSH_PARSE[@]}" $GIT_COMPLETION; }
# ~150 ms
time-parse-osh-completion() { time "${OSH_PARSE[@]}" $OSH_COMPLETION; }

# 4.3 seconds on lisa
time-parse-biggest() { time "${OSH_PARSE[@]}" $BIGGEST; }

_cprofile() {
@@ -37,7 +50,7 @@ _cprofile() {
time python -m cProfile -o $out "$@"
}

# 3.8 seconds. So less than 2x overhead.
# Takes about 380 ms.
cprofile-osh-parse() {
local in=${1:-$ABUILD}
local out=${2:-abuild.cprofile}
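The _cprofile helper above only writes profile data to a file; finding the hot spots then means reading that file back. A minimal sketch of that follow-up step with the standard-library pstats module (the file name abuild.cprofile matches the default in cprofile-osh-parse; the helper itself is illustrative, not part of the commit):

# inspect_profile.py -- hypothetical helper, not in this commit.
# Reads the profile written by `benchmarks/pytrace.sh cprofile-osh-parse`
# and prints the functions with the most cumulative time.
import pstats

p = pstats.Stats('abuild.cprofile')
p.strip_dirs()              # drop long path prefixes from entries
p.sort_stats('cumulative')  # rank by inclusive (callees included) time
p.print_stats(20)           # show the 20 hottest functions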
8 changes: 2 additions & 6 deletions frontend/lexer.py
@@ -29,7 +29,6 @@ def R(pat, tok_type):

class LineLexer(object):
def __init__(self, match_func, line, arena):
# Compile all regexes
self.match_func = match_func
self.arena = arena

@@ -61,7 +60,6 @@ def MaybeUnreadOne(self):
return True

def GetSpanIdForEof(self):
assert self.arena, self.arena # This is mandatory now?
# zero length is special!
line_span = syntax.line_span(self.line_id, self.line_pos, 0)
return self.arena.AddLineSpan(line_span)
@@ -77,9 +75,10 @@ def LookAhead(self, lex_mode):
lex_mode_e.Outer
"""
pos = self.line_pos
n = len(self.line)
#print('Look ahead from pos %d, line %r' % (pos,self.line))
while True:
if pos == len(self.line):
if pos == n:
# We don't allow lookahead while already at end of line, because it
# would involve interacting with the line reader, and we never need
# it. In the OUTER mode, there is an explicit newline token, but
@@ -98,9 +97,7 @@
return syntax.token(tok_type, tok_val, const.NO_INTEGER)

def Read(self, lex_mode):
#assert self.line_pos <= len(self.line), (self.line, self.line_pos)
tok_type, end_pos = self.match_func(lex_mode, self.line, self.line_pos)
#assert end_pos <= len(self.line)
if tok_type == Id.Eol_Tok: # Do NOT add a span for this sentinel!
return syntax.token(tok_type, '', const.NO_INTEGER)

@@ -111,7 +108,6 @@
# revisit this later.

# TODO: Add this back once arena is threaded everywhere
#assert self.line_id != -1
line_span = syntax.line_span(self.line_id, self.line_pos, len(tok_val))

# NOTE: We're putting the arena hook in LineLexer and not Lexer because we
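The LookAhead change above hoists len(self.line) out of the loop into a local n. In CPython that saves a global-name lookup for len plus an attribute lookup on self on every iteration. A rough way to see the effect in isolation, using only timeit (the snippet is illustrative, not a measurement from this commit):

# hoist_len.py -- illustrative micro-benchmark, not from the commit.
import timeit

setup = "line = 'x' * 1000"

in_loop = '''
pos = 0
while pos < len(line):  # len() and the name lookup repeat every iteration
    pos += 1
'''

hoisted = '''
pos = 0
n = len(line)           # evaluated once, then read from a fast local
while pos < n:
    pos += 1
'''

print('len in loop:', min(timeit.repeat(in_loop, setup, number=5000)))
print('len hoisted:', min(timeit.repeat(hoisted, setup, number=5000)))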
16 changes: 2 additions & 14 deletions osh/word_parse.py
@@ -118,7 +118,6 @@ def _ReadVarOpArg(self, arg_lex_mode, eof_type=Id.Undefined_Tok,

w = self._ReadCompoundWord(lex_mode=arg_lex_mode, eof_type=eof_type,
empty_ok=empty_ok)
assert w is not None

# This is for "${s:-}", ${s/a//}, etc. It is analogous to
# LooksLikeAssignment where we turn x= into x=''. It has the same
@@ -247,7 +246,6 @@ def _ParseVarOf(self):
self._Peek() # Check for []
if self.token_type == Id.VOp2_LBracket:
bracket_op = self._ReadSubscript()
assert bracket_op is not None
else:
bracket_op = None

@@ -260,7 +258,6 @@ def _ParseVarExpr(self, arg_lex_mode):
Start parsing at the op -- we already skipped past the name.
"""
part = self._ParseVarOf()
assert part is not None

self._Peek()
if self.token_type == Id.Right_VarSub:
@@ -296,14 +293,12 @@ def _ParseVarExpr(self, arg_lex_mode):
if self.token_type == Id.VOp2_Slash:
op_spid = self.cur_token.span_id # for attributing error to /
op = self._ReadPatSubVarOp(arg_lex_mode)
assert op is not None
op.spids.append(op_spid)
# Checked by the method above
assert self.token_type == Id.Right_VarSub, self.cur_token

elif self.token_type == Id.VOp2_Colon:
op = self._ReadSliceVarOp()
assert op is not None
# NOTE: } in arithmetic mode.
if self.token_type != Id.Arith_RBrace:
# Token seems off; doesn't point to X in # ${a:1:2 X
@@ -744,7 +739,6 @@ def _ReadArithExpr(self):
# calls self.ReadWord(lex_mode_e.Arith)
a_parser = tdop.TdopParser(arith_parse.SPEC, self)
anode = a_parser.Parse()
assert anode is not None
return anode # could be None

def _ReadArithSubPart(self):
Expand Down Expand Up @@ -884,7 +878,6 @@ def _ReadArrayLiteralPart(self):
words = []
while True:
w = w_parser.ReadWord(lex_mode_e.Outer)
assert w is not None

if w.tag == word_e.TokenWord:
word_id = word.CommandId(w)
@@ -918,8 +911,9 @@ def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok,
num_parts = 0
done = False
while not done:
allow_done = empty_ok or num_parts != 0
self._Peek()

allow_done = empty_ok or num_parts != 0
if allow_done and self.token_type == eof_type:
done = True # e.g. for ${foo//pat/replace}

@@ -931,7 +925,6 @@
part = word_part.EscapedLiteralPart(self.cur_token)
else:
part = word_part.LiteralPart(self.cur_token)
#part.xspans.append(self.cur_token.span_id)

word.parts.append(part)

@@ -940,7 +933,6 @@
if t.id == Id.Op_LParen:
self.lexer.PushHint(Id.Op_RParen, Id.Right_ArrayLiteral)
part2 = self._ReadArrayLiteralPart()
assert part2 is not None
word.parts.append(part2)

elif self.token_kind == Kind.VSub:
@@ -949,12 +941,10 @@

elif self.token_kind == Kind.ExtGlob:
part = self._ReadExtGlobPart()
assert part is not None
word.parts.append(part)

elif self.token_kind == Kind.Left:
part = self._ReadLeftParts()
assert part is not None
word.parts.append(part)

# NOT done yet, will advance below
@@ -1095,7 +1085,6 @@ def _ReadWord(self, lex_mode):

else:
w = self._ReadCompoundWord(lex_mode=lex_mode)
assert w is not None
return w, False

else:
@@ -1137,7 +1126,6 @@ def ReadWord(self, lex_mode):
if not need_more:
break

assert w is not None, w
self.cursor = w

# TODO: Do consolidation of newlines in the lexer?
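Most of the word_parse.py changes simply drop assert-not-None checks. One detail worth knowing when cleaning these out of a hot parser (a general CPython fact, not a stated motivation of this commit): assert statements compile to real bytecode and are only stripped under python -O, so they are not free in the default interpreter. A quick way to confirm this:

# assert_cost.py -- illustrative; shows the assert generates bytecode.
import dis

def f(w):
    assert w is not None  # compiled to a test + raise unless run with -O
    return w

dis.dis(f)  # run as `python assert_cost.py` to see the assert's opcodes;
            # under `python -O assert_cost.py` they are stripped entirely.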
