Rename lex.c to fastlex.c so there's no confusion with osh/lex.py.

Also: Updates to line count script. Count code generators and generated code.
oilshell · Nov 22, 2017 · ccb0a23 · ccb0a23
1 parent f06278c
commit ccb0a23
Show file tree

Hide file tree

Showing 9 changed files with 57 additions and 21 deletions.
diff --git a/asdl/gen_cpp.py b/asdl/gen_cpp.py
@@ -395,7 +395,7 @@ class CEnumVisitor(AsdlVisitor):
   def VisitSimpleSum(self, sum, name, depth):
     # Just use #define, since enums aren't namespaced.
     for i, variant in enumerate(sum.types):
-      self.Emit('#define %s__%s %d;' % (name, variant.name, i + 1), depth)
+      self.Emit('#define %s__%s %d' % (name, variant.name, i + 1), depth)
     self.Emit("", depth)
 
 

diff --git a/build/codegen.sh b/build/codegen.sh
@@ -60,7 +60,7 @@ all() {
   lex-gen-native
 
   # Why do we need this?
-  rm _devbuild/pylibc/x86_64/lex.so
+  rm -f _devbuild/pylibc/x86_64/lex.so
 
   # Note: This also does pylibc, which we don't want.
   build/dev.sh all

diff --git a/build/dev.sh b/build/dev.sh
@@ -30,7 +30,7 @@ gen-help() {
   build/doc.sh oil-quick-ref
 }
 
-# TODO: should lex.c be part of the dev build?  It means you need re2c
+# TODO: should fastlex.c be part of the dev build?  It means you need re2c
 # installed?  I don't think it makes sense to have 3 builds, so yes I think we
 # can put it here for simplicity.
 # However one problem is that if the Python lexer definition is changed, then
@@ -46,18 +46,18 @@ pylibc() {
   local libc_so=$(echo _devbuild/pylibc/$arch/libc.so)
   ln -s -f -v $libc_so libc.so
 
-  local lex_so=$(echo _devbuild/pylibc/$arch/lex.so)
-  ln -s -f -v $lex_so lex.so
+  local fastlex_so=$(echo _devbuild/pylibc/$arch/fastlex.so)
+  ln -s -f -v $fastlex_so fastlex.so
 
-  file libc.so lex.so
+  file libc.so fastlex.so
 }
 
 # Also done by unit.sh.
 test-pylibc() {
   export PYTHONPATH=.
   pylibc
   native/libc_test.py
-  native/lex_test.py
+  native/fastlex_test.py
 }
 
 clean-pylibc() {

diff --git a/build/setup.py b/build/setup.py
@@ -9,10 +9,10 @@
       description = 'Module for libc functions like fnmatch()',
       ext_modules = [module])
 
-module = Extension('lex',
-                    sources = ['native/lex.c'])
+module = Extension('fastlex',
+                    sources = ['native/fastlex.c'])
 
-setup(name = 'lex',
+setup(name = 'fastlex',
       version = '1.0',
       description = 'Module to speed up lexers',
       include_dirs = ['_build/gen'],

diff --git a/core/lexer.py b/core/lexer.py
@@ -52,6 +52,24 @@ def FindLongestMatch(re_list, s, pos):
   return end_index, tok_type, tok_val
 
 
+# TODO: LineLexer needs the matcher rather than lexer_def.
+
+class MatchTokenSlow(object):
+  """An abstract matcher that doesn't depend on OSH.
+
+  Instances are callable with the same arguments as MatchTokenFast; the match
+  itself is delegated to FindLongestMatch.
+  """
+  def __init__(self, lexer_def):
+    # Indexed by lex_mode in __call__; each entry is handed to
+    # FindLongestMatch as its re_list.
+    self.lexer_def = lexer_def
+
+  def __call__(self, lex_mode, line, start_index):
+    """Returns (id, end_index)."""
+    # FindLongestMatch returns (end_index, tok_type, tok_val).  Reorder it and
+    # drop the value so the result matches the documented pair.
+    end_index, tok_type, _ = FindLongestMatch(
+        self.lexer_def[lex_mode], line, start_index)
+    return tok_type, end_index
+
+
+def MatchTokenFast(lex_mode, line, start_index):
+  """Match one token using the native fastlex extension.
+
+  Passes lex_mode.enum_id, line and start_index to fastlex.MatchToken and
+  wraps the raw token type it returns in Id.
+
+  Returns (id, end_index).
+  """
+  # The extension module is named 'fastlex' (not 'lex'), matching
+  # Py_InitModule("fastlex", ...) in native/fastlex.c.
+  # NOTE(review): 'fastlex' must be imported by this file; the import block is
+  # not part of this hunk -- confirm.
+  tok_type, end_index = fastlex.MatchToken(lex_mode.enum_id, line, start_index)
+  return Id(tok_type), end_index
+
+
 class LineLexer(object):
   def __init__(self, lexer_def, line, arena=None):
     # Compile all regexes

diff --git a/native/lex.c → native/fastlex.c b/native/lex.c → native/fastlex.c
@@ -25,7 +25,7 @@ void debug(const char* fmt, ...) {
 }
 
 static PyObject *
-lex_MatchToken(PyObject *self, PyObject *args) {
+fastlex_MatchToken(PyObject *self, PyObject *args) {
   int lex_mode;
   const char* line;
 
@@ -57,11 +57,11 @@ lex_MatchToken(PyObject *self, PyObject *args) {
 // FastTokenMatcher
 
 PyMethodDef methods[] = {
-  {"MatchToken", lex_MatchToken, METH_VARARGS,
+  {"MatchToken", fastlex_MatchToken, METH_VARARGS,
    "(lexer mode, line, start_index) -> (id, end_index)."},
   {NULL, NULL},
 };
 
-void initlex(void) {
-  Py_InitModule("lex", methods);
+void initfastlex(void) {
+  Py_InitModule("fastlex", methods);
 }
diff --git a/native/lex_test.py → native/fastlex_test.py b/native/lex_test.py → native/fastlex_test.py
@@ -14,20 +14,20 @@
 from core.id_kind import Id
 from osh import ast_ as ast
 
-import lex  # module under test
+import fastlex  # module under test
 
 lex_mode_e = ast.lex_mode_e
 
 
 def MatchToken(lex_mode, line, s):
-  tok_type, end_index = lex.MatchToken(lex_mode.enum_id, line, s)
+  tok_type, end_index = fastlex.MatchToken(lex_mode.enum_id, line, s)
   return Id(tok_type), end_index
 
 
 class LexTest(unittest.TestCase):
 
   def testMatchToken(self):
-    print(dir(lex))
+    print(dir(fastlex))
     print lex_mode_e.COMMENT.enum_id
     result = MatchToken(lex_mode_e.COMMENT, 'line', 3)
     print result

diff --git a/osh/lex_gen.py b/osh/lex_gen.py
@@ -65,11 +65,21 @@ def main(argv):
 
   print """
 
-inline void MatchToken(int lexer_mode, char* line, int line_len, int start_index,
+inline void MatchToken(int lex_mode, char* line, int line_len, int start_index,
                 int* id, int* end_index) {
-  *id = id__Lit_Chars;
-  //*id = id__Lit_Other;
-  *end_index = 3;
+  switch (lex_mode)  {
+  case lex_mode__OUTER:
+    *id = id__Lit_Chars;
+    //*id = id__Lit_Other;
+    *end_index = 3;
+    break;
+  case lex_mode__COMMENT:
+    *id = id__Lit_Other;
+    *end_index = 5;
+    break;
+  default:
+    assert(0);
+  }
 }
 """
   return

diff --git a/scripts/count.sh b/scripts/count.sh
@@ -44,6 +44,14 @@ all() {
   wc -l asdl/{asdl_,py_meta,gen_cpp,encode,format}.py | sort --numeric
   echo
 
+  echo 'CODE GENERATORS'
+  wc -l */*_gen.py | sort --numeric
+  echo
+
+  echo 'GENERATED CODE'
+  wc -l _devbuild/*.py _build/gen/* | sort --numeric
+  echo
+
   echo 'TOOLS'
   ls tools/*.py | filter-py | xargs wc -l | sort --numeric
   echo