Permalink
Browse files

Rename lex.c to fastlex.c so there's no confusion with osh/lex.py.

Also: update the line count script to count code generators and generated
code.
  • Loading branch information...
Andy Chu
Andy Chu committed Nov 21, 2017
1 parent f06278c commit ccb0a23c881732947cfcf66e997aec3a850313d6
Showing with 57 additions and 21 deletions.
  1. +1 −1 asdl/gen_cpp.py
  2. +1 −1 build/codegen.sh
  3. +5 −5 build/dev.sh
  4. +3 −3 build/setup.py
  5. +18 −0 core/lexer.py
  6. +4 −4 native/{lex.c → fastlex.c}
  7. +3 −3 native/{lex_test.py → fastlex_test.py}
  8. +14 −4 osh/lex_gen.py
  9. +8 −0 scripts/count.sh
View
@@ -395,7 +395,7 @@ class CEnumVisitor(AsdlVisitor):
def VisitSimpleSum(self, sum, name, depth):
# Just use #define, since enums aren't namespaced.
for i, variant in enumerate(sum.types):
self.Emit('#define %s__%s %d;' % (name, variant.name, i + 1), depth)
self.Emit('#define %s__%s %d' % (name, variant.name, i + 1), depth)
self.Emit("", depth)
View
@@ -60,7 +60,7 @@ all() {
lex-gen-native
# Why do we need this?
rm _devbuild/pylibc/x86_64/lex.so
rm -f _devbuild/pylibc/x86_64/lex.so
# Note: This also does pylibc, which we don't want.
build/dev.sh all
View
@@ -30,7 +30,7 @@ gen-help() {
build/doc.sh oil-quick-ref
}
# TODO: should lex.c be part of the dev build? It means you need re2c
# TODO: should fastlex.c be part of the dev build? It means you need re2c
# installed. I don't think it makes sense to have 3 builds, so yes, I think we
# can put it here for simplicity.
# However one problem is that if the Python lexer definition is changed, then
@@ -46,18 +46,18 @@ pylibc() {
local libc_so=$(echo _devbuild/pylibc/$arch/libc.so)
ln -s -f -v $libc_so libc.so
local lex_so=$(echo _devbuild/pylibc/$arch/lex.so)
ln -s -f -v $lex_so lex.so
local fastlex_so=$(echo _devbuild/pylibc/$arch/fastlex.so)
ln -s -f -v $fastlex_so fastlex.so
file libc.so lex.so
file libc.so fastlex.so
}
# Also done by unit.sh.
test-pylibc() {
export PYTHONPATH=.
pylibc
native/libc_test.py
native/lex_test.py
native/fastlex_test.py
}
clean-pylibc() {
View
@@ -9,10 +9,10 @@
description = 'Module for libc functions like fnmatch()',
ext_modules = [module])
module = Extension('lex',
sources = ['native/lex.c'])
module = Extension('fastlex',
sources = ['native/fastlex.c'])
setup(name = 'lex',
setup(name = 'fastlex',
version = '1.0',
description = 'Module to speed up lexers',
include_dirs = ['_build/gen'],
View
@@ -52,6 +52,24 @@ def FindLongestMatch(re_list, s, pos):
return end_index, tok_type, tok_val
# TODO: LineLexer needs the matcher rather than lexer_def.
class MatchTokenSlow(object):
  """An abstract matcher that doesn't depend on OSH.

  Instances are callable with the same arguments as MatchTokenFast and return
  a result of the same shape: (id, end_index).
  """

  def __init__(self, lexer_def):
    # Indexed by lex_mode in __call__; the selected entry is passed to
    # FindLongestMatch as its list of patterns.
    self.lexer_def = lexer_def

  def __call__(self, lex_mode, line, start_index):
    """Returns (id, end_index)."""
    # FindLongestMatch returns (end_index, tok_type, tok_val).  Reorder the
    # first two and drop tok_val so the result matches the documented shape.
    end_index, tok_type, _ = FindLongestMatch(
        self.lexer_def[lex_mode], line, start_index)
    return tok_type, end_index
def MatchTokenFast(lex_mode, line, start_index):
  """Returns (id, end_index)."""
  # The native extension module is named 'fastlex' (it used to be 'lex').
  # Importing it here rather than at module level defers the import until
  # the fast matcher is actually called.
  import fastlex
  tok_type, end_index = fastlex.MatchToken(
      lex_mode.enum_id, line, start_index)
  # The native function hands back an integer token type; wrap it in Id.
  return Id(tok_type), end_index
class LineLexer(object):
def __init__(self, lexer_def, line, arena=None):
# Compile all regexes
@@ -25,7 +25,7 @@ void debug(const char* fmt, ...) {
}
static PyObject *
lex_MatchToken(PyObject *self, PyObject *args) {
fastlex_MatchToken(PyObject *self, PyObject *args) {
int lex_mode;
const char* line;
@@ -57,11 +57,11 @@ lex_MatchToken(PyObject *self, PyObject *args) {
// FastTokenMatcher
PyMethodDef methods[] = {
{"MatchToken", lex_MatchToken, METH_VARARGS,
{"MatchToken", fastlex_MatchToken, METH_VARARGS,
"(lexer mode, line, start_index) -> (id, end_index)."},
{NULL, NULL},
};
void initlex(void) {
Py_InitModule("lex", methods);
void initfastlex(void) {
Py_InitModule("fastlex", methods);
}
@@ -14,20 +14,20 @@
from core.id_kind import Id
from osh import ast_ as ast
import lex # module under test
import fastlex # module under test
lex_mode_e = ast.lex_mode_e
def MatchToken(lex_mode, line, s):
tok_type, end_index = lex.MatchToken(lex_mode.enum_id, line, s)
tok_type, end_index = fastlex.MatchToken(lex_mode.enum_id, line, s)
return Id(tok_type), end_index
class LexTest(unittest.TestCase):
def testMatchToken(self):
print(dir(lex))
print(dir(fastlex))
print lex_mode_e.COMMENT.enum_id
result = MatchToken(lex_mode_e.COMMENT, 'line', 3)
print result
View
@@ -65,11 +65,21 @@ def main(argv):
print """
inline void MatchToken(int lexer_mode, char* line, int line_len, int start_index,
inline void MatchToken(int lex_mode, char* line, int line_len, int start_index,
int* id, int* end_index) {
*id = id__Lit_Chars;
//*id = id__Lit_Other;
*end_index = 3;
switch (lex_mode) {
case lex_mode__OUTER:
*id = id__Lit_Chars;
//*id = id__Lit_Other;
*end_index = 3;
break;
case lex_mode__COMMENT:
*id = id__Lit_Other;
*end_index = 5;
break;
default:
assert(0);
}
}
"""
return
View
@@ -44,6 +44,14 @@ all() {
wc -l asdl/{asdl_,py_meta,gen_cpp,encode,format}.py | sort --numeric
echo
echo 'CODE GENERATORS'
wc -l */*_gen.py | sort --numeric
echo
echo 'GENERATED CODE'
wc -l _devbuild/*.py _build/gen/* | sort --numeric
echo
echo 'TOOLS'
ls tools/*.py | filter-py | xargs wc -l | sort --numeric
echo

0 comments on commit ccb0a23

Please sign in to comment.