Skip to content

Commit

Permalink
Rename lex.c to fastlex.c so there's no confusion with osh/lex.py.
Browse files Browse the repository at this point in the history
Also: Updates to line count script. Count code generators and generated
code.
  • Loading branch information
Andy Chu committed Nov 22, 2017
1 parent f06278c commit ccb0a23
Show file tree
Hide file tree
Showing 9 changed files with 57 additions and 21 deletions.
2 changes: 1 addition & 1 deletion asdl/gen_cpp.py
Expand Up @@ -395,7 +395,7 @@ class CEnumVisitor(AsdlVisitor):
def VisitSimpleSum(self, sum, name, depth):
# Just use #define, since enums aren't namespaced.
for i, variant in enumerate(sum.types):
self.Emit('#define %s__%s %d;' % (name, variant.name, i + 1), depth)
self.Emit('#define %s__%s %d' % (name, variant.name, i + 1), depth)
self.Emit("", depth)


Expand Down
2 changes: 1 addition & 1 deletion build/codegen.sh
Expand Up @@ -60,7 +60,7 @@ all() {
lex-gen-native

# Why do we need this?
rm _devbuild/pylibc/x86_64/lex.so
rm -f _devbuild/pylibc/x86_64/lex.so

# Note: This also does pylibc, which we don't want.
build/dev.sh all
Expand Down
10 changes: 5 additions & 5 deletions build/dev.sh
Expand Up @@ -30,7 +30,7 @@ gen-help() {
build/doc.sh oil-quick-ref
}

# TODO: should lex.c be part of the dev build? It means you need re2c
# TODO: should fastlex.c be part of the dev build? It means you need re2c
# installed. I don't think it makes sense to have 3 builds, so yes I think we
# can put it here for simplicity.
# However one problem is that if the Python lexer definition is changed, then
Expand All @@ -46,18 +46,18 @@ pylibc() {
local libc_so=$(echo _devbuild/pylibc/$arch/libc.so)
ln -s -f -v $libc_so libc.so

local lex_so=$(echo _devbuild/pylibc/$arch/lex.so)
ln -s -f -v $lex_so lex.so
local fastlex_so=$(echo _devbuild/pylibc/$arch/fastlex.so)
ln -s -f -v $fastlex_so fastlex.so

file libc.so lex.so
file libc.so fastlex.so
}

# Also done by unit.sh.
test-pylibc() {
export PYTHONPATH=.
pylibc
native/libc_test.py
native/lex_test.py
native/fastlex_test.py
}

clean-pylibc() {
Expand Down
6 changes: 3 additions & 3 deletions build/setup.py
Expand Up @@ -9,10 +9,10 @@
description = 'Module for libc functions like fnmatch()',
ext_modules = [module])

module = Extension('lex',
sources = ['native/lex.c'])
module = Extension('fastlex',
sources = ['native/fastlex.c'])

setup(name = 'lex',
setup(name = 'fastlex',
version = '1.0',
description = 'Module to speed up lexers',
include_dirs = ['_build/gen'],
Expand Down
18 changes: 18 additions & 0 deletions core/lexer.py
Expand Up @@ -52,6 +52,24 @@ def FindLongestMatch(re_list, s, pos):
return end_index, tok_type, tok_val


# TODO: LineLexer needs the matcher rather than lexer_def.

class MatchTokenSlow(object):
  """Regex-based token matcher that doesn't depend on OSH.

  Holds a lexer definition that is indexable by lex mode, and matches a token
  by handing the entry for the requested mode to FindLongestMatch.
  """

  def __init__(self, lexer_def):
    # Indexed by lex_mode in __call__; each entry is passed to
    # FindLongestMatch as its first argument.
    self.lexer_def = lexer_def

  def __call__(self, lex_mode, line, start_index):
    """Match one token in `line`, starting at `start_index`.

    Args:
      lex_mode: key used to select an entry from the lexer definition.
      line: the string to match against.
      start_index: position in `line` at which matching starts.

    Returns:
      The result of FindLongestMatch, passed through unchanged.

    NOTE(review): this used to be documented as returning (id, end_index),
    but FindLongestMatch appears to return (end_index, tok_type, tok_val).
    Confirm which contract callers should rely on before swapping this in for
    a matcher that returns (id, end_index).
    """
    return FindLongestMatch(self.lexer_def[lex_mode], line, start_index)


def MatchTokenFast(lex_mode, line, start_index):
  """Match one token using the native fastlex extension.

  Args:
    lex_mode: lex mode value; its enum_id attribute is what gets passed to the
      native matcher.
    line: the string to match against.
    start_index: position in `line` at which matching starts.

  Returns:
    (id, end_index), where id is the native token type wrapped in Id.
  """
  # The native module is named 'fastlex' (renamed from 'lex'), so the old name
  # would no longer resolve to the extension.  Importing here rather than at
  # module level means the extension is only needed when this matcher is used.
  import fastlex

  tok_type, end_index = fastlex.MatchToken(lex_mode.enum_id, line, start_index)
  return Id(tok_type), end_index


class LineLexer(object):
def __init__(self, lexer_def, line, arena=None):
# Compile all regexes
Expand Down
8 changes: 4 additions & 4 deletions native/lex.c → native/fastlex.c
Expand Up @@ -25,7 +25,7 @@ void debug(const char* fmt, ...) {
}

static PyObject *
lex_MatchToken(PyObject *self, PyObject *args) {
fastlex_MatchToken(PyObject *self, PyObject *args) {
int lex_mode;
const char* line;

Expand Down Expand Up @@ -57,11 +57,11 @@ lex_MatchToken(PyObject *self, PyObject *args) {
// FastTokenMatcher

PyMethodDef methods[] = {
{"MatchToken", lex_MatchToken, METH_VARARGS,
{"MatchToken", fastlex_MatchToken, METH_VARARGS,
"(lexer mode, line, start_index) -> (id, end_index)."},
{NULL, NULL},
};

void initlex(void) {
Py_InitModule("lex", methods);
void initfastlex(void) {
Py_InitModule("fastlex", methods);
}
6 changes: 3 additions & 3 deletions native/lex_test.py → native/fastlex_test.py
Expand Up @@ -14,20 +14,20 @@
from core.id_kind import Id
from osh import ast_ as ast

import lex # module under test
import fastlex # module under test

lex_mode_e = ast.lex_mode_e


def MatchToken(lex_mode, line, s):
tok_type, end_index = lex.MatchToken(lex_mode.enum_id, line, s)
tok_type, end_index = fastlex.MatchToken(lex_mode.enum_id, line, s)
return Id(tok_type), end_index


class LexTest(unittest.TestCase):

def testMatchToken(self):
print(dir(lex))
print(dir(fastlex))
print lex_mode_e.COMMENT.enum_id
result = MatchToken(lex_mode_e.COMMENT, 'line', 3)
print result
Expand Down
18 changes: 14 additions & 4 deletions osh/lex_gen.py
Expand Up @@ -65,11 +65,21 @@ def main(argv):

print """
inline void MatchToken(int lexer_mode, char* line, int line_len, int start_index,
inline void MatchToken(int lex_mode, char* line, int line_len, int start_index,
int* id, int* end_index) {
*id = id__Lit_Chars;
//*id = id__Lit_Other;
*end_index = 3;
switch (lex_mode) {
case lex_mode__OUTER:
*id = id__Lit_Chars;
//*id = id__Lit_Other;
*end_index = 3;
break;
case lex_mode__COMMENT:
*id = id__Lit_Other;
*end_index = 5;
break;
default:
assert(0);
}
}
"""
return
Expand Down
8 changes: 8 additions & 0 deletions scripts/count.sh
Expand Up @@ -44,6 +44,14 @@ all() {
wc -l asdl/{asdl_,py_meta,gen_cpp,encode,format}.py | sort --numeric
echo

echo 'CODE GENERATORS'
wc -l */*_gen.py | sort --numeric
echo

echo 'GENERATED CODE'
wc -l _devbuild/*.py _build/gen/* | sort --numeric
echo

echo 'TOOLS'
ls tools/*.py | filter-py | xargs wc -l | sort --numeric
echo
Expand Down

0 comments on commit ccb0a23

Please sign in to comment.