Permalink
Browse files

Plumbing for the lexer in re2c.

We generate 4 files:

1. id.h from id_kind.py
2. osh-ast.h from osh.asdl (for lex_mode__*)
3. osh-lex.re2c.h with an inline function
4. osh-lex.h -- file 3 compiled to plain C with re2c

Files 1, 2, and 4 are included from the Python extension native/lex.c.
  • Loading branch information...
Andy Chu
Andy Chu committed Nov 21, 2017
1 parent 764dc48 commit f06278c6ad2c64e17eab5ef5a67e741f21357f63
Showing with 346 additions and 323 deletions.
  1. +1 −1 Makefile
  2. +25 −4 asdl/gen_cpp.py
  3. +89 −0 build/codegen.sh
  4. +7 −0 build/dev.sh
  5. +1 −0 build/setup.py
  6. +9 −1 core/id_kind_gen.py
  7. +31 −278 native/lex.c
  8. +21 −2 native/lex_test.py
  9. +1 −26 native/libc.c
  10. +10 −11 osh/ast_.py
  11. +54 −0 osh/ast_gen.py
  12. +97 −0 osh/lex_gen.py
View
@@ -68,7 +68,7 @@
# handling directories but I don't know it.
# NOTE: _devbuild is made by build/dev.sh. That directory is NOT cleaned with
# 'make clean'.
$(shell mkdir -p _bin _release _tmp _build/hello _build/oil)
$(shell mkdir -p _bin _release _tmp _build/hello _build/oil _build/gen)
ACTIONS_SH := build/actions.sh
COMPILE_SH := build/compile.sh
View
@@ -163,6 +163,16 @@ def VisitSum(self, sum, name, depth):
else:
self.VisitCompoundSum(sum, name, depth)
# Optionally overridden.
# No-op default hook; a subclass overrides this to emit code for a product
# type.  NOTE(review): the caller is not visible in this hunk -- presumably
# the visitor's traversal invokes it; confirm.
def VisitProduct(self, value, name, depth):
pass
# No-op default hook; a subclass overrides this to emit code for a simple sum
# type.  NOTE(review): presumably reached from VisitSum for sums that are not
# compound -- that branch is outside this hunk; confirm.
def VisitSimpleSum(self, value, name, depth):
pass
# No-op default hook; VisitSum calls this from its else branch, and a
# subclass overrides it to emit code for a compound sum type.
def VisitCompoundSum(self, value, name, depth):
pass
# No-op default hook; a subclass overrides this when it has trailing output
# to write after visiting.
def EmitFooter(self):
pass
class ForwardDeclareVisitor(AsdlVisitor):
"""Print forward declarations.
@@ -193,7 +203,7 @@ def EmitFooter(self):
for line in self.footer:
self.f.write(line)
def EmitEnum(self, sum, name, depth):
def _EmitEnum(self, sum, name, depth):
enum = []
for i in range(len(sum.types)):
type = sum.types[i]
@@ -205,14 +215,14 @@ def EmitEnum(self, sum, name, depth):
self.Emit("", depth)
def VisitSimpleSum(self, sum, name, depth):
self.EmitEnum(sum, name, depth)
self._EmitEnum(sum, name, depth)
def VisitCompoundSum(self, sum, name, depth):
# This is a sign that Python needs string interpolation!!!
def Emit(s, depth=depth):
self.Emit(s % sys._getframe(1).f_locals, depth)
self.EmitEnum(sum, name, depth)
self._EmitEnum(sum, name, depth)
Emit("class %(name)s_t : public Obj {")
Emit(" public:")
@@ -379,6 +389,16 @@ def VisitField(self, field, type_name, offset, depth):
self.Emit("}", depth)
# Used by osh/ast_gen.py
class CEnumVisitor(AsdlVisitor):
  """Emit simple sum types as C preprocessor constants."""

  def VisitSimpleSum(self, sum, name, depth):
    """Emit one '#define <name>__<variant> <n>' line per variant.

    Variants are numbered from 1 in declaration order.  A blank line is
    emitted after the last define.
    """
    # Just use #define, since enums aren't namespaced.
    #
    # No trailing ';' here: it would become part of the macro's replacement
    # text and break uses such as 'case NAME__VARIANT:' or 'x == NAME__VARIANT'.
    for i, variant in enumerate(sum.types):
      self.Emit('#define %s__%s %d' % (name, variant.name, i + 1), depth)
    self.Emit("", depth)
def main(argv):
try:
action = argv[1]
@@ -389,7 +409,8 @@ def main(argv):
# debugging. Might need to detect cycles though.
if action == 'cpp':
schema_path = argv[2]
module = asdl.parse(schema_path)
with open(schema_path) as input_f:
module = asdl.parse(input_f)
f = sys.stdout
View
@@ -0,0 +1,89 @@
#!/bin/bash
#
# For textual code generation.
#
# Usage:
# ./codegen.sh <function name>
#
# We want a single step build from the git tree, but we also want the generated
# code to be distributed in the release tarball.
#
# For ASDL code generation, re2c, etc.
# NOTE: This is similar to the generation of osh_help.py.
set -o nounset
set -o pipefail
set -o errexit
# Files
#
# native/lex.c -- calls generated function?
# osh/lex.py -- needs a wrapper for FindLongestMatch?
#
# ReadToken(lexer_mode, line, s) -> (t, e)
# _build/gen/
# osh-ast.h - lex_mode_e for now
# id.h - id_e for now
# osh-lex.re2c.h
# osh-lex.h
# Run re2c, forwarding all arguments.
#
# The binary defaults to the developer-local build under ~/src/re2c-0.16.
# Set the RE2C environment variable to use a re2c installed elsewhere.
re2c() {
  # ${RE2C:-...} is safe under 'set -o nounset' when RE2C is not set.  The
  # default is left unquoted inside the assignment so that ~ still expands.
  local re2c_bin=${RE2C:-~/src/re2c-0.16/re2c}
  "$re2c_bin" "$@"
}
# Run osh/ast_gen.py with the given arguments and save its output as
# _build/gen/osh-ast.h.  tee also echoes the generated text to stdout.
ast-gen() {
  local out=_build/gen/osh-ast.h
  PYTHONPATH=. osh/ast_gen.py "$@" | tee "$out"
}
# Run core/id_kind_gen.py with the 'c' action and save its output as
# _build/gen/id.h.  tee also echoes the generated text to stdout.
id-gen() {
  local out=_build/gen/id.h
  PYTHONPATH=. core/id_kind_gen.py c | tee "$out"
}
# _build/gen/osh-lex.re2c.h
# This includes osh_ast.h
# Run osh/lex_gen.py with the given arguments and save its output as
# _build/gen/osh-lex.re2c.h (the re2c input).  tee also echoes it to stdout.
lex-gen() {
  local out=_build/gen/osh-lex.re2c.h
  PYTHONPATH=. osh/lex_gen.py "$@" | tee "$out"
}
# re2c native.
# Run re2c on the generated osh-lex.re2c.h to produce osh-lex.h.
lex-gen-native() {
  local gen_dir=_build/gen
  re2c -o "$gen_dir/osh-lex.h" "$gen_dir/osh-lex.re2c.h"
}
# Regenerate every file under _build/gen, then run the dev build.
all() {
  ast-gen
  id-gen
  lex-gen
  lex-gen-native

  # Why do we need this?
  # -f: on a fresh tree lex.so does not exist yet; a plain 'rm' would fail and
  # abort the whole script under 'set -o errexit'.
  rm -f _devbuild/pylibc/x86_64/lex.so

  # Note: This also does pylibc, which we don't want.
  build/dev.sh all
}
# List the symbols in the built lex extension with nm.
symbols() {
  local lib=_devbuild/pylibc/x86_64/lex.so
  nm "$lib"
}
# Then the next step is build/dev.sh pylibc?
# NOTES:
# - core/id_kind_gen.py generates the mapping from Id to Kind.
# - It needs a mapping output by the Python superoptimization script.
# - asdl/gen_cpp.py generates oheap code in main().
# - It should probably be factored into a library and main driver.
# This generates oheap code.
# Also see asdl/run.sh.
# Run asdl/gen_cpp.py with the 'cpp' action on the OSH schema; output goes to
# stdout.
gen-cpp() {
  local schema=osh/osh.asdl
  PYTHONPATH=. asdl/gen_cpp.py cpp "$schema"
}
"$@"
View
@@ -30,6 +30,13 @@ gen-help() {
build/doc.sh oil-quick-ref
}
# TODO: should lex.c be part of the dev build? It means you need re2c
# installed? I don't think it makes sense to have 3 builds, so yes I think we
# can put it here for simplicity.
# However one problem is that if the Python lexer definition is changed, then
# you need to run re2c again! I guess you should just provide a script to
# download it.
pylibc() {
mkdir -p _devbuild/pylibc
local arch=$(uname -m)
View
@@ -15,4 +15,5 @@
setup(name = 'lex',
version = '1.0',
description = 'Module to speed up lexers',
include_dirs = ['_build/gen'],
ext_modules = [module])
View
@@ -82,7 +82,15 @@ def main(argv):
except IndexError:
raise RuntimeError('Action required')
if action == 'cpp':
if action == 'c':
# Simple list of defines
from core.id_kind import ID_SPEC
ids = list(ID_SPEC.token_names.iteritems())
ids.sort(key=lambda pair: pair[0]) # Sort by ID
for i, name in ids:
print('#define id__%s %s' % (name, i))
elif action == 'cpp':
# For blog post
try:
labels = argv[2]
Oops, something went wrong.

0 comments on commit f06278c

Please sign in to comment.