| @@ -0,0 +1,309 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| py_meta.py | ||
| Parse an ASDL file, and generate Python classes using metaprogramming. | ||
All objects descend from Obj, which allows them to be dynamically type-checked
| and serialized. Objects hold type descriptors, which are defined in asdl.py. | ||
| Usage: | ||
| from osh import ast_ as ast | ||
| n1 = ast.ArithVar() | ||
| n2 = ast.ArrayLiteralPart() | ||
| API Notes: | ||
| The Python AST module doesn't make any distinction between simple and compound | ||
| sum types. (Simple types have no constructors with fields.) | ||
| C++ has to make this distinction for reasons of representation. It's more | ||
| efficient to hold an enum value than a pointer to a class with an enum value. | ||
| In Python I guess that's not quite true. | ||
| So in order to serialize the correct bytes for C++, our Python metaclass | ||
| implementation has to differ from what's generated by asdl_c.py. More simply | ||
| put: an op is Add() and not Add, an instance of a class, not an integer value. | ||
| """ | ||
| from asdl import asdl_ as asdl | ||
| from asdl import const | ||
| from asdl import format as fmt | ||
| from core import util | ||
| log = util.log | ||
| def _CheckType(value, expected_desc): | ||
| """Is value of type expected_desc? | ||
| Args: | ||
| value: Obj or primitive type | ||
    expected_desc: instance of asdl.Product, asdl.Sum, asdl.StrType,
| asdl.IntType, ArrayType, MaybeType, etc. | ||
| """ | ||
| if isinstance(expected_desc, asdl.Constructor): | ||
| # This doesn't make sense because the descriptors are derived from the | ||
| # declared types. You can declare a field as arith_expr_e but not | ||
| # ArithBinary. | ||
| raise AssertionError("Invalid Constructor descriptor") | ||
| if isinstance(expected_desc, asdl.MaybeType): | ||
| if value is None: | ||
| return True | ||
| return _CheckType(value, expected_desc.desc) | ||
| if isinstance(expected_desc, asdl.ArrayType): | ||
| if not isinstance(value, list): | ||
| return False | ||
| # Now check all entries | ||
| for item in value: | ||
| if not _CheckType(item, expected_desc.desc): | ||
| return False | ||
| return True | ||
| if isinstance(expected_desc, asdl.StrType): | ||
| return isinstance(value, str) | ||
| if isinstance(expected_desc, asdl.IntType): | ||
| return isinstance(value, int) | ||
| if isinstance(expected_desc, asdl.BoolType): | ||
| return isinstance(value, bool) | ||
| if isinstance(expected_desc, asdl.UserType): | ||
| return isinstance(value, expected_desc.typ) | ||
| try: | ||
| actual_desc = value.__class__.ASDL_TYPE | ||
| except AttributeError: | ||
| return False # it's not of the right type | ||
| if isinstance(expected_desc, asdl.Product): | ||
| return actual_desc is expected_desc | ||
| if isinstance(expected_desc, asdl.Sum): | ||
| if asdl.is_simple(expected_desc): | ||
| return actual_desc is expected_desc | ||
| else: | ||
| for cons in expected_desc.types: # It has to be one of the alternatives | ||
| #log("CHECKING desc %s against %s" % (desc, cons)) | ||
| if actual_desc is cons: | ||
| return True | ||
| return False | ||
| raise AssertionError( | ||
| 'Invalid descriptor %r: %r' % (expected_desc.__class__, expected_desc)) | ||
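
# For example (illustrative; the descriptor constructors are assumed from
# asdl_, not spelled out in this file):
#
#   _CheckType('foo', asdl.StrType())                     => True
#   _CheckType(None, asdl.MaybeType(asdl.IntType()))      => True
#   _CheckType(['a', 1], asdl.ArrayType(asdl.StrType()))  => False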
| class Obj(object): | ||
| # NOTE: We're using CAPS for these static fields, since they are constant at | ||
| # runtime after metaprogramming. | ||
| ASDL_TYPE = None # Used for type checking | ||
| class SimpleObj(Obj): | ||
| """An enum value. | ||
| Other simple objects: int, str, maybe later a float. | ||
| """ | ||
| def __init__(self, enum_id, name): | ||
| self.enum_id = enum_id | ||
| self.name = name | ||
| # TODO: Why is __hash__ needed? Otherwise native/fastlex_test.py fails. | ||
| # util.Enum required it too. I thought that instances would hash by | ||
| # identity? | ||
| # | ||
| # Example: | ||
| # class bool_arg_type_e(py_meta.SimpleObj): | ||
| # ASDL_TYPE = TYPE_LOOKUP.ByTypeName('bool_arg_type') | ||
| # bool_arg_type_e.Undefined = bool_arg_type_e(1, 'Undefined') | ||
| def __hash__(self): | ||
| # Could it be the integer self.enum_id? | ||
| return hash(self.__class__.__name__ + self.name) | ||
| def __repr__(self): | ||
| return '<%s %s %s>' % (self.__class__.__name__, self.name, self.enum_id) | ||
| class CompoundObj(Obj): | ||
| # TODO: Remove tag? | ||
| # The tag is always set for constructor types, which are subclasses of sum | ||
| # types. Never set for product types. | ||
| tag = None | ||
| # NOTE: SimpleObj could share this. | ||
| def __repr__(self): | ||
| ast_f = fmt.TextOutput(util.Buffer()) # No color by default. | ||
| tree = fmt.MakeTree(self) | ||
| fmt.PrintTree(tree, ast_f) | ||
| s, _ = ast_f.GetRaw() | ||
| return s | ||
| class DebugCompoundObj(CompoundObj): | ||
| """A CompoundObj that does dynamic type checks. | ||
| Used by MakeTypes(). | ||
| """ | ||
| # Always set for constructor types, which are subclasses of sum types. Never | ||
| # set for product types. | ||
| tag = None | ||
| def __init__(self, *args, **kwargs): | ||
| # The user must specify ALL required fields or NONE. | ||
| self._assigned = {f: False for f in self.ASDL_TYPE.GetFieldNames()} | ||
| self._SetDefaults() | ||
| if args or kwargs: | ||
| self._Init(args, kwargs) | ||
| def _SetDefaults(self): | ||
| for name, desc in self.ASDL_TYPE.GetFields(): | ||
| if isinstance(desc, asdl.MaybeType): | ||
| child = desc.desc | ||
| if isinstance(child, asdl.IntType): | ||
| value = const.NO_INTEGER | ||
| elif isinstance(child, asdl.StrType): | ||
| value = '' | ||
| else: | ||
| value = None | ||
| self.__setattr__(name, value) # Maybe values can be None | ||
| elif isinstance(desc, asdl.ArrayType): | ||
| self.__setattr__(name, []) | ||
| def _Init(self, args, kwargs): | ||
| field_names = list(self.ASDL_TYPE.GetFieldNames()) | ||
| for i, val in enumerate(args): | ||
| name = field_names[i] | ||
| self.__setattr__(name, val) | ||
| for name, val in kwargs.items(): | ||
| if self._assigned[name]: | ||
| raise TypeError('Duplicate assignment of field %r' % name) | ||
| self.__setattr__(name, val) | ||
| # Disable type checking here | ||
| #return | ||
| for name in field_names: | ||
| if not self._assigned[name]: | ||
| # If anything was set, then required fields raise an error. | ||
| raise ValueError("Field %r is required and wasn't initialized" % name) | ||
| def CheckUnassigned(self): | ||
| """See if there are unassigned fields, for later encoding. | ||
| This is currently only used in unit tests. | ||
| """ | ||
| unassigned = [] | ||
| for name in self.ASDL_TYPE.GetFieldNames(): | ||
| if not self._assigned[name]: | ||
| desc = self.ASDL_TYPE.LookupFieldType(name) | ||
| if not isinstance(desc, asdl.MaybeType): | ||
| unassigned.append(name) | ||
| if unassigned: | ||
| raise ValueError("Fields %r were't be assigned" % unassigned) | ||
| if 1: # Disable type checking here | ||
| def __setattr__(self, name, value): | ||
| if name == '_assigned': | ||
| self.__dict__[name] = value | ||
| return | ||
| try: | ||
| desc = self.ASDL_TYPE.LookupFieldType(name) | ||
| except KeyError: | ||
| raise AttributeError('Object of type %r has no attribute %r' % | ||
| (self.__class__.__name__, name)) | ||
| if not _CheckType(value, desc): | ||
| raise AssertionError("Field %r should be of type %s, got %r (%s)" % | ||
| (name, desc, value, value.__class__)) | ||
| self._assigned[name] = True # check this later when encoding | ||
| self.__dict__[name] = value | ||
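
# Usage sketch (illustrative; the field names here are hypothetical):
#
#   n = ast.ArithVar()    # no fields yet; assign them one by one
#   n.name = 'x'          # checked against the declared field type
#   n.name = 42           # AssertionError: wrong type for this field
#
# Passing some but not all of the required fields to the constructor raises
# ValueError, per _Init() above.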
| def MakeTypes(module, root, type_lookup): | ||
| """ | ||
| Args: | ||
| module: asdl.Module | ||
| root: an object/package to add types to | ||
| """ | ||
| for defn in module.dfns: | ||
| typ = defn.value | ||
| #print('TYPE', defn.name, typ) | ||
| if isinstance(typ, asdl.Sum): | ||
| sum_type = typ | ||
| if asdl.is_simple(sum_type): | ||
| # An object without fields, which can be stored inline. | ||
| # Create a class called foo_e. Unlike the CompoundObj case, it doesn't | ||
        # have subtypes. Instead it has attributes foo_e.Bar, where Bar is an
        # instance of foo_e.
| # | ||
| # Problem: This means you have a dichotomy between: | ||
| # cflow_e.Break vs. cflow_e.Break() | ||
| # If you add a non-simple type like cflow_e.Return(5), the usage will | ||
| # change. I haven't run into this problem in practice yet. | ||
| class_name = defn.name + '_e' | ||
| class_attr = {'ASDL_TYPE': sum_type} # asdl.Sum | ||
| cls = type(class_name, (SimpleObj, ), class_attr) | ||
| setattr(root, class_name, cls) | ||
        # NOTE: Right now the ASDL_TYPE for an enum value is the Sum type,
| # not the Constructor type. We may want to change this if we need | ||
| # reflection. | ||
| for i, cons in enumerate(sum_type.types): | ||
| enum_id = i + 1 | ||
| name = cons.name | ||
| val = cls(enum_id, cons.name) # Instantiate SimpleObj subtype | ||
| # Set a static attribute like op_id.Plus, op_id.Minus. | ||
| setattr(cls, name, val) | ||
| else: | ||
| tag_num = {} | ||
| # e.g. for arith_expr | ||
| # Should this be arith_expr_t? It is in C++. | ||
| base_class = type(defn.name, (DebugCompoundObj, ), {}) | ||
| setattr(root, defn.name, base_class) | ||
        # Make a type and an enum tag for each alternative.
| for i, cons in enumerate(sum_type.types): | ||
| tag = i + 1 # zero reserved? | ||
| tag_num[cons.name] = tag # for enum | ||
| class_attr = { | ||
| 'ASDL_TYPE': cons, # asdl.Constructor | ||
| 'tag': tag, # Does this API change? | ||
| } | ||
| cls = type(cons.name, (base_class, ), class_attr) | ||
| setattr(root, cons.name, cls) | ||
| # e.g. arith_expr_e.Const == 1 | ||
| enum_name = defn.name + '_e' | ||
| tag_enum = type(enum_name, (), tag_num) | ||
| setattr(root, enum_name, tag_enum) | ||
| elif isinstance(typ, asdl.Product): | ||
| class_attr = {'ASDL_TYPE': typ} | ||
| cls = type(defn.name, (DebugCompoundObj, ), class_attr) | ||
| setattr(root, defn.name, cls) | ||
| else: | ||
| raise AssertionError(typ) | ||
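
# Sketch of what MakeTypes() generates (illustrative), given the ASDL
# definitions 'color = Red | Green' and 'word = (string value)':
#
#   class color_e(SimpleObj): ...       # simple sum type
#   color_e.Red = color_e(1, 'Red')     # each alternative is a singleton
#   color_e.Green = color_e(2, 'Green')
#
#   class word(DebugCompoundObj): ...   # product type with a 'value' field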
| def AssignTypes(src_module, dest_module): | ||
| """For generated code.""" | ||
| for name in dir(src_module): | ||
| if not name.startswith('__'): | ||
| v = getattr(src_module, name) | ||
| setattr(dest_module, name, v) | ||
| @@ -0,0 +1,18 @@ | ||
| #!/usr/bin/env python | ||
| from __future__ import print_function | ||
| """ | ||
| py_meta_test.py: Tests for py_meta.py | ||
| """ | ||
| import unittest | ||
| from asdl import py_meta # module under test | ||
| class AsdlTest(unittest.TestCase): | ||
| def testPyMeta(self): | ||
| print(py_meta) | ||
| if __name__ == '__main__': | ||
| unittest.main() |
| @@ -0,0 +1,230 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| tdop.py | ||
| """ | ||
| import re | ||
| class ParseError(Exception): | ||
| pass | ||
| # | ||
| # Default parsing functions give errors | ||
| # | ||
| def NullError(p, token, bp): | ||
| raise ParseError("%s can't be used in prefix position" % token) | ||
| def LeftError(p, token, left, rbp): | ||
| # Hm is this not called because of binding power? | ||
| raise ParseError("%s can't be used in infix position" % token) | ||
| # | ||
| # Input | ||
| # | ||
| class Token: | ||
  def __init__(self, type, val, loc=None):
    self.type = type
    self.val = val
    self.loc = loc  # (line, col) source location; currently unused
| def __repr__(self): | ||
| return '<Token %s %s>' % (self.type, self.val) | ||
| # | ||
| # Using the pattern here: http://effbot.org/zone/xml-scanner.htm | ||
| # | ||
| # NOTE: () and [] need to be on their own so (-1+2) works | ||
TOKEN_RE = re.compile(r"""
\s* (?: (\d+) | (\w+) | ( [\-\+\*/%!~<>=&^|?:,]+ ) | ([\(\)\[\]]) )
""", re.VERBOSE)
| def Tokenize(s): | ||
| for item in TOKEN_RE.findall(s): | ||
| if item[0]: | ||
| typ = 'number' | ||
| val = int(item[0]) | ||
| elif item[1]: | ||
| typ = 'name' | ||
| val = item[1] | ||
| elif item[2]: | ||
| typ = item[2] | ||
| val = item[2] | ||
| elif item[3]: | ||
| typ = item[3] | ||
| val = item[3] | ||
| yield Token(typ, val, loc=(0, 0)) | ||
| # | ||
| # Simple and Composite AST nodes | ||
| # | ||
| class Node(object): | ||
| def __init__(self, token): | ||
| """ | ||
| Args: | ||
| type: token type (operator, etc.) | ||
| val: token val, only important for number and string | ||
| """ | ||
| self.token = token | ||
| def __repr__(self): | ||
| return str(self.token.val) | ||
| class CompositeNode(Node): | ||
| def __init__(self, token, children): | ||
| """ | ||
| Args: | ||
| type: token type (operator, etc.) | ||
| """ | ||
| Node.__init__(self, token) | ||
| self.children = children | ||
| def __repr__(self): | ||
| args = ''.join([" " + repr(c) for c in self.children]) | ||
| return "(" + self.token.type + args + ")" | ||
| # | ||
| # Parser definition | ||
| # | ||
| class LeftInfo(object): | ||
| """Row for operator. | ||
| In C++ this should be a big array. | ||
| """ | ||
| def __init__(self, led=None, lbp=0, rbp=0): | ||
| self.led = led or LeftError | ||
| self.lbp = lbp | ||
| self.rbp = rbp | ||
| class NullInfo(object): | ||
| """Row for operator. | ||
| In C++ this should be a big array. | ||
| """ | ||
| def __init__(self, nud=None, bp=0): | ||
| self.nud = nud or NullError | ||
| self.bp = bp | ||
| class ParserSpec(object): | ||
| """Specification for a TDOP parser.""" | ||
| def __init__(self): | ||
| self.null_lookup = {} | ||
| self.left_lookup = {} | ||
| def Null(self, bp, nud, tokens): | ||
| """Register a token that doesn't take anything on the left. | ||
| Examples: constant, prefix operator, error. | ||
| """ | ||
| for token in tokens: | ||
| self.null_lookup[token] = NullInfo(nud=nud, bp=bp) | ||
| if token not in self.left_lookup: | ||
| self.left_lookup[token] = LeftInfo() # error | ||
| def _RegisterLed(self, lbp, rbp, led, tokens): | ||
| for token in tokens: | ||
| if token not in self.null_lookup: | ||
| self.null_lookup[token] = NullInfo(NullError) | ||
| self.left_lookup[token] = LeftInfo(lbp=lbp, rbp=rbp, led=led) | ||
| def Left(self, bp, led, tokens): | ||
| """Register a token that takes an expression on the left.""" | ||
| self._RegisterLed(bp, bp, led, tokens) | ||
| def LeftRightAssoc(self, bp, led, tokens): | ||
| """Register a right associative operator.""" | ||
| self._RegisterLed(bp, bp-1, led, tokens) | ||
| def LookupNull(self, token): | ||
| """Get the parsing function and precedence for a null position token.""" | ||
| try: | ||
| nud = self.null_lookup[token] | ||
| except KeyError: | ||
| raise ParseError('Unexpected token %r' % token) | ||
| return nud | ||
| def LookupLeft(self, token): | ||
| """Get the parsing function and precedence for a left position token.""" | ||
| try: | ||
| led = self.left_lookup[token] | ||
| except KeyError: | ||
| raise ParseError('Unexpected token %r' % token) | ||
| return led | ||
| EOF_TOKEN = Token('eof', 'eof') | ||
| class Parser(object): | ||
| """Recursive TDOP parser.""" | ||
| def __init__(self, spec, lexer): | ||
| self.spec = spec | ||
| self.lexer = lexer # iterable | ||
| self.token = None # current token | ||
| def AtToken(self, token_type): | ||
| """Test if we are looking at a token.""" | ||
| return self.token.type == token_type | ||
| def Next(self): | ||
| """Move to the next token.""" | ||
| try: | ||
| t = self.lexer.next() | ||
| except StopIteration: | ||
| t = EOF_TOKEN | ||
| self.token = t | ||
  def Eat(self, val):
    """Assert the type of the current token, then move to the next token."""
| if val and not self.AtToken(val): | ||
| raise ParseError('expected %s, got %s' % (val, self.token)) | ||
| self.Next() | ||
| def ParseUntil(self, rbp): | ||
| """ | ||
| Parse to the right, eating tokens until we encounter a token with binding | ||
| power LESS THAN OR EQUAL TO rbp. | ||
| """ | ||
| if self.AtToken('eof'): | ||
| raise ParseError('Unexpected end of input') | ||
| t = self.token | ||
| self.Next() # skip over the token, e.g. ! ~ + - | ||
| null_info = self.spec.LookupNull(t.type) | ||
| node = null_info.nud(self, t, null_info.bp) | ||
| while True: | ||
| t = self.token | ||
| left_info = self.spec.LookupLeft(t.type) | ||
| # Examples: | ||
| # If we see 1*2+ , rbp = 27 and lbp = 25, so stop. | ||
| # If we see 1+2+ , rbp = 25 and lbp = 25, so stop. | ||
| # If we see 1**2**, rbp = 26 and lbp = 27, so keep going. | ||
| if rbp >= left_info.lbp: | ||
| break | ||
| self.Next() # skip over the token, e.g. / * | ||
| node = left_info.led(self, t, node, left_info.rbp) | ||
| return node | ||
| def Parse(self): | ||
| self.Next() | ||
| return self.ParseUntil(0) |
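

#
# Demo grammar -- a sketch for illustration, not part of the original file.
# It registers just enough operators to parse simple arithmetic.
#

def NullConstant(p, token, bp):
  """A number or name in prefix position is a leaf node."""
  return Node(token)

def LeftBinaryOp(p, token, left, rbp):
  """A left-associative binary operator like + or *."""
  return CompositeNode(token, [left, p.ParseUntil(rbp)])

def _MakeDemoSpec():
  spec = ParserSpec()
  spec.Null(0, NullConstant, ['number', 'name'])
  spec.Null(-1, NullError, [')', 'eof'])  # can't begin an expression
  spec.Left(25, LeftBinaryOp, ['+', '-'])
  spec.Left(27, LeftBinaryOp, ['*', '/'])
  return spec

if __name__ == '__main__':
  parser = Parser(_MakeDemoSpec(), Tokenize('1 + 2 * 3'))
  print(parser.Parse())  # => (+ 1 (* 2 3))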
| @@ -0,0 +1,118 @@ | ||
| #!/usr/bin/python | ||
| """ | ||
| visitor.py | ||
| """ | ||
| from asdl import asdl_ as asdl | ||
| class AsdlVisitor: | ||
| """Base class for visitors. | ||
| TODO: | ||
| - It might be useful to separate this into VisitChildren() / generic_visit() | ||
| like Python's ast.NodeVisitor does. | ||
| - Also remove self.f and self.Emit. Those can go in self.output? | ||
| - Move to common location, since gen_python uses it as well. | ||
| """ | ||
| def __init__(self, f): | ||
| self.f = f | ||
  def Emit(self, s, depth, reflow=True):
    for line in FormatLines(s, depth, reflow=reflow):
      self.f.write(line)
| def VisitModule(self, mod): | ||
| for dfn in mod.dfns: | ||
| self.VisitType(dfn) | ||
| self.EmitFooter() | ||
| def VisitType(self, typ, depth=0): | ||
| if isinstance(typ.value, asdl.Sum): | ||
| self.VisitSum(typ.value, typ.name, depth) | ||
| elif isinstance(typ.value, asdl.Product): | ||
| self.VisitProduct(typ.value, typ.name, depth) | ||
| else: | ||
| raise AssertionError(typ) | ||
| def VisitSum(self, sum, name, depth): | ||
| if asdl.is_simple(sum): | ||
| self.VisitSimpleSum(sum, name, depth) | ||
| else: | ||
| self.VisitCompoundSum(sum, name, depth) | ||
| # Optionally overridden. | ||
| def VisitProduct(self, value, name, depth): | ||
| pass | ||
| def VisitSimpleSum(self, value, name, depth): | ||
| pass | ||
| def VisitCompoundSum(self, value, name, depth): | ||
| pass | ||
| def EmitFooter(self): | ||
| pass | ||
| TABSIZE = 2 | ||
| MAX_COL = 80 | ||
| # Copied from asdl_c.py | ||
| def _ReflowLines(s, depth): | ||
| """Reflow the line s indented depth tabs. | ||
| Return a sequence of lines where no line extends beyond MAX_COL when properly | ||
| indented. The first line is properly indented based exclusively on depth * | ||
| TABSIZE. All following lines -- these are the reflowed lines generated by | ||
| this function -- start at the same column as the first character beyond the | ||
| opening { in the first line. | ||
| """ | ||
| size = MAX_COL - depth * TABSIZE | ||
| if len(s) < size: | ||
| return [s] | ||
| lines = [] | ||
| cur = s | ||
| padding = "" | ||
| while len(cur) > size: | ||
| i = cur.rfind(' ', 0, size) | ||
| # XXX this should be fixed for real | ||
| if i == -1 and 'GeneratorExp' in cur: | ||
| i = size + 3 | ||
| assert i != -1, "Impossible line %d to reflow: %r" % (size, s) | ||
| lines.append(padding + cur[:i]) | ||
| if len(lines) == 1: | ||
| # find new size based on brace | ||
| j = cur.find('{', 0, i) | ||
| if j >= 0: | ||
| j += 2 # account for the brace and the space after it | ||
| size -= j | ||
| padding = " " * j | ||
| else: | ||
| j = cur.find('(', 0, i) | ||
| if j >= 0: | ||
| j += 1 # account for the paren (no space after it) | ||
| size -= j | ||
| padding = " " * j | ||
| cur = cur[i + 1:] | ||
| else: | ||
| lines.append(padding + cur) | ||
| return lines | ||
| def FormatLines(s, depth, reflow=True): | ||
| """Make the generated code readable. | ||
| Args: | ||
| depth: controls indentation | ||
| reflow: line wrapping. | ||
| """ | ||
| if reflow: | ||
| lines = _ReflowLines(s, depth) | ||
| else: | ||
| lines = [s] | ||
| result = [] | ||
| for line in lines: | ||
| line = (" " * TABSIZE * depth) + line + "\n" | ||
| result.append(line) | ||
| return result |
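

if __name__ == '__main__':
  # Tiny demo, not part of the original file: depth controls the leading
  # indent, at TABSIZE spaces per level.
  print(FormatLines('x = 1', 1))  # => ['  x = 1\n']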
| @@ -0,0 +1,66 @@ | ||
| from __future__ import print_function # for OPy compiler | ||
| """ | ||
| atexit.py - allow programmer to define multiple exit functions to be executed | ||
| upon normal program termination. | ||
| One public function, register, is defined. | ||
| """ | ||
| __all__ = ["register"] | ||
| import sys | ||
| _exithandlers = [] | ||
| def _run_exitfuncs(): | ||
| """run any registered exit functions | ||
| _exithandlers is traversed in reverse order so functions are executed | ||
| last in, first out. | ||
| """ | ||
| exc_info = None | ||
| while _exithandlers: | ||
| func, targs, kargs = _exithandlers.pop() | ||
| try: | ||
| func(*targs, **kargs) | ||
| except SystemExit: | ||
| exc_info = sys.exc_info() | ||
| except: | ||
| import traceback | ||
| print("Error in atexit._run_exitfuncs:", file=sys.stderr) | ||
| traceback.print_exc() | ||
| exc_info = sys.exc_info() | ||
| if exc_info is not None: | ||
| raise exc_info[0], exc_info[1], exc_info[2] | ||
| def register(func, *targs, **kargs): | ||
| """register a function to be executed upon normal program termination | ||
| func - function to be called at exit | ||
| targs - optional arguments to pass to func | ||
| kargs - optional keyword arguments to pass to func | ||
| func is returned to facilitate usage as a decorator. | ||
| """ | ||
| _exithandlers.append((func, targs, kargs)) | ||
| return func | ||
| if hasattr(sys, "exitfunc"): | ||
| # Assume it's another registered exit function - append it to our list | ||
| register(sys.exitfunc) | ||
| sys.exitfunc = _run_exitfuncs | ||
| if __name__ == "__main__": | ||
| def x1(): | ||
| print("running x1") | ||
| def x2(n): | ||
| print("running x2(%r)" % (n,)) | ||
| def x3(n, kwd=None): | ||
| print("running x3(%r, kwd=%r)" % (n, kwd)) | ||
| register(x1) | ||
| register(x2, 12) | ||
| register(x3, 5, "bar") | ||
| register(x3, "no kwd args") |
| @@ -0,0 +1,14 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| fake_libc.py | ||
| For PyPy. | ||
| """ | ||
| def regex_parse(regex_str): | ||
| return True | ||
| # This makes things fall through to the first case statement... | ||
| def fnmatch(s, to_match): | ||
| return True | ||
| @@ -0,0 +1,93 @@ | ||
| #!/usr/bin/env python | ||
| from __future__ import print_function | ||
| """ | ||
| pytrace.py | ||
| """ | ||
| import cStringIO | ||
| import os | ||
| import struct | ||
| import sys | ||
| # TODO: Two kinds of tracing? | ||
| # - FullTracer -> Chrome trace? | ||
| # - ReservoirSamplingTracer() -- flame graph that is deterministic? | ||
| # TODO: Check this in but just go ahead and fix wild.sh instead. | ||
| class Tracer(object): | ||
| # Limit to 10M events by default. | ||
| def __init__(self, max_events=10e6): | ||
| self.pid = os.getpid() | ||
| # append | ||
| self.event_strs = cStringIO.StringIO() | ||
| # After max_events we stop recording | ||
| self.max_events = max_events | ||
| self.num_events = 0 | ||
| self.depth = 0 | ||
| # Python VM callback | ||
| def OnEvent(self, frame, event_type, arg): | ||
| # Test overhead | ||
| # 7.5 seconds. Geez. That's crazy high. | ||
| # The baseline is 2.7 seconds, and _lsprof takes 3.8 seconds. | ||
| # I guess that's why pytracing is a decorator and only works on one part of | ||
| # the program. | ||
| # pytracing isn't usable with large programs. It can't run abuild -h. | ||
| # What I really want is the nicer visualization. I don't want the silly | ||
| # cProfile output. | ||
    # Stop recording once we hit max_events, as promised above.
    if self.num_events >= self.max_events:
      return
    self.num_events += 1
| name = frame.f_code.co_name | ||
| filename = frame.f_code.co_filename | ||
| if event_type in ('call', 'c_call'): | ||
| self.depth += 1 | ||
| record = '%s%s\t%s\t%s\t%s\t%s\n' % (' ' * self.depth, | ||
| event_type, filename, frame.f_lineno, name, arg) | ||
| self.event_strs.write(record) | ||
| if event_type in ('return', 'c_return'): | ||
| self.depth -= 1 | ||
| return | ||
| # NOTE: Do we want a struct.pack version eventually? | ||
| #self.event_strs.write('') | ||
| def Start(self): | ||
| sys.setprofile(self.OnEvent) | ||
| def Stop(self, path): | ||
| sys.setprofile(None) | ||
| # Only one process should write out the file! | ||
| if os.getpid() != self.pid: | ||
| return | ||
| # TODO: | ||
| # - report number of events? | ||
| # - report number of bytes? | ||
| print('num_events: %d' % self.num_events, file=sys.stderr) | ||
| print('Writing to %r' % path, file=sys.stderr) | ||
| with open(path, 'w') as f: | ||
| f.write(self.event_strs.getvalue()) | ||
| def main(argv): | ||
| t = Tracer() | ||
| import urlparse | ||
| t.Start() | ||
| print(urlparse.urlparse('http://example.com/foo')) | ||
| t.Stop('demo.pytrace') | ||
| if __name__ == '__main__': | ||
| try: | ||
| main(sys.argv) | ||
| except RuntimeError as e: | ||
    print('FATAL: %s' % e, file=sys.stderr)
| sys.exit(1) |
| @@ -0,0 +1,67 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
time.py -- Replacement for GNU 'time'.
| The interface of this program is modelled after: | ||
| /usr/bin/time --append --output foo.txt --format '%x %e' | ||
| Problems with /usr/bin/time: | ||
| - elapsed time only has 2 digits of precision | ||
Problems with the bash 'time' builtin:
- has no way to get the exit code
- writes to stderr, so it's annoying to separate the timing output from the
  process's own stderr
| This program also writes CSV directly, so you can have commas in fields, etc. | ||
| """ | ||
| import csv | ||
| import optparse | ||
| import sys | ||
| import subprocess | ||
| import time | ||
| def Options(): | ||
| """Returns an option parser instance.""" | ||
| p = optparse.OptionParser('time.py [options] ARGV...') | ||
| p.add_option( | ||
| '--tsv', dest='tsv', default=False, action='store_true', | ||
| help='Write output in TSV format') | ||
| p.add_option( | ||
| '-o', '--output', dest='output', default=None, | ||
| help='Name of output file to write to') | ||
| p.add_option( | ||
| '--field', dest='fields', default=[], action='append', | ||
| help='A string to append to each row, after the exit code and status') | ||
| return p | ||
def main(argv):
  p = Options()
  (opts, child_argv) = p.parse_args(argv[1:])
  if not opts.output:
    p.error('-o / --output is required')  # exits with status 2
| start_time = time.time() | ||
| exit_code = subprocess.call(child_argv) | ||
| elapsed = time.time() - start_time | ||
| fields = tuple(opts.fields) | ||
| with open(opts.output, 'a') as f: | ||
| if opts.tsv: | ||
| # TSV output. | ||
| out = csv.writer(f, delimiter='\t', doublequote=False, | ||
| quoting=csv.QUOTE_NONE) | ||
| else: | ||
| out = csv.writer(f) | ||
| row = (exit_code, '%.4f' % elapsed) + fields | ||
| out.writerow(row) | ||
| # Preserve the command's exit code. (This means you can't distinguish | ||
| # between a failure of time.py and the command, but that's better than | ||
| # swallowing the error.) | ||
| return exit_code | ||
| if __name__ == '__main__': | ||
| sys.exit(main(sys.argv)) |
| @@ -0,0 +1,104 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| virtual_memory.py | ||
| """ | ||
| import csv | ||
| import os | ||
| import sys | ||
| import re | ||
| # VmSize, VmData might be interesting too. | ||
METRIC_RE = re.compile(r'^(VmPeak|VmRSS):\s*(\d+)')
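
# Example /proc/$PID/status lines that this matches:
#
#   VmPeak:    49772 kB
#   VmRSS:      3304 kB
#
# We keep the metric name and the numeric value (in kB); the unit is dropped.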
| def main(argv): | ||
| action = argv[1] | ||
| if action == 'baseline': | ||
| input_dirs = argv[2:] | ||
| out = csv.writer(sys.stdout) | ||
| out.writerow( | ||
| ('host', 'shell_name', 'shell_hash', 'metric_name', 'metric_value')) | ||
| # Dir name looks like "$host.$job_id" | ||
| for input_dir in input_dirs: | ||
| d = os.path.basename(input_dir) | ||
| host, job_id = d.split('.') | ||
| for name in os.listdir(input_dir): | ||
| n, _ = os.path.splitext(name) | ||
| shell_name, shell_hash = n.split('-') | ||
| path = os.path.join(input_dir, name) | ||
| with open(path) as f: | ||
| for line in f: | ||
| m = METRIC_RE.match(line) | ||
| if m: | ||
| name, value = m.groups() | ||
| row = (host, shell_name, shell_hash, name, value) | ||
| out.writerow(row) | ||
| elif action == 'osh-parser': | ||
| input_dirs = argv[2:] | ||
| out = csv.writer(sys.stdout) | ||
| HEADER = ( | ||
| 'host', 'shell_name', 'shell_hash', 'filename', 'metric_name', | ||
| 'metric_value') | ||
| out.writerow(HEADER) | ||
| for input_dir in input_dirs: | ||
| d = os.path.basename(input_dir) | ||
| host, job_id, _ = d.split('.') | ||
| for name in os.listdir(input_dir): | ||
| n, _ = os.path.splitext(name) | ||
| shell_id, filename = n.split('__') | ||
| shell_name, shell_hash = shell_id.split('-') | ||
| path = os.path.join(input_dir, name) | ||
| with open(path) as f: | ||
| for line in f: | ||
| m = METRIC_RE.match(line) | ||
| if m: | ||
| name, value = m.groups() | ||
| row = (host, shell_name, shell_hash, filename, name, value) | ||
| out.writerow(row) | ||
| elif action == 'osh-runtime': | ||
| # NOTE: This is mostly a copy/paste of osh-parser. | ||
| input_dirs = argv[2:] | ||
| out = csv.writer(sys.stdout) | ||
| HEADER = ( | ||
| 'host', 'shell_name', 'shell_hash', 'task_arg', 'event', 'metric_name', | ||
| 'metric_value') | ||
| out.writerow(HEADER) | ||
| for input_dir in input_dirs: | ||
| d = os.path.basename(input_dir) | ||
| host, job_id, _ = d.split('.') | ||
| for name in os.listdir(input_dir): | ||
| n, _ = os.path.splitext(name) | ||
| shell_id, task_arg, event = n.split('__') | ||
| shell_name, shell_hash = shell_id.split('-') | ||
| path = os.path.join(input_dir, name) | ||
| with open(path) as f: | ||
| for line in f: | ||
| m = METRIC_RE.match(line) | ||
| if m: | ||
| name, value = m.groups() | ||
| row = (host, shell_name, shell_hash, task_arg, event, name, | ||
| value) | ||
| out.writerow(row) | ||
| else: | ||
| raise RuntimeError('Invalid action %r' % action) | ||
| if __name__ == '__main__': | ||
| try: | ||
| main(sys.argv) | ||
| except RuntimeError as e: | ||
| print >>sys.stderr, 'FATAL: %s' % e | ||
| sys.exit(1) |
| @@ -0,0 +1,88 @@ | ||
| #!/usr/bin/env python | ||
| from __future__ import print_function | ||
| """ | ||
| opy_.py | ||
| """ | ||
| import os | ||
| import sys | ||
| this_dir = os.path.dirname(os.path.abspath(sys.argv[0])) | ||
| sys.path.append(os.path.join(this_dir, '..')) | ||
| from core import args | ||
| from core import util | ||
| from opy.util_opy import log | ||
| from opy import opy_main | ||
| # TODO: move to quick ref? | ||
| _OPY_USAGE = 'Usage: opy_ MAIN [OPTION]... [ARG]...' | ||
| def _ShowVersion(): | ||
| util.ShowAppVersion('OPy') | ||
| # Run the bytecode too. Should this have an option to use byterun? | ||
| def OpyMain(argv0, main_argv): | ||
| raise NotImplementedError("Can't run bytecode yet") | ||
| def AppBundleMain(argv): | ||
| b = os.path.basename(argv[0]) | ||
| main_name, ext = os.path.splitext(b) | ||
| if main_name in ('opy_', 'opy') and ext: # opy_.py or opy.ovm | ||
| try: | ||
| first_arg = argv[1] | ||
| except IndexError: | ||
| raise args.UsageError('Missing required applet name.') | ||
| # TODO: We don't have this | ||
| if first_arg in ('-h', '--help'): | ||
| #builtin.Help(['bundle-usage'], util.GetResourceLoader()) | ||
| raise NotImplementedError('OPy help not implemented') | ||
| sys.exit(0) | ||
| if first_arg in ('-V', '--version'): | ||
| _ShowVersion() | ||
| sys.exit(0) | ||
| main_name = first_arg | ||
| argv0 = argv[1] | ||
| main_argv = argv[2:] | ||
| else: | ||
| argv0 = argv[0] | ||
| main_argv = argv[1:] | ||
| if main_name == 'opy': | ||
| status = OpyMain(argv0, main_argv) | ||
| return status | ||
| elif main_name == 'opyc': | ||
| return opy_main.OpyCommandMain(main_argv) | ||
| else: | ||
| raise args.UsageError('Invalid applet name %r.' % main_name) | ||
| def main(argv): | ||
| try: | ||
| sys.exit(AppBundleMain(argv)) | ||
| except args.UsageError as e: | ||
| #print(_OPY_USAGE, file=sys.stderr) | ||
| log('opy: %s', e) | ||
| sys.exit(2) | ||
| except RuntimeError as e: | ||
| log('FATAL: %s', e) | ||
| sys.exit(1) | ||
| if __name__ == '__main__': | ||
| # NOTE: This could end up as opy.InferTypes(), opy.GenerateCode(), etc. | ||
| if os.getenv('CALLGRAPH') == '1': | ||
| from opy import callgraph | ||
| callgraph.Walk(main, sys.modules) | ||
| else: | ||
| main(sys.argv) |
| @@ -0,0 +1,148 @@ | ||
| #!/usr/bin/python -S | ||
| from __future__ import print_function | ||
| """ | ||
| py_deps.py | ||
| Dynamically discover Python and C modules. We import the main module and | ||
| inspect sys.modules before and after. That is, we use the exact logic that the | ||
| Python interpreter does. | ||
| Usage: | ||
| PYTHONPATH=... py_deps.py <main module> | ||
| IMPORTANT: Run this script with -S so that system libraries aren't found. | ||
| """ | ||
| import sys | ||
| OLD_MODULES = dict(sys.modules) # Make a copy | ||
| import os # Do it here so we don't mess up analysis | ||
| def log(msg, *args): | ||
| if args: | ||
| msg = msg % args | ||
| print('\t', msg, file=sys.stderr) | ||
| def ImportMain(main_module, old_modules): | ||
| """Yields (module name, absolute path) pairs.""" | ||
| log('Importing %r', main_module) | ||
| try: | ||
| __import__(main_module) | ||
| except ImportError, e: | ||
| log('Error importing %r with sys.path %r', main_module, sys.path) | ||
| # TODO: print better error. | ||
| raise | ||
| new_modules = sys.modules | ||
| log('After importing: %d modules', len(new_modules)) | ||
| for name in sorted(new_modules): | ||
| if name in old_modules: | ||
| continue # exclude old modules | ||
| module = new_modules[name] | ||
| full_path = getattr(module, '__file__', None) | ||
| # For some reason, there are entries like: | ||
| # 'pan.core.os': None in sys.modules. Here's a hack to get rid of them. | ||
| if module is None: | ||
| continue | ||
| # Not sure why, but some stdlib modules don't have a __file__ attribute, | ||
| # e.g. "gc", "marshal", "thread". Doesn't matter for our purposes. | ||
| if full_path is None: | ||
| continue | ||
| yield name, full_path | ||
| PY_MODULE = 0 | ||
| C_MODULE = 1 | ||
| def FilterModules(modules): | ||
| """Look at __file__ of each module, and classify them as Python or C.""" | ||
| for module, full_path in modules: | ||
| #print 'OLD', module, full_path | ||
| num_parts = module.count('.') + 1 | ||
| i = len(full_path) | ||
| # Do it once more in this case | ||
| if full_path.endswith('/__init__.pyc') or \ | ||
| full_path.endswith('__init__.py'): | ||
| i = full_path.rfind('/', 0, i) | ||
| for _ in xrange(num_parts): | ||
| i = full_path.rfind('/', 0, i) | ||
| #print i, full_path[i+1:] | ||
| rel_path = full_path[i + 1:] | ||
| # Depending on whether it's cached, the __file__ attribute on the module | ||
| # ends with '.py' or '.pyc'. | ||
| if full_path.endswith('.py'): | ||
| yield PY_MODULE, full_path, rel_path | ||
| elif full_path.endswith('.pyc'): | ||
| yield PY_MODULE, full_path[:-1], rel_path[:-1] | ||
| else: | ||
| # .so file | ||
| yield C_MODULE, module, full_path | ||
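
# For example, given the pairs from app_deps_test.py:
#
#   ('poly.util', 'poly/util.py')
#     -> (PY_MODULE, 'poly/util.py', 'poly/util.py')
#   ('core.libc', '/git/oil/core/libc.so')
#     -> (C_MODULE, 'core.libc', '/git/oil/core/libc.so')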
| # TODO: Get rid of this? | ||
def CreateOptionsParser():
  import optparse  # lazy import, so it doesn't show up in the module analysis
  parser = optparse.OptionParser()
  return parser
| def main(argv): | ||
| """Returns an exit code.""" | ||
| #(opts, argv) = CreateOptionsParser().parse_args(argv) | ||
| #if not argv: | ||
| # raise Error('No modules specified.') | ||
| # Set an environment variable so dependencies in debug mode can be excluded. | ||
| os.environ['_OVM_DEPS'] = '1' | ||
| action = argv[1] | ||
| main_module = argv[2] | ||
| log('Before importing: %d modules', len(OLD_MODULES)) | ||
| if action == 'both': # Write files for both .py and .so dependencies | ||
| prefix = argv[3] | ||
| py_out_path = prefix + '-cpython.txt' | ||
| c_out_path = prefix + '-c.txt' | ||
| modules = ImportMain(main_module, OLD_MODULES) | ||
| with open(py_out_path, 'w') as py_out, open(c_out_path, 'w') as c_out: | ||
| for mod_type, x, y in FilterModules(modules): | ||
| if mod_type == PY_MODULE: | ||
| print(x, y, file=py_out) | ||
| print(x + 'c', y + 'c', file=py_out) # .pyc goes in bytecode.zip too | ||
| elif mod_type == C_MODULE: | ||
| print(x, y, file=c_out) # mod_name, full_path | ||
| else: | ||
| raise AssertionError(mod_type) | ||
| elif action == 'py': # Just .py files | ||
| modules = ImportMain(main_module, OLD_MODULES) | ||
| for mod_type, full_path, rel_path in FilterModules(modules): | ||
| if mod_type == PY_MODULE: | ||
| opy_input = full_path | ||
| opy_output = rel_path + 'c' # output is .pyc | ||
| print(opy_input, opy_output) | ||
| else: | ||
| raise RuntimeError('Invalid action %r' % action) | ||
| if __name__ == '__main__': | ||
| try: | ||
| sys.exit(main(sys.argv)) | ||
| except RuntimeError as e: | ||
| print('%s: %s' % (sys.argv[0], e.args[0]), file=sys.stderr) | ||
| sys.exit(1) |
| @@ -0,0 +1,27 @@ | ||
| #!/usr/bin/env python | ||
| from __future__ import print_function | ||
| """ | ||
| app_deps_test.py: Tests for app_deps.py | ||
| """ | ||
| import sys | ||
| import unittest | ||
| import app_deps # module under test | ||
| class AppDepsTest(unittest.TestCase): | ||
| def testModules(self): | ||
| pairs = [ | ||
| ('poly.util', 'poly/util.py'), | ||
| ('core.libc', '/git/oil/core/libc.so'), | ||
| ('simplejson', | ||
| '/home/andy/dev/simplejson-2.1.5/simplejson/__init__.py') | ||
| ] | ||
| for mod_type, x, y in app_deps.FilterModules(pairs): | ||
| print(mod_type, x, y) | ||
| if __name__ == '__main__': | ||
| unittest.main() |
| @@ -0,0 +1,59 @@ | ||
| #!/usr/bin/env python | ||
| from __future__ import print_function | ||
| """ | ||
| c_module_srcs.py | ||
| """ | ||
| import sys | ||
| def main(argv): | ||
| manifest_path = argv[1] | ||
| discovered = argv[2] | ||
| manifest = {} | ||
| with open(manifest_path) as f: | ||
| for line in f: | ||
| line = line.strip() | ||
      mod_name, rel_path = line.split(None, 1)  # 2 fields: name, path
| manifest[mod_name] = rel_path | ||
| #print manifest | ||
| with open(discovered) as f: | ||
| for line in f: | ||
| line = line.strip() | ||
      mod_name, _ = line.split(None, 1)
| # Hard-coded special cases for now. | ||
| if mod_name in ('libc', 'fastlex'): # Our own modules | ||
| # Relative to Python-2.7.13 dir | ||
| print('../native/%s.c' % mod_name) | ||
| elif mod_name == 'math': | ||
| print('Modules/mathmodule.c') | ||
| print('Modules/_math.c') | ||
| elif mod_name == '_io': | ||
| # This data is in setup.py and Modules/Setup.dist. | ||
| #_io -I$(srcdir)/Modules/_io _io/bufferedio.c _io/bytesio.c | ||
| # _io/fileio.c _io/iobase.c _io/_iomodule.c _io/stringio.c | ||
| # _io/textio.c | ||
| print('Modules/_io/bufferedio.c') | ||
| print('Modules/_io/bytesio.c') | ||
| print('Modules/_io/fileio.c') | ||
| print('Modules/_io/iobase.c') | ||
| print('Modules/_io/_iomodule.c') | ||
| print('Modules/_io/stringio.c') | ||
| print('Modules/_io/textio.c') | ||
| else: | ||
| print(manifest[mod_name]) | ||
| if __name__ == '__main__': | ||
| try: | ||
| main(sys.argv) | ||
| except RuntimeError as e: | ||
| print('FATAL: %s' % e, file=sys.stderr) | ||
| sys.exit(1) |
| @@ -0,0 +1,40 @@ | ||
| #!/usr/bin/env python | ||
| from __future__ import print_function | ||
| """ | ||
| c_module_toc.py | ||
| """ | ||
| import glob | ||
| import re | ||
| import sys | ||
| PURE_C_RE = re.compile(r'.*/(.*)module\.c$') | ||
| HELPER_C_RE = re.compile(r'.*/(.*)\.c$') | ||
| def main(argv): | ||
| # module name -> list of paths to include | ||
| c_module_srcs = {} | ||
| for c_path in glob.glob('Modules/*.c') + glob.glob('Modules/_io/*.c'): | ||
| m = PURE_C_RE.match(c_path) | ||
| if m: | ||
| print(m.group(1), c_path) | ||
| continue | ||
| m = HELPER_C_RE.match(c_path) | ||
| if m: | ||
| name = m.group(1) | ||
| # Special case: | ||
| if name == '_hashopenssl': | ||
| name = '_hashlib' | ||
| print(name, c_path) | ||
| if __name__ == '__main__': | ||
| try: | ||
| main(sys.argv) | ||
| except RuntimeError as e: | ||
| print('FATAL: %s' % e, file=sys.stderr) | ||
| sys.exit(1) |
| @@ -0,0 +1,51 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| make_zip.py | ||
| Takes a list of manifests and merges them into a zip file. | ||
| """ | ||
| import sys | ||
| import zipfile | ||
| def main(argv): | ||
| # Write input files to a .zip | ||
| out_path = argv[1] | ||
  # NOTE: Startup is ~3 ms faster WITHOUT compression: 38 ms vs. 41 ms.
  #mode = zipfile.ZIP_DEFLATED
  # ZIP_STORED increases the size of the bytecode, but it's slightly faster
  # and doesn't require zlib.
  mode = zipfile.ZIP_STORED
| z = zipfile.ZipFile(out_path, 'w', mode) | ||
| seen = {} | ||
| for line in sys.stdin: | ||
| line = line.strip() | ||
| try: | ||
| full_path, rel_path = line.split(None, 1) | ||
| except ValueError: | ||
| raise RuntimeError('Invalid line %r' % line) | ||
| if rel_path in seen: | ||
| expected = seen[rel_path] | ||
| if expected != full_path: | ||
| print >>sys.stderr, 'WARNING: expected %r, got %r' % (expected, | ||
| full_path) | ||
| continue | ||
| #print >>sys.stderr, '%s -> %s' % (full_path, rel_path) | ||
| z.write(full_path, rel_path) | ||
| seen[rel_path] = full_path | ||
| # TODO: Make summary | ||
| if __name__ == '__main__': | ||
| try: | ||
| main(sys.argv) | ||
| except RuntimeError as e: | ||
| print >>sys.stderr, 'make_zip:', e.args[0] | ||
| sys.exit(1) |
| @@ -0,0 +1,340 @@ | ||
| #!/usr/bin/env python | ||
| from __future__ import print_function | ||
| """ | ||
| quick_ref.py | ||
| """ | ||
| import cgi | ||
| import os | ||
| import pprint | ||
| import re | ||
| import sys | ||
| # e.g. COMMAND LANGUAGE | ||
| CAPS_RE = re.compile(r'^[A-Z ]+$') | ||
| # 1. Optional X, then a SINGLE space | ||
| # 2. lower-case or upper-case topic | ||
| # 3. Optional: A SINGLE space, then punctuation | ||
| TOPIC_RE = re.compile( | ||
| r'\b(X[ ])?\@?([a-z_\-]+|[A-Z0-9_]+)([ ]\S+)?', re.VERBOSE) | ||
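
# Examples of what TOPIC_RE matches (illustrative):
#   'read'      -> a plain topic, linked as <a href="#read">read</a>
#   'X fork'    -> an unimplemented topic, prefixed with a red X
#   'echo -e'   -> topic 'echo' plus a suffix that is escaped but not linked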
| # Sections have alphabetical characters, spaces, and '/' for I/O. They are | ||
| # turned into anchors. | ||
| SECTION_RE = re.compile(r'\s*\[([a-zA-Z /]+)\]') | ||
| # Can occur at the beginning of a line, or before a topic | ||
| RED_X = '<span style="color: darkred">X </span>' | ||
| def _StringToHref(s): | ||
| return s.replace(' ', '-') | ||
| def MaybeHighlightSection(line, parts): | ||
| m = SECTION_RE.match(line) | ||
| if not m: | ||
| return line | ||
| #print >>sys.stderr, m.groups() | ||
| start = m.start(1) | ||
| end = m.end(1) | ||
| parts.append(line[:start]) # this is spaces, so not bothering to escape | ||
| section = m.group(1) | ||
| href = _StringToHref(section) | ||
| section_link = '<a href="#%s" class="level2">%s</a>' % (href, section) | ||
| parts.append(section_link) | ||
| return line[end:] | ||
| def HighlightLine(line): | ||
| """Convert a line of text to HTML. | ||
| Topics are highlighted and X made red.""" | ||
| parts = [] | ||
| last_end = 0 | ||
| found_one = False | ||
| line = MaybeHighlightSection(line, parts) | ||
| for m in TOPIC_RE.finditer(line): | ||
| #print >>sys.stderr, m.groups() | ||
| have_x = m.group(1) is not None | ||
| start = m.start(1) if have_x else m.start(2) | ||
| have_suffix = m.group(3) is not None | ||
| prior_piece = cgi.escape(line[last_end:start]) | ||
| parts.append(prior_piece) | ||
| if have_x: | ||
| parts.append(RED_X) | ||
| # Topics on the same line must be separated by exactly THREE spaces | ||
| if found_one and prior_piece not in (' ', ' @'): | ||
| last_end = start | ||
| break # stop linking | ||
| # this matters because the separator is three spaces | ||
| end = m.end(3) if have_suffix else m.end(2) | ||
| last_end = end | ||
| topic = line[m.start(2):m.end(2)] | ||
| topic_link = '<a href="#%s">%s</a>' % (topic, topic) | ||
| parts.append(topic_link) | ||
| if have_suffix: | ||
| parts.append(cgi.escape(m.group(3))) | ||
| found_one = True | ||
| last_piece = cgi.escape(line[last_end:len(line)]) | ||
| parts.append(last_piece) | ||
| #print >>sys.stderr, parts | ||
| html_line = ''.join(parts) | ||
| #print >>sys.stderr, html_line | ||
| return html_line | ||
| def TableOfContents(f): | ||
| # inputs: -toc.txt, -pages.txt | ||
| # outputs: | ||
| # tree of HTML | ||
| # maybe: man page for OSH usage (need to learn troff formatting!) | ||
| # syntactic elements: | ||
| # - toc | ||
| # - links to pages | ||
| # - (X) for not implemented | ||
| # - aliases: semicolon ; | ||
| # - pages | ||
| # - usage line (^Usage:) | ||
| # - internal links read[1] | ||
| # - <a href="#read"><read> | ||
| # - read[1] | ||
| # | ||
| # - example blocks | ||
| # generated parts: | ||
| # - builtin usage lines, from core/args.py | ||
| # - and osh usage itself | ||
| # Language: | ||
| ##### COMMAND LANGUAGE (turns into <a name=>) | ||
| ### Commands | ||
| # case | ||
| # if | ||
  # Basically any line that begins with ^# ^### or ^##### is special?
| # <h1> <h2> <h3> | ||
| # Still need links | ||
| # TODO: | ||
| # - Copy sh_spec.py for # parsing | ||
| # - Copy oilshell.org Snip for running examples and showing output! | ||
| # More stuff: | ||
| # - command, word, arith, boolean all need intros. | ||
| # - So does every section need a name? | ||
| # - Maybe just highlight anything after [? | ||
| # - What kind of links are they? | ||
| # Three level hierarchy: | ||
| # CAP WORDS | ||
| # [Title Words For Sections] | ||
  # problem: line breaks like [Shell Process
| # Control] | ||
| # there is no valid way to mark this up, even if you could parse it! | ||
| # you would need a table? | ||
| # lower-with-dashes for topics | ||
  # TODO: Add version and so forth?
| title_line = f.readline() | ||
| print('<h1>%s</h1>' % cgi.escape(title_line)) | ||
| print('<a name="toc"></a>') | ||
| # doc/run.sh must set environment. | ||
| print('<i>Version %s</i>' % os.environ['OIL_VERSION']) | ||
| print('<pre>') | ||
| for line in f: | ||
| if not line.strip(): | ||
| sys.stdout.write('\n') | ||
| continue | ||
| if CAPS_RE.match(line): | ||
| heading = line.strip() | ||
| anchor_text = cgi.escape(heading) | ||
| href = _StringToHref(heading) | ||
| # Add the newline back here | ||
| html_line = '<b><a href="#%s" class="level1">%s</a></b>\n' % ( | ||
| href, anchor_text) | ||
| elif line.startswith(' '): | ||
| html_line = HighlightLine(line) | ||
| elif line.startswith('X '): | ||
| html_line = RED_X + HighlightLine(line[2:]) | ||
| else: | ||
| html_line = cgi.escape(line) | ||
| sys.stdout.write(html_line) | ||
| print('</pre>') | ||
| # TODO: | ||
| # - group 1: # prefix determines h1, h2, h3 | ||
| # - group 2 is the <a name=""> -- there can be MORE THAN ONE | ||
| # - OSH-BINARY | ||
| # - Commands | ||
| # - for-expr | ||
| # - true|false | ||
| # - group 3: the anchor text to display | ||
| # | ||
| ## Conditional Conditional Constructs | ||
| ## Quotes Quotes | ||
| ### COMMAND-LANGUAGE Command Language | ||
| ### {Conditional} Conditional Constructs | ||
| ### <Conditional> Conditional Constructs | ||
| # These have no title? Just true? false? | ||
| # true|false true | ||
| class TextOutput: | ||
| def __init__(self, text_dir, topic_lookup): | ||
| self.text_dir = text_dir | ||
| self.topic_lookup = topic_lookup | ||
| def WriteFile(self, section_id, topics, lines): | ||
| """ | ||
| """ | ||
| section_name = '%d-%d-%d' % tuple(section_id) | ||
| path = os.path.join(self.text_dir, section_name) | ||
| with open(path, 'w') as f: | ||
| for line in lines: | ||
| f.write(line) | ||
| #print >>sys.stderr, 'Wrote %s' % path | ||
| for topic in topics: | ||
| self.topic_lookup[topic] = section_name | ||
| # TODO: Also allow {} in addition to <> delimiters. | ||
| HEADING_RE = re.compile(r'(#+) <(.*)>(.*)') | ||
| def Pages(f, text_out): | ||
| print('<pre>') | ||
| section_id = [0, 0, 0] # L1, L2, L3 | ||
| topics = [] | ||
| prev_topics = [] # from previous iteration | ||
| prev_lines = [] | ||
| for line in f: | ||
| if line.startswith('##'): # heading or comment | ||
| m = HEADING_RE.match(line) | ||
| if m: | ||
| # We got a heading. Write the previous lines | ||
| text_out.WriteFile(section_id, prev_topics, prev_lines) | ||
| prev_lines = [] | ||
| level, topic_str, text = m.groups() | ||
| #print >>sys.stderr, m.groups() | ||
| topics = topic_str.split() | ||
| if not text.strip(): | ||
| text = topic_str | ||
| if len(level) == 5: | ||
| htag = 2 | ||
| section_id[0] += 1 # from 2.3.4 to 3.0.0 | ||
| section_id[1] = 0 | ||
| section_id[2] = 0 | ||
| elif len(level) == 4: | ||
| htag = 3 | ||
| section_id[1] += 1 # from 2.2.3 to 2.3.0 | ||
| section_id[2] = 0 | ||
| elif len(level) == 3: | ||
| htag = 4 | ||
| section_id[2] += 1 # from 2.2.2 to 2.2.3 | ||
| else: | ||
| raise RuntimeError('Invalid level %r' % level) | ||
| print('</pre>') | ||
| for topic in topics: | ||
| print('<a name="%s"></a>' % topic) | ||
| print('<h%d>%s</h%d>' % (htag, text, htag)) | ||
| print('<!-- %d.%d.%d -->' % tuple(section_id)) | ||
| print('<pre>') | ||
| prev_topics = topics | ||
| else: | ||
| # Three or more should be a heading, not a comment. | ||
| if line.startswith('###'): | ||
| raise RuntimeError('Expected a heading, got %r' % line) | ||
| else: # normal line | ||
| sys.stdout.write(cgi.escape(line)) | ||
| prev_lines.append(line) | ||
| continue | ||
| print('</pre>') | ||
| def main(argv): | ||
| action = argv[1] | ||
| if action == 'toc': | ||
| with open(argv[2]) as f: | ||
| TableOfContents(f) | ||
| elif action == 'pages': | ||
| pages_txt, text_dir, py_out_path = argv[2:5] | ||
| topic_lookup = {} | ||
| with open(pages_txt) as f: | ||
| text_out = TextOutput(text_dir, topic_lookup) | ||
| Pages(f, text_out) | ||
| # TODO: Fuzzy matching of help topics | ||
| d = pprint.pformat(topic_lookup) | ||
| #print >>sys.stderr, d | ||
| with open(py_out_path, 'w') as f: | ||
| f.write('TOPIC_LOOKUP = ') | ||
| f.write(d) | ||
| # BUG WORKAROUND: The OPy parser requires an EOL! See opy/run.sh parser-bug. | ||
| f.write('\n') | ||
| print('Wrote %s/ and %s' % (text_dir, py_out_path), file=sys.stderr) | ||
| else: | ||
| raise RuntimeError('Invalid action %r' % action) | ||
| if __name__ == '__main__': | ||
| try: | ||
| main(sys.argv) | ||
| except RuntimeError as e: | ||
| print('FATAL: %s' % e, file=sys.stderr) | ||
| sys.exit(1) |
| @@ -0,0 +1,97 @@ | ||
| #!/usr/bin/python -S | ||
| from __future__ import print_function | ||
| """ | ||
| runpy_deps.py | ||
| NOTE: -S above is important. | ||
| """ | ||
| import sys # 15 modules | ||
| import runpy # 34 modules | ||
| PY_MODULE = 0 | ||
| C_MODULE = 1 | ||
| def FilterModules(modules, stdlib_dir): | ||
| stdlib_dir_len = len(stdlib_dir) | ||
| for name in sorted(modules): | ||
| mod = modules[name] | ||
| if name in ('__builtin__', '__main__'): | ||
| continue | ||
| try: | ||
| full_path = mod.__file__ | ||
| except AttributeError: | ||
| full_path = None | ||
| # If it's cached, it will be under .pyc; otherwise under .py. | ||
| if full_path and full_path.endswith('.py'): | ||
| py_path = full_path | ||
| pyc_path = full_path + 'c' | ||
| elif full_path and full_path.endswith('.pyc'): | ||
| pyc_path = full_path | ||
| py_path = full_path[:-1] | ||
    else:
      # Print a different format for C modules.
      yield C_MODULE, name, full_path
      continue  # py_path isn't defined in this case
| if py_path: | ||
| if py_path.startswith(stdlib_dir): | ||
| rel_py_path = py_path[stdlib_dir_len:] | ||
| else: | ||
| rel_py_path = py_path | ||
| # .pyc file for execution | ||
| yield PY_MODULE, py_path, rel_py_path | ||
| def main(argv): | ||
| runpy_path = runpy.__file__ | ||
| i = runpy_path.rfind('/') | ||
| assert i != -1, runpy_path | ||
| stdlib_dir = runpy_path[: i + 1] # include trailing slash | ||
| action = argv[1] | ||
| if action == 'both': | ||
| path_prefix = argv[2] | ||
| py_out_path = path_prefix + '/runpy-deps-cpython.txt' | ||
| c_out_path = path_prefix + '/runpy-deps-c.txt' | ||
| # NOTE: This is very similar to build/app_deps.py. | ||
| with open(py_out_path, 'w') as py_out, open(c_out_path, 'w') as c_out: | ||
| for mod_type, x, y in FilterModules(sys.modules, stdlib_dir): | ||
| if mod_type == PY_MODULE: | ||
          print(x, y, file=py_out)
          print(x + 'c', y + 'c', file=py_out)  # .pyc goes in bytecode.zip too
| elif mod_type == C_MODULE: | ||
| print(x, y, file=c_out) # mod_name, full_path | ||
| else: | ||
| raise AssertionError(mod_type) | ||
| print('-- Wrote %s and %s' % (py_out_path, c_out_path), file=sys.stderr) | ||
| elif action == 'py': | ||
| for mod_type, full_path, rel_path in \ | ||
| FilterModules(sys.modules, stdlib_dir): | ||
| if mod_type == PY_MODULE: | ||
| opy_input = full_path | ||
| opy_output = rel_path + 'c' # output is .pyc | ||
| print(opy_input, opy_output) | ||
| else: | ||
| raise RuntimeError('Invalid action %r' % action) | ||
| if __name__ == '__main__': | ||
| try: | ||
| main(sys.argv) | ||
| except RuntimeError as e: | ||
| print('%s: %s' % (sys.argv[0], e.args[0]), file=sys.stderr) | ||
| sys.exit(1) |
| @@ -0,0 +1,11 @@ | ||
| #!/usr/bin/env python | ||
| from distutils.core import setup, Extension | ||
| module = Extension('libc', | ||
| sources = ['native/libc.c'], | ||
| undef_macros = ['NDEBUG']) | ||
| setup(name = 'libc', | ||
| version = '1.0', | ||
| description = 'Module for libc functions like fnmatch()', | ||
| ext_modules = [module]) |
| @@ -0,0 +1,14 @@ | ||
| #!/usr/bin/env python | ||
| from distutils.core import setup, Extension | ||
| # https://stackoverflow.com/questions/4541565/how-can-i-assert-from-python-c-code | ||
| module = Extension('fastlex', | ||
| sources = ['native/fastlex.c'], | ||
| undef_macros = ['NDEBUG'] | ||
| ) | ||
| setup(name = 'fastlex', | ||
| version = '1.0', | ||
| description = 'Module to speed up lexers', | ||
| include_dirs = ['_devbuild/gen'], | ||
| ext_modules = [module]) |
| @@ -0,0 +1,7 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| future_import.py | ||
| """ | ||
| from __future__ import print_function | ||
| print('future print') |
| @@ -0,0 +1,80 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| hello.py | ||
| """ | ||
| from __future__ import print_function | ||
| import sys | ||
| print('Hello from hello.py', file=sys.stderr) | ||
| import os | ||
| print('sys.path:', sys.path, file=sys.stderr) | ||
| print('sys.argv:', sys.argv, file=sys.stderr) | ||
| print('hello _OVM_IS_BUNDLE', os.getenv('_OVM_IS_BUNDLE'), file=sys.stderr) | ||
| # Default | ||
| if not os.getenv('_OVM_DEPS'): | ||
| import inspect | ||
| print(inspect) | ||
| import lib | ||
| #import zipfile | ||
| import zipimport | ||
| if os.getenv('_OVM_IS_BUNDLE') == '1': | ||
| if 0: | ||
| print('ZIP') | ||
| z = zipfile.ZipFile(sys.argv[0]) | ||
| print(z.infolist()) | ||
| else: | ||
| z = zipimport.zipimporter(sys.argv[0]) | ||
| print(z) | ||
| print(dir(z)) | ||
| # None if we have the module, but no source. | ||
| print('SOURCE', repr(z.get_source('runpy'))) | ||
| # TODO: Add a method to get a file? I think it just imports zlib. | ||
| r = z.get_data('hello-data.txt') | ||
| print('FILE', repr(r)) | ||
| def Busy(n): | ||
| s = 0 | ||
| for i in xrange(n): | ||
| s += i | ||
| print(s) | ||
| def main(argv): | ||
| try: | ||
| action = argv[0] | ||
| except IndexError: | ||
| action = 'busy' | ||
| if action == 'busy': | ||
| try: | ||
| n = int(argv[1]) | ||
| except IndexError: | ||
| n = 100 | ||
| Busy(n) | ||
| elif action == 'unicode': # For testing that we support 4-byte chars | ||
| print(repr(unichr(0x10000))) | ||
| elif action == 'crash': # For testing that stack trace shows code | ||
| lib.Crash() | ||
| elif action == 'exit42': # for testing exit code | ||
| return 42 | ||
| else: | ||
| print('Unknown action %r' % action, file=sys.stderr) | ||
| return 1 | ||
| return 0 | ||
| if __name__ == '__main__': | ||
| sys.exit(main(sys.argv[1:])) |
| @@ -0,0 +1,13 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| lib.py | ||
| """ | ||
| from __future__ import print_function | ||
| import sys | ||
| print('Hello from lib.py', file=sys.stderr) | ||
| def Crash(): | ||
| raise RuntimeError('oops') | ||
| @@ -0,0 +1,201 @@ | ||
| """Helper to provide extensibility for pickle/cPickle. | ||
| This is only useful to add pickle support for extension types defined in | ||
| C, not for instances of user-defined classes. | ||
| """ | ||
| from types import ClassType as _ClassType | ||
| __all__ = ["pickle", "constructor", | ||
| "add_extension", "remove_extension", "clear_extension_cache"] | ||
| dispatch_table = {} | ||
| def pickle(ob_type, pickle_function, constructor_ob=None): | ||
| if type(ob_type) is _ClassType: | ||
| raise TypeError("copy_reg is not intended for use with classes") | ||
| if not hasattr(pickle_function, '__call__'): | ||
| raise TypeError("reduction functions must be callable") | ||
| dispatch_table[ob_type] = pickle_function | ||
| # The constructor_ob function is a vestige of safe for unpickling. | ||
| # There is no reason for the caller to pass it anymore. | ||
| if constructor_ob is not None: | ||
| constructor(constructor_ob) | ||
| def constructor(object): | ||
| if not hasattr(object, '__call__'): | ||
| raise TypeError("constructors must be callable") | ||
| # Example: provide pickling support for complex numbers. | ||
| try: | ||
| complex | ||
| except NameError: | ||
| pass | ||
| else: | ||
| def pickle_complex(c): | ||
| return complex, (c.real, c.imag) | ||
| pickle(complex, pickle_complex, complex) | ||
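| # Illustrative note (not in the original module): after the pickle() call | ||
| # above, dispatch_table[complex] is pickle_complex, so pickling 1+2j | ||
| # records a call to complex(1.0, 2.0) instead of falling back to | ||
| # __reduce_ex__. | ||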
| # Support for pickling new-style objects | ||
| def _reconstructor(cls, base, state): | ||
| if base is object: | ||
| obj = object.__new__(cls) | ||
| else: | ||
| obj = base.__new__(cls, state) | ||
| if base.__init__ != object.__init__: | ||
| base.__init__(obj, state) | ||
| return obj | ||
| _HEAPTYPE = 1<<9 | ||
| # Python code for object.__reduce_ex__ for protocols 0 and 1 | ||
| def _reduce_ex(self, proto): | ||
| assert proto < 2 | ||
| for base in self.__class__.__mro__: | ||
| if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE: | ||
| break | ||
| else: | ||
| base = object # not really reachable | ||
| if base is object: | ||
| state = None | ||
| else: | ||
| if base is self.__class__: | ||
| raise TypeError, "can't pickle %s objects" % base.__name__ | ||
| state = base(self) | ||
| args = (self.__class__, base, state) | ||
| try: | ||
| getstate = self.__getstate__ | ||
| except AttributeError: | ||
| if getattr(self, "__slots__", None): | ||
| raise TypeError("a class that defines __slots__ without " | ||
| "defining __getstate__ cannot be pickled") | ||
| try: | ||
| dict = self.__dict__ | ||
| except AttributeError: | ||
| dict = None | ||
| else: | ||
| dict = getstate() | ||
| if dict: | ||
| return _reconstructor, args, dict | ||
| else: | ||
| return _reconstructor, args | ||
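| # Worked example (a sketch, not from the original module): for a | ||
| # hypothetical class MyInt(int) holding 5, with an empty __dict__, | ||
| # _reduce_ex(MyInt(5), 1) finds base == int (int is not a heap type), | ||
| # computes state = int(MyInt(5)) == 5, and returns | ||
| # (_reconstructor, (MyInt, int, 5)); unpickling then calls | ||
| # _reconstructor(MyInt, int, 5), i.e. int.__new__(MyInt, 5). | ||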
| # Helper for __reduce_ex__ protocol 2 | ||
| def __newobj__(cls, *args): | ||
| return cls.__new__(cls, *args) | ||
| def _slotnames(cls): | ||
| """Return a list of slot names for a given class. | ||
| This needs to find slots defined by the class and its bases, so we | ||
| can't simply return the __slots__ attribute. We must walk down | ||
| the Method Resolution Order and concatenate the __slots__ of each | ||
| class found there. (This assumes classes don't modify their | ||
| __slots__ attribute to misrepresent their slots after the class is | ||
| defined.) | ||
| """ | ||
| # Get the value from a cache in the class if possible | ||
| names = cls.__dict__.get("__slotnames__") | ||
| if names is not None: | ||
| return names | ||
| # Not cached -- calculate the value | ||
| names = [] | ||
| if not hasattr(cls, "__slots__"): | ||
| # This class has no slots | ||
| pass | ||
| else: | ||
| # Slots found -- gather slot names from all base classes | ||
| for c in cls.__mro__: | ||
| if "__slots__" in c.__dict__: | ||
| slots = c.__dict__['__slots__'] | ||
| # if class has a single slot, it can be given as a string | ||
| if isinstance(slots, basestring): | ||
| slots = (slots,) | ||
| for name in slots: | ||
| # special descriptors | ||
| if name in ("__dict__", "__weakref__"): | ||
| continue | ||
| # mangled names | ||
| elif name.startswith('__') and not name.endswith('__'): | ||
| names.append('_%s%s' % (c.__name__, name)) | ||
| else: | ||
| names.append(name) | ||
| # Cache the outcome in the class if at all possible | ||
| try: | ||
| cls.__slotnames__ = names | ||
| except: | ||
| pass # But don't die if we can't | ||
| return names | ||
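| # Example (illustrative): for a hypothetical class C(object) with | ||
| # __slots__ = ('__x', 'y'), _slotnames(C) returns ['_C__x', 'y'] -- | ||
| # the private name is mangled, the public one is not. | ||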
| # A registry of extension codes. This is an ad-hoc compression | ||
| # mechanism. Whenever a global reference to <module>, <name> is about | ||
| # to be pickled, the (<module>, <name>) tuple is looked up here to see | ||
| # if it is a registered extension code for it. Extension codes are | ||
| # universal, so that the meaning of a pickle does not depend on | ||
| # context. (There are also some codes reserved for local use that | ||
| # don't have this restriction.) Codes are positive ints; 0 is | ||
| # reserved. | ||
| _extension_registry = {} # key -> code | ||
| _inverted_registry = {} # code -> key | ||
| _extension_cache = {} # code -> object | ||
| # Don't ever rebind those names: cPickle grabs a reference to them when | ||
| # it's initialized, and won't see a rebinding. | ||
| def add_extension(module, name, code): | ||
| """Register an extension code.""" | ||
| code = int(code) | ||
| if not 1 <= code <= 0x7fffffff: | ||
| raise ValueError, "code out of range" | ||
| key = (module, name) | ||
| if (_extension_registry.get(key) == code and | ||
| _inverted_registry.get(code) == key): | ||
| return # Redundant registrations are benign | ||
| if key in _extension_registry: | ||
| raise ValueError("key %s is already registered with code %s" % | ||
| (key, _extension_registry[key])) | ||
| if code in _inverted_registry: | ||
| raise ValueError("code %s is already in use for key %s" % | ||
| (code, _inverted_registry[code])) | ||
| _extension_registry[key] = code | ||
| _inverted_registry[code] = key | ||
| def remove_extension(module, name, code): | ||
| """Unregister an extension code. For testing only.""" | ||
| key = (module, name) | ||
| if (_extension_registry.get(key) != code or | ||
| _inverted_registry.get(code) != key): | ||
| raise ValueError("key %s is not registered with code %s" % | ||
| (key, code)) | ||
| del _extension_registry[key] | ||
| del _inverted_registry[code] | ||
| if code in _extension_cache: | ||
| del _extension_cache[code] | ||
| def clear_extension_cache(): | ||
| _extension_cache.clear() | ||
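| # Usage sketch (illustrative; 'my_module' and 'MyClass' are made up): | ||
| #   add_extension('my_module', 'MyClass', 240)  # 240: private-use range | ||
| #   # ... pickles now encode that global as the compact code 240 ... | ||
| #   remove_extension('my_module', 'MyClass', 240)  # for testing only | ||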
| # Standard extension code assignments | ||
| # Reserved ranges | ||
| # First Last Count Purpose | ||
| # 1 127 127 Reserved for Python standard library | ||
| # 128 191 64 Reserved for Zope | ||
| # 192 239 48 Reserved for 3rd parties | ||
| # 240 255 16 Reserved for private use (will never be assigned) | ||
| # 256 Inf Inf Reserved for future assignment | ||
| # Extension codes are assigned by the Python Software Foundation. |
| @@ -0,0 +1,195 @@ | ||
| """ | ||
| alloc.py - Sketch of memory management. | ||
| This is roughly what you might do in C++, but it's probably overly complicated | ||
| for Python. | ||
| The idea is to save the LST for functions, but discard it for commands that | ||
| have already executed. Each statement/function can be parsed into a separate | ||
| Arena, and the entire Arena can be discarded at once. | ||
| Also, we don't want to save comment lines. | ||
| """ | ||
| from asdl import const | ||
| from core import util | ||
| class Arena(object): | ||
| """A collection of lines and line spans. | ||
| In C++ and maybe Oil: A block of memory that can be freed at once. | ||
| Two use cases: | ||
| 1. Reformatting: ClearLastLine() is never called | ||
| 2. Execution: ClearLastLine() for lines that are all comments. The purpose | ||
| of this is not to penalize big comment blocks in .rc files and completion | ||
| files! | ||
| """ | ||
| def __init__(self, arena_id): | ||
| self.arena_id = arena_id # an integer stored in tokens | ||
| # Could be std::vector<char *> pointing into a std::string. | ||
| # NOTE: lines are required for bootstrapping code within the binary, and | ||
| # also required for interactive or stdin, but optional when code is on | ||
| # disk. We can go look it up later to save memory. | ||
| self.lines = [] | ||
| self.next_line_id = 0 | ||
| # first real span is 1. 0 means undefined. | ||
| self.spans = [] | ||
| self.next_span_id = 0 | ||
| # List of (src_path index, physical line number). This is two integers for | ||
| # every line read. We could use a clever encoding for this, although it's | ||
| # probably more important to compact the ASDL representation. | ||
| self.debug_info = [] | ||
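| # e.g. debug_info[5] == (0, 12) means the line with line_id 5 came from | ||
| # physical line 12 of self.src_paths[0]. | ||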
| self.src_paths = [] # list of source paths | ||
| self.src_id_stack = [] # stack of src_id integers | ||
| def IsComplete(self): | ||
| """Return whether we have a full set of lines -- none of which was cleared. | ||
| Maybe just an assertion error. | ||
| """ | ||
| def PushSource(self, src_path): | ||
| src_id = len(self.src_paths) | ||
| self.src_paths.append(src_path) | ||
| self.src_id_stack.append(src_id) | ||
| def PopSource(self): | ||
| self.src_id_stack.pop() | ||
| def AddLine(self, line, line_num): | ||
| """ | ||
| Args: | ||
| line: string | ||
| line_num: physical line number, for printing | ||
| TODO: Add an option of whether to save the line? You can retrieve it on | ||
| disk in many cases. (But not in the stdin, '-c', 'eval' case) | ||
| """ | ||
| line_id = self.next_line_id | ||
| self.lines.append(line) | ||
| self.next_line_id += 1 | ||
| self.debug_info.append((self.src_id_stack[-1], line_num)) | ||
| return line_id | ||
| def ClearLastLine(self): | ||
| """Call if it was a comment.""" | ||
| pass | ||
| def GetLine(self, line_id): | ||
| """ | ||
| Given a line ID, return the line contents. (Use GetDebugInfo() for the | ||
| filename and physical line number.) | ||
| """ | ||
| assert line_id >= 0, line_id | ||
| return self.lines[line_id] | ||
| def AddLineSpan(self, line_span): | ||
| """ | ||
| TODO: Add an option of whether to save the line? You can retrieve it on | ||
| disk in many cases. | ||
| """ | ||
| span_id = self.next_span_id | ||
| self.spans.append(line_span) | ||
| self.next_span_id += 1 | ||
| return span_id | ||
| def GetLineSpan(self, span_id): | ||
| assert span_id != const.NO_INTEGER, span_id | ||
| try: | ||
| return self.spans[span_id] | ||
| except IndexError: | ||
| util.log('Span ID out of range: %d', span_id) | ||
| raise | ||
| def GetDebugInfo(self, line_id): | ||
| """Get the path and physical line number, for parse errors.""" | ||
| assert line_id != const.NO_INTEGER, line_id | ||
| src_id, line_num = self.debug_info[line_id] | ||
| try: | ||
| path = self.src_paths[src_id] | ||
| except IndexError: | ||
| print('INDEX', src_id) | ||
| raise | ||
| return path, line_num | ||
| def CompletionArena(pool): | ||
| """A temporary arena that only exists for a function call?""" | ||
| arena = pool.NewArena() | ||
| arena.PushSource('<temp completion buffer>') | ||
| return arena | ||
| def PluginArena(source_name): | ||
| """For PS4, etc.""" | ||
| # TODO: Should there only be one pool? This isn't worked out yet. | ||
| pool = Pool() | ||
| arena = pool.NewArena() | ||
| arena.PushSource(source_name) | ||
| return arena | ||
| # In C++, InteractiveLineReader and StringLineReader should use the same | ||
| # representation: std::string with internal NULs to terminate lines, and then | ||
| # std::vector<char*> that points into it. | ||
| # InteractiveLineReader only needs to save a line if it contains a function. | ||
| # The parser needs to set a flag if it contains a function! | ||
| class Pool(object): | ||
| """Owns source lines plus debug info. | ||
| Two use cases: | ||
| 1. Reformatting: DestroyLastArena() is never called | ||
| 2. Execution: DestroyLastArena() is called if an arena doesn't define any | ||
| functions, i.e. once the whole thing has been executed. | ||
| At the end of the program, all remaining arenas can be freed, or we just let | ||
| the OS clean up. Probably in debug/ASAN mode, we will clean it up. We also | ||
| want to clean up in embedded mode. The oil_Init() and oil_Destroy() methods | ||
| of the API should do this. | ||
| """ | ||
| def __init__(self): | ||
| self.arenas = [] | ||
| self.next_arena_id = 0 | ||
| # NOTE: dash uses a similar scheme. stalloc() / setstackmark() / | ||
| # popstackmark() in memalloc.c. | ||
| # We're not using Push/Pop terminology because you never pop twice. You can | ||
| # only destroy the top/last arena. | ||
| def NewArena(self): | ||
| """Call this after parsing anything that you might want to destroy.""" | ||
| a = Arena(self.next_arena_id) | ||
| self.next_arena_id += 1 | ||
| self.arenas.append(a) | ||
| return a | ||
| def DestroyLastArena(self): | ||
| """ | ||
| Free everything in the last arena (possibly reusing it). This is done | ||
| after we have executed all of its statements, if there were no function | ||
| definitions that need to be executed later. | ||
| """ | ||
| a = self.arenas.pop() | ||
| # This removes lines and spans? | ||
| del a | ||
| def IsComplete(self): | ||
| """Return whether we have one arena that was never destroyed?""" | ||
| # TODO: Also need arena_id | ||
| # NOTE: Not used right now. | ||
| def SpanValue(span, arena): | ||
| """Given an line_span and a arena of lines, return the string value. | ||
| """ | ||
| line = arena.GetLine(span.line_id) | ||
| c = span.col | ||
| return line[c : c + span.length] |
| @@ -0,0 +1,55 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| alloc_test.py: Tests for alloc.py | ||
| """ | ||
| import unittest | ||
| from core import alloc # module under test | ||
| class AllocTest(unittest.TestCase): | ||
| def setUp(self): | ||
| p = alloc.Pool() | ||
| self.arena = p.NewArena() | ||
| def testPool(self): | ||
| arena = self.arena | ||
| arena.PushSource('one.oil') | ||
| line_id = arena.AddLine('line 1', 1) | ||
| self.assertEqual(0, line_id) | ||
| line_id = arena.AddLine('line 2', 2) | ||
| self.assertEqual(1, line_id) | ||
| span_id = arena.AddLineSpan(None) | ||
| self.assertEqual(0, span_id) | ||
| arena.PopSource() | ||
| self.assertEqual(('one.oil', 1), arena.GetDebugInfo(0)) | ||
| self.assertEqual(('one.oil', 2), arena.GetDebugInfo(1)) | ||
| def testPushSource(self): | ||
| arena = self.arena | ||
| arena.PushSource('one.oil') | ||
| arena.AddLine('echo 1a', 1) | ||
| arena.AddLine('source two.oil', 2) | ||
| arena.PushSource('two.oil') | ||
| arena.AddLine('echo 2a', 1) | ||
| id2 = arena.AddLine('echo 2b', 2) # line 2 of two.oil | ||
| arena.PopSource() | ||
| id3 = arena.AddLine('echo 1c', 3) # line 3 of one.oil | ||
| arena.PopSource() | ||
| # TODO: fix these assertions | ||
| self.assertEqual(('two.oil', 2), arena.GetDebugInfo(id2)) | ||
| self.assertEqual(('one.oil', 3), arena.GetDebugInfo(id3)) | ||
| if __name__ == '__main__': | ||
| unittest.main() |
| @@ -0,0 +1,182 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| args_test.py: Tests for args.py | ||
| """ | ||
| import unittest | ||
| from core import args # module under test | ||
| class ArgsTest(unittest.TestCase): | ||
| def testFlagsAndOptions(self): | ||
| s = args.FlagsAndOptions() | ||
| s.ShortFlag('-c', args.Str) | ||
| s.ShortFlag('-i', args.Str) | ||
| s.LongFlag('--help') | ||
| s.LongFlag('--rcfile', args.Str) | ||
| s.LongFlag('--ast-format', ['text', 'html']) | ||
| s.Option('e', 'errexit') | ||
| s.Option('u', 'nounset') | ||
| s.Option(None, 'pipefail') | ||
| # Parsing stops at the first non-flag arg; 'foo' and '--help' are left over | ||
| argv = ['-c', 'echo hi', '-e', '-o', 'nounset', 'foo', '--help'] | ||
| arg, i = s.Parse(argv) | ||
| print(arg, argv[i:]) | ||
| self.assertEqual(['foo', '--help'], argv[i:]) | ||
| self.assertEqual('echo hi', arg.c) | ||
| self.assertEqual(None, arg.help) | ||
| self.assertEqual(None, arg.i) | ||
| self.assertEqual( | ||
| [('errexit', True), ('nounset', True)], arg.opt_changes) | ||
| argv = ['+e', '+o', 'nounset', '-o', 'pipefail', 'foo'] | ||
| arg, i = s.Parse(argv) | ||
| print(arg, argv[i:]) | ||
| self.assertEqual(['foo'], argv[i:]) | ||
| self.assertEqual(None, arg.i) | ||
| self.assertEqual( | ||
| [('errexit', False), ('nounset', False), ('pipefail', True)], | ||
| arg.opt_changes) | ||
| self.assertRaises(args.UsageError, s.Parse, ['-o', 'pipefailX']) | ||
| argv = ['-c', 'echo hi', '--help', '--rcfile', 'bashrc'] | ||
| arg, i = s.Parse(argv) | ||
| self.assertEqual('echo hi', arg.c) | ||
| self.assertEqual(True, arg.help) | ||
| self.assertEqual('bashrc', arg.rcfile) | ||
| # This is an odd syntax! | ||
| argv = ['-euo', 'pipefail'] | ||
| arg, i = s.Parse(argv) | ||
| self.assertEqual( | ||
| [('errexit', True), ('nounset', True), ('pipefail', True)], | ||
| arg.opt_changes) | ||
| self.assertEqual(2, i) | ||
| # Even weirder! | ||
| argv = ['+oeu', 'pipefail'] | ||
| arg, i = s.Parse(argv) | ||
| self.assertEqual( | ||
| [('pipefail', False), ('errexit', False), ('nounset', False)], | ||
| arg.opt_changes) | ||
| self.assertEqual(2, i) | ||
| # Even weirder! | ||
| argv = ['+oo', 'pipefail', 'errexit'] | ||
| arg, i = s.Parse(argv) | ||
| self.assertEqual( | ||
| [('pipefail', False), ('errexit', False)], | ||
| arg.opt_changes) | ||
| self.assertEqual(3, i) | ||
| # Now this is an arg. Gah. | ||
| argv = ['+o', 'pipefail', 'errexit'] | ||
| arg, i = s.Parse(argv) | ||
| self.assertEqual([('pipefail', False)], arg.opt_changes) | ||
| self.assertEqual(['errexit'], argv[i:]) | ||
| # NOTE: 'set -ooo' and 'set -o -o -o' bash runs 'set -o' three times! | ||
| # We're not going to replicate that silly behavior. | ||
| def testChoices(self): | ||
| s = args.FlagsAndOptions() | ||
| s.LongFlag('--ast-format', ['text', 'html']) | ||
| arg, i = s.Parse(['--ast-format', 'text']) | ||
| self.assertEqual('text', arg.ast_format) | ||
| self.assertRaises(args.UsageError, s.Parse, ['--ast-format', 'oops']) | ||
| def testBuiltinFlags(self): | ||
| s = args.BuiltinFlags() | ||
| s.ShortFlag('-f') | ||
| s.ShortFlag('-n') | ||
| s.ShortFlag('-d', args.Str) # delimiter | ||
| arg, i = s.Parse(['-f', 'foo', 'bar']) | ||
| self.assertEqual(1, i) | ||
| self.assertEqual(True, arg.f) | ||
| self.assertEqual(None, arg.n) | ||
| self.assertRaises(args.UsageError, s.Parse, ['-f', '-d']) | ||
| arg, i = s.Parse(['-d', ' ', 'foo']) | ||
| self.assertEqual(2, i) | ||
| self.assertEqual(' ', arg.d) | ||
| arg, i = s.Parse(['-d,', 'foo']) | ||
| self.assertEqual(1, i) | ||
| self.assertEqual(',', arg.d) | ||
| def testReadBuiltinFlags(self): | ||
| s = args.BuiltinFlags() | ||
| s.ShortFlag('-r') # no backslash escapes | ||
| s.ShortFlag('-t', args.Float) # timeout | ||
| s.ShortFlag('-p', args.Str) # prompt string | ||
| arg, i = s.Parse(['-r', 'foo']) | ||
| self.assertEqual(True, arg.r) | ||
| self.assertEqual(1, i) | ||
| arg, i = s.Parse(['-p', '>']) | ||
| self.assertEqual(None, arg.r) | ||
| self.assertEqual('>', arg.p) | ||
| self.assertEqual(2, i) | ||
| arg, i = s.Parse(['-rp', '>']) | ||
| self.assertEqual(True, arg.r) | ||
| self.assertEqual('>', arg.p) | ||
| self.assertEqual(2, i) | ||
| # REALLY ANNOYING: in '-rpr', the first 'r' is a flag, and the second 'r' is | ||
| # the argument to -p (the prompt)!  It only works in that order. | ||
| # Does that mean any flag that takes an argument consumes the rest of the | ||
| # group?  e.g. read -p line | ||
| arg, i = s.Parse(['-rpr']) | ||
| self.assertEqual(True, arg.r) | ||
| self.assertEqual('r', arg.p) | ||
| self.assertEqual(1, i) | ||
| argv = ['-t1.5', '>'] | ||
| arg, i = s.Parse(argv) | ||
| self.assertEqual(1.5, arg.t) | ||
| self.assertEqual(1, i) | ||
| # Invalid flag 'z' | ||
| self.assertRaises(args.UsageError, s.Parse, ['-rz']) | ||
| def testParseLikeEcho(self): | ||
| s = args.BuiltinFlags() | ||
| s.ShortFlag('-e') # no backslash escapes | ||
| s.ShortFlag('-n') | ||
| arg, i = s.ParseLikeEcho(['-e', '-n', 'foo']) | ||
| self.assertEqual(True, arg.e) | ||
| self.assertEqual(True, arg.n) | ||
| self.assertEqual(2, i) | ||
| arg, i = s.ParseLikeEcho(['-en', 'foo']) | ||
| self.assertEqual(True, arg.e) | ||
| self.assertEqual(True, arg.n) | ||
| self.assertEqual(1, i) | ||
| arg, i = s.ParseLikeEcho(['-ez', 'foo']) | ||
| self.assertEqual(None, arg.e) | ||
| self.assertEqual(None, arg.n) | ||
| self.assertEqual(0, i) | ||
| if __name__ == '__main__': | ||
| unittest.main() |
| @@ -0,0 +1,268 @@ | ||
| #!/usr/bin/env python | ||
| """ | ||
| braces.py | ||
| NOTE: bash implements brace expansion in the braces.c file (835 lines). It | ||
| uses goto! | ||
| Possible optimization flags for CompoundWord: | ||
| - has Lit_LBrace, Lit_RBrace -- set during word_parse phase | ||
| - if it has both, then do _BraceDetect | ||
| - has BracedAltPart -- set during _BraceDetect | ||
| - if it does, then do the expansion | ||
| - has Lit_Star, ?, [ ] -- globbing? | ||
| - but after expansion do you still have those flags? | ||
| """ | ||
| from __future__ import print_function | ||
| import sys | ||
| from osh.meta import Id | ||
| from osh.meta import ast | ||
| word_part_e = ast.word_part_e | ||
| word_e = ast.word_e | ||
| class _StackFrame(object): | ||
| def __init__(self, cur_parts): | ||
| self.cur_parts = cur_parts | ||
| self.alt_part = ast.BracedAltPart() | ||
| self.saw_comma = False | ||
| def _BraceDetect(w): | ||
| """ | ||
| Args: | ||
| w: CompoundWord | ||
| Returns: | ||
| BracedWordTree or None | ||
| Another option: | ||
| Grammar: | ||
| # an alternative is a literal, possibly empty, or another brace_expr | ||
| part = <any part except LiteralPart> | ||
| alt = part* | brace_expr | ||
| # a brace_expr is a group of at least 2 braced and comma-separated | ||
| # alternatives, with optional prefix and suffix. | ||
| brace_expr = part* '{' alt ',' alt (',' alt)* '}' part* | ||
| Problem with this grammar: it's not LL(1) | ||
| Is it indirectly left-recursive? | ||
| What's the best way to handle it? An LR(1) parser? | ||
| Iterative algorithm: | ||
| Parse it with a stack? | ||
| It's a stack that asserts there is at least one ',' in between {} | ||
| Yeah, just go through, and when you see '{', push another list. | ||
| When you get ',', append to the list. | ||
| When you get '}' and at least one ',', append to the list. | ||
| When you get '}' without a ',', then pop. | ||
| If there is no matching '}', then abort with an error. | ||
| If it's not balanced, return an error too? | ||
| """ | ||
| # Errors: | ||
| # }a{ - stack depth dips below 0 | ||
| # {a,b}{ - stack depth doesn't end at 0 | ||
| # {a} - no comma, and also not a numeric range | ||
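| # Worked example (illustrative): for the word -{a,b}- the loop sees: | ||
| #   '-'  literal     -> appended to cur_parts (the prefix) | ||
| #   '{'  Lit_LBrace  -> push a frame; cur_parts = [] | ||
| #   'a'  literal     -> appended to cur_parts | ||
| #   ','  Lit_Comma   -> saw_comma; 'a' becomes the first alternative | ||
| #   'b'  literal     -> appended to cur_parts | ||
| #   '}'  Lit_RBrace  -> 'b' becomes the second alternative; pop the frame; | ||
| #                       the BracedAltPart follows the '-' prefix | ||
| #   '-'  literal     -> appended (the suffix) | ||
| # Result: BracedWordTree(['-', BracedAltPart(a|b), '-']) | ||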
| cur_parts = [] | ||
| stack = [] | ||
| found = False | ||
| for i, part in enumerate(w.parts): | ||
| append = True | ||
| if part.tag == word_part_e.LiteralPart: | ||
| id_ = part.token.id | ||
| if id_ == Id.Lit_LBrace: | ||
| # Save prefix parts. Start new parts list. | ||
| new_frame = _StackFrame(cur_parts) | ||
| stack.append(new_frame) | ||
| cur_parts = [] | ||
| append = False | ||
| found = True # assume found, but can early exit with None later | ||
| elif id_ == Id.Lit_Comma: | ||
| # Append a new alternative. | ||
| #print('*** Appending after COMMA', cur_parts) | ||
| # NOTE: Should we allow this: | ||
| # ,{a,b} | ||
| # or force this: | ||
| # \,{a,b} | ||
| # ? We're forcing braces right now but not commas. | ||
| if stack: | ||
| stack[-1].saw_comma = True | ||
| stack[-1].alt_part.words.append(ast.CompoundWord(cur_parts)) | ||
| cur_parts = [] # clear | ||
| append = False | ||
| elif id_ == Id.Lit_RBrace: | ||
| # TODO: | ||
| # - Detect lack of , -- abort the whole thing | ||
| # - Detect {1..10} and {1..10..2} | ||
| # - bash and zsh only -- this is NOT implemented by mksh | ||
| # - Use a regex on the middle part: | ||
| # - digit+ '..' digit+ ( '..' digit+ )? | ||
| # - Char ranges are bash only! | ||
| # | ||
| # ast.BracedIntRangePart() | ||
| # ast.CharRangePart() | ||
| if not stack: # e.g. echo } -- unbalanced '}' | ||
| return None | ||
| if not stack[-1].saw_comma: # {foo} is not a real alternative | ||
| return None | ||
| stack[-1].alt_part.words.append(ast.CompoundWord(cur_parts)) | ||
| frame = stack.pop() | ||
| cur_parts = frame.cur_parts | ||
| cur_parts.append(frame.alt_part) | ||
| append = False | ||
| if append: | ||
| cur_parts.append(part) | ||
| if len(stack) != 0: | ||
| return None | ||
| if found: | ||
| return ast.BracedWordTree(cur_parts) | ||
| else: | ||
| return None | ||
| def BraceDetectAll(words): | ||
| out = [] | ||
| for w in words: | ||
| #print(w) | ||
| brace_tree = _BraceDetect(w) | ||
| if brace_tree: | ||
| out.append(brace_tree) | ||
| else: | ||
| out.append(w) | ||
| return out | ||
| # Possible optimization for later: | ||
| def _TreeCount(tree_word): | ||
| """Count output size for allocation purposes. | ||
| We can count the number of words expanded into, and the max number of parts | ||
| in a word. | ||
| Every word can have a different number of parts, e.g. -{'a'b,c}- expands into | ||
| words of 4 parts, then 3 parts. | ||
| """ | ||
| # TODO: Copy the structure of _BraceExpand and _BraceExpandOne. | ||
| for part in tree_word.parts: | ||
| if part.tag == word_part_e.BracedAltPart: | ||
| for word in part.words: | ||
| pass | ||
| num_results = 2 | ||
| max_parts = 5 | ||
| return num_results, max_parts | ||
| def _BraceExpandOne(parts, first_alt_index, suffixes): | ||
| """Helper for _BraceExpand. | ||
| Args: | ||
| parts: input parts | ||
| first_alt_index: index of the first BracedAltPart | ||
| suffixes: List of suffixes to append. | ||
| """ | ||
| out = [] | ||
| # Need to call _BraceExpand on each of the inner words too! | ||
| first_alt = parts[first_alt_index] | ||
| expanded_alts = [] | ||
| for w in first_alt.words: | ||
| expanded_alts.extend(_BraceExpand(w.parts)) | ||
| prefix = parts[ : first_alt_index] | ||
| for alt_parts in expanded_alts: | ||
| for suffix in suffixes: | ||
| out_parts = [] | ||
| out_parts.extend(prefix) | ||
| out_parts.extend(alt_parts) | ||
| out_parts.extend(suffix) | ||
| # TODO: Do we need to preserve flags? | ||
| out.append(out_parts) | ||
| return out | ||
| def _BraceExpand(parts): | ||
| num_alts = 0 | ||
| first_alt_index = -1 | ||
| for i, part in enumerate(parts): | ||
| if part.tag == word_part_e.BracedAltPart: | ||
| num_alts += 1 | ||
| if num_alts == 1: | ||
| first_alt_index = i | ||
| elif num_alts == 2: | ||
| break # don't need to count anymore | ||
| # NOTE: There are TWO recursive calls here, not just one -- one for | ||
| # nested {}, and one for adjacent {}. Thus it's hard to do iteratively. | ||
| if num_alts == 0: | ||
| return [parts] | ||
| elif num_alts == 1: | ||
| suffix = parts[first_alt_index+1 : ] | ||
| return _BraceExpandOne(parts, first_alt_index, [suffix]) | ||
| else: | ||
| # Now call it on the tail | ||
| tail_parts = parts[first_alt_index+1 : ] | ||
| suffixes = _BraceExpand(tail_parts) # recursive call | ||
| return _BraceExpandOne(parts, first_alt_index, suffixes) | ||
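| # Worked example (illustrative): for a{b,c}d{e,f}, the first BracedAltPart | ||
| # is {b,c}.  The tail d{e,f} is expanded recursively into the suffixes | ||
| # 'de' and 'df' (the adjacent case), each alternative word is expanded | ||
| # recursively too (the nested case), and _BraceExpandOne crosses the | ||
| # alternatives with the suffixes under the prefix 'a': | ||
| # abde, abdf, acde, acdf. | ||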
| def BraceExpandWords(words): | ||
| out = [] | ||
| for w in words: | ||
| if w.tag == word_e.BracedWordTree: | ||
| parts_list = _BraceExpand(w.parts) | ||
| out.extend(ast.CompoundWord(p) for p in parts_list) | ||
| else: | ||
| out.append(w) | ||
| return out | ||
| def _Cartesian(tuples): | ||
| if len(tuples) == 1: | ||
| for x in tuples[0]: | ||
| yield (x,) | ||
| else: | ||
| for x in tuples[0]: | ||
| for y in _Cartesian(tuples[1:]): | ||
| yield (x,) + y # join tuples | ||
| def main(argv): | ||
| for t in _Cartesian([('a', 'b')]): | ||
| print(t) | ||
| print('--') | ||
| for t in _Cartesian([('a', 'b'), ('c', 'd', 'e'), ('f', 'g')]): | ||
| print(t) | ||
| if __name__ == '__main__': | ||
| try: | ||
| main(sys.argv) | ||
| except RuntimeError as e: | ||
| print('FATAL: %s' % e, file=sys.stderr) | ||
| sys.exit(1) |