In [1]:
from lark import Lark
import dis
import ast
from operator import add, sub, mul, div

In [2]:
# Lark grammar for the toy language.
#
# A program is a `block`: statements inside braces, each terminated by `;`.
# Statement and operator alternatives carry `-> alias` names; the alias becomes
# the tree node name, which the code generator uses to pick a `visit_<alias>`
# method.
#
# Notes on the aliases and terminals:
#   * `/` is aliased to `div` (and `-` to `sub`) so each arithmetic operator
#     gets its own tree node.
#   * logical `!` is aliased to `not`, keeping it distinct from unary minus,
#     which is aliased to `neg`.
#   * COMMENT uses a non-greedy body so a `(# ... #)` comment ends at the first
#     `#)` instead of swallowing everything up to the last one in the input.
grammar = r"""
?block: "{" (stmt ";") * "}"

stmt: "global" WORD expr -> assign_global
    | "local" WORD expr -> assign_local
    | "fn" WORD "(" WORD * ")" block -> declare_function
    | "return" expr -> return
    | "if" expr block ("elif" expr block)* ("else" block)? -> conditional
    | "while" expr block -> while
    | "print" expr -> print
    | expr

?expr: term "==" expr -> eq
    | term "<" expr -> lt
    | term ">" expr -> gt
    | term "<=" expr -> le
    | term ">=" expr -> ge
    | term "!=" expr -> ne
    | "!" expr -> not
    | expr "+" term -> add
    | expr "-" term -> sub
    | term

?term: term "*" factor -> mul
    | term "/" factor -> div
    | factor

?factor: "+" factor
    | "-" factor -> neg
    | NUMBER
    | WORD
    | ESCAPED_STRING
    | WORD "(" expr* ")" -> call
    | "True"
    | "False"
    | "None"
    | "(" expr ")"

COMMENT: /\(#[\s\S]*?#\)/

%import common.NUMBER
%import common.ESCAPED_STRING
%import common.WORD
%import common.WS
%ignore WS
%ignore COMMENT

"""

In [3]:
# Build the parser from the grammar string: LALR algorithm, with `block` as
# the start rule.
parser = Lark(
    grammar,
    parser='lalr',
    start='block',
)

In [4]:
# Parse a small sample program: two `print` statements around a bare
# expression statement (`2+2`), giving one tree node per statement.
tree = parser.parse("""
{
 print 1+2*3;
 2+2;
 print 9;
}
""")

In [5]:
# Show the parse tree as an indented outline. The parenthesized single-argument
# form prints the same text under the Python 2 print statement and is also
# valid as a Python 3 print() call.
print(tree.pretty())

block
  print
    add
      1
      mul
        2
        3
  stmt
    add
      2
      2
  print	9



In [6]:
# Bare expression: the notebook displays the repr of the parse tree
# (nested Tree / Token objects).
tree

Tree(block, [Tree(print, [Tree(add, [Token(NUMBER, '1'), Tree(mul, [Token(NUMBER, '2'), Token(NUMBER, '3')])])]), Tree(stmt, [Tree(add, [Token(NUMBER, '2'), Token(NUMBER, '2')])]), Tree(print, [Token(NUMBER, '9')])])

In [7]:
class Code(object):
    """Plain container for the result of compiling a program.

    Attributes (each instance gets its own fresh, empty containers):
        code:    list that the code generator appends ``(opcode, argument)``
                 pairs to and the VM reads from.
        names:   list, initially empty (not used by the code shown here).
        consts:  list, initially empty (not used by the code shown here).
        globals: dict, initially empty (not used by the code shown here).
    """

    def __init__(self):
        # Three independent list objects, then the dict.
        self.code, self.names, self.consts = [], [], []
        self.globals = dict()

In [8]:
class CodeGenerator(object):
    """Walks a parse tree and emits ``(opcode, argument)`` pairs for a stack VM.

    Calling an instance with a tree creates a fresh ``Code`` object, fills its
    ``code`` list and returns it. Dispatch is by node name: a ``Tree`` node is
    routed to ``visit_<node.data>`` and a ``Token`` to ``visit_<node.type>``.
    A node kind with no matching ``visit_*`` method raises ``AttributeError``.

    Opcodes emitted: PUSH (argument is the number), ADD, SUB, MUL, DIV, PRINT
    and POP (argument is ``None``).
    """

    def __call__(self, tree):
        """Compile ``tree`` and return the populated ``Code`` object."""
        self.co = Code()
        self.visit(tree)
        return self.co

    def _visit_children(self, node):
        """Visit every child of ``node`` in order."""
        for child in node.children:
            self.visit(child)

    def visit(self, node):
        """Dispatch ``node`` to the ``visit_*`` method named after it.

        Raises:
            TypeError: if ``node`` is neither a ``Tree`` nor a ``Token``.
            AttributeError: if no ``visit_*`` method exists for the node name.
        """
        # Nodes are recognised by class name rather than isinstance().
        node_class = type(node).__name__

        if node_class == 'Tree':
            node_type = node.data
        elif node_class == 'Token':
            node_type = node.type
        else:
            # Without this branch node_type would be unbound below and the
            # failure would surface as an obscure UnboundLocalError.
            raise TypeError(
                'cannot generate code for %s node: %r' % (node_class, node))

        visit_method = getattr(self, 'visit_' + node_type)
        visit_method(node)

    def _emit_binary(self, node, opcode):
        """Emit code for a two-operand node: left operand, right operand, opcode."""
        left, right = node.children
        self.visit(left)
        self.visit(right)
        self.co.code.append((opcode, None))

    def visit_add(self, node):
        self._emit_binary(node, 'ADD')

    def visit_sub(self, node):
        self._emit_binary(node, 'SUB')

    def visit_mul(self, node):
        self._emit_binary(node, 'MUL')

    def visit_div(self, node):
        self._emit_binary(node, 'DIV')

    def visit_print(self, node):
        """Evaluate the operand, then emit PRINT."""
        self._visit_children(node)
        self.co.code.append(('PRINT', None))

    def visit_stmt(self, node):
        """Evaluate an expression statement, then emit POP to discard its value."""
        self._visit_children(node)
        self.co.code.append(('POP', None))

    def visit_block(self, node):
        """Emit code for each statement of the block in order."""
        self._visit_children(node)

    def visit_NUMBER(self, node):
        """Emit PUSH with the numeric value of the token.

        Lark's ``common.NUMBER`` terminal matches float literals as well as
        integers, so text that ``int()`` rejects (e.g. ``1.5``) is converted
        with ``float()`` instead of raising ``ValueError``.
        """
        text = node.value
        try:
            number = int(text)
        except ValueError:
            number = float(text)
        self.co.code.append(('PUSH', number))

In [9]:
# Compile the parse tree: calling the generator returns a Code object whose
# `code` list holds the emitted (opcode, argument) pairs.
code_gen = CodeGenerator()
co = code_gen(tree)

In [10]:
class VM(object):
    """Stack machine that executes the ``(opcode, argument)`` pairs in ``co.code``.

    Each opcode is handled by the method named ``_<opcode in lower case>``;
    an opcode with no such method raises ``AttributeError``. Popping from an
    empty stack raises ``IndexError``.
    """

    def __init__(self, co):
        """Store the code object to run and start with an empty value stack."""
        self.co = co
        self.stack = []

    def run(self):
        """Execute every instruction of ``self.co.code`` from first to last."""
        i = 0
        while i < len(self.co.code):
            op, arg = self.co.code[i]
            i += 1
            run_op = getattr(self, '_' + op.lower())
            run_op(arg)

    def _pop_operands(self):
        """Pop the two topmost values and return them as ``(left, right)``."""
        # The right operand was pushed last, so it comes off the stack first.
        right = self.stack.pop()
        left = self.stack.pop()
        return left, right

    def _push(self, arg):
        """PUSH: put ``arg`` on the stack."""
        self.stack.append(arg)

    def _add(self, arg):
        """ADD: replace the two topmost values with their sum."""
        left, right = self._pop_operands()
        self.stack.append(left + right)

    def _sub(self, arg):
        """SUB: replace the two topmost values with ``left - right``."""
        left, right = self._pop_operands()
        self.stack.append(left - right)

    def _mul(self, arg):
        """MUL: replace the two topmost values with their product."""
        left, right = self._pop_operands()
        self.stack.append(left * right)

    def _div(self, arg):
        """DIV: replace the two topmost values with ``left / right``.

        Uses the host interpreter's ``/`` operator, so the result for two
        integers follows that Python version's division rules.
        """
        left, right = self._pop_operands()
        self.stack.append(left / right)

    def _print(self, arg):
        """PRINT: pop the top value and write it to standard output."""
        # Parenthesized single-argument form: same output under the Python 2
        # print statement, and valid as a Python 3 print() call.
        print(self.stack.pop())

    def _pop(self, arg):
        """POP: discard the top value."""
        self.stack.pop()

In [11]:
# Show the generated instructions, run them on a fresh VM, then display the
# VM's stack after the run as the cell result.
# The parenthesized single-argument print gives the same output under the
# Python 2 print statement and is also valid as a Python 3 print() call.
print(co.code)
vm = VM(co)
vm.run()
vm.stack

[('PUSH', 1), ('PUSH', 2), ('PUSH', 3), ('MUL', None), ('ADD', None), ('PRINT', None), ('PUSH', 2), ('PUSH', 2), ('ADD', None), ('POP', None), ('PUSH', 9), ('PRINT', None)]
7
9


[]

In [12]:
# Compile a small Python snippet with the built-in compile() in 'exec' mode so
# its bytecode can be inspected in the following cells.
# NOTE: this rebinds `co`, which until now held the toy compiler's Code object;
# from here on `co` is a Python code object instead.
# NOTE: the snippet uses the Python 2 print statement (`print x+z`).
co = compile("""

x=9
z=13
z=9

print x+z

11

""", 'temp', 'exec')

In [13]:
# Disassemble the compiled snippet into a readable instruction listing.
dis.dis(co)

  3           0 LOAD_CONST               0 (9)
              3 STORE_NAME               0 (x)

  4           6 LOAD_CONST               1 (13)
              9 STORE_NAME               1 (z)

  5          12 LOAD_CONST               0 (9)
             15 STORE_NAME               1 (z)

  7          18 LOAD_NAME                0 (x)
             21 LOAD_NAME                1 (z)
             24 BINARY_ADD          
             25 PRINT_ITEM          
             26 PRINT_NEWLINE       

  9          27 LOAD_CONST               2 (None)
             30 RETURN_VALUE        


In [14]:
# Names table of the code object; the name operands in the listing above
# (e.g. `STORE_NAME 0 (x)`) are indices into this tuple.
co.co_names

('x', 'z')

In [15]:
# Constants table of the code object; `LOAD_CONST n` in the listing above
# indexes into this tuple.
co.co_consts

(9, 13, None)

In [16]:
# Raw bytecode string of the code object (the bytes that dis.dis decoded above).
co.co_code

'd\x00\x00Z\x00\x00d\x01\x00Z\x01\x00d\x00\x00Z\x01\x00e\x00\x00e\x01\x00\x17GHd\x02\x00S'