# Episode 19: PyParsing Actions, with some ANTLR

In a vain attempt to run while chewing bubble-gum, let's finish up some tasks left undone in our previous episode while learning yet another parser generator.

Our plan of attack:

* Add actions to the parser we built with PyParsing so that it produces a parse tree.

* Start playing with ANTLR.

## Part I: PyParsing Actions

In [1]:
import pyparsing

# Skip only spaces and tabs as whitespace, so newlines reach the grammar as
# tokens; each statement below is terminated by LineEnd().
pyparsing.ParserElement.setDefaultWhitespaceChars(' \t')

# Terminals: an ID is a run of letters, a NUM is a run of digits.
ID = pyparsing.Word(pyparsing.alphas)
NUM = pyparsing.Word(pyparsing.nums)
atom = ID | NUM
op = pyparsing.Char('+-')
# expression refers to itself, so it is declared as a Forward placeholder
# first and given its definition with <<= afterwards.
expression = pyparsing.Forward()
expression <<= atom + pyparsing.Optional(op + expression)
assign = ID + '=' + expression
# assign is tried before expression, since both alternatives start with an ID.
statement = (assign | expression) + pyparsing.LineEnd()
# statement[...] is pyparsing notation for zero or more repetitions.
statements = statement[...]

# Two-line sample program: one bare expression and one assignment.
test_src = '1 + 2 - 3\nA = 42\n'
statements.parseString(test_src)

(['1', '+', '2', '-', '3', '\n', 'A', '=', '42', '\n'], {})

In [2]:
import nbimport
import Episode16

In [9]:
# Reuse the Tree node type defined in the Episode16 notebook.
Tree = Episode16.Tree
# Wrap the flat token list of the whole parse in a single 'statements' root.
statements.setParseAction(lambda toks: Tree('statements', list(toks)))
# The parse result now holds exactly one item: the root Tree.
statements.parseString(test_src)[0]

Tree(contents='statements', children=['1', '+', '2', '-', '3', '\n', 'A', '=', '42', '\n'])

In [12]:
# Give every nonterminal an action that wraps its matched tokens in a Tree
# labelled with the rule name. Tokens matched directly by these rules (the ID
# and '=' inside assign, and the line ends) stay plain strings.
statement.setParseAction(lambda toks: Tree('statement', list(toks)))
assign.setParseAction(lambda toks: Tree('assign', list(toks)))
expression.setParseAction(lambda toks: Tree('expression', list(toks)))
# op and atom receive raw token strings, so each string becomes a leaf Tree.
op.setParseAction(lambda toks: Tree('op', [Tree(tok, []) for tok in toks]))
atom.setParseAction(lambda toks: Tree('atom', [Tree(tok, []) for tok in toks]))
# Re-parse the sample source and keep the single root node.
parse_tree = statements.parseString(test_src)[0]
def tree_to_tuple(node):
    """Convert a Tree into nested ``(contents, children)`` tuples.

    Children that are Tree instances are converted recursively; any other
    child (for example a raw token string) is kept unchanged. The result is
    easier to pretty-print than the Tree repr.
    """
    label = node.contents
    converted = []
    for child in node.children:
        if isinstance(child, Tree):
            converted.append(tree_to_tuple(child))
        else:
            converted.append(child)
    return (label, converted)
tree_to_tuple(parse_tree)

('statements',
 [('statement',
   [('expression',
     [('atom', [('1', [])]),
      ('op', [('+', [])]),
      ('expression',
       [('atom', [('2', [])]),
        ('op', [('-', [])]),
        ('expression', [('atom', [('3', [])])])])]),
    '\n']),
  ('statement',
   [('assign', ['A', '=', ('expression', [('atom', [('42', [])])])]), '\n'])])

## Part II: ANTLR

To create an input for ANTLR, we'll adapt the EBNF from episode 16 to ANTLR's metagrammar (there's that prefix again...).

The result:

```
grammar calc;

statements: (statement NL)* ;
statement: assign | expr ;
assign: ID '=' expr ;
expr: atom (op expr)? ;
atom: ID | NUM ;
op: '+' | '-' ;
NL: [\r\n]+ ;
ID: [A-Za-z]+ ;
NUM: [0-9]+ ;
WS: [ \t]+ -> skip ;
```

In [13]:
!conda install antlr
!pip install antlr4-python3-runtime
import metamagics, antlrmagic

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [14]:
%load_ext metamagics

In [18]:
!antlr4 -Dlanguage=Python3 calc.g4
!cat calcLexer.py calcParser.py calcListener.py

# Generated from calc.g4 by ANTLR 4.9.2
from antlr4 import *
from io import StringIO
import sys
if sys.version_info[1] > 5:
    from typing import TextIO
else:
    from typing.io import TextIO



def serializedATN():
    with StringIO() as buf:
        buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\t")
        buf.write("-\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7")
        buf.write("\4\b\t\b\3\2\3\2\3\3\3\3\3\4\3\4\3\5\6\5\31\n\5\r\5\16")
        buf.write("\5\32\3\6\6\6\36\n\6\r\6\16\6\37\3\7\6\7#\n\7\r\7\16\7")
        buf.write("$\3\b\6\b(\n\b\r\b\16\b)\3\b\3\b\2\2\t\3\3\5\4\7\5\t\6")
        buf.write("\13\7\r\b\17\t\3\2\6\4\2\f\f\17\17\4\2C\\c|\3\2\62;\4")
        buf.write("\2\13\13\"\"\2\60\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2\2")
        buf.write("\2\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\3\21")
        buf.write("\3\2\2\2\5\23\3\2\2\2\7\25\3\2\2\2\t\30\3\2\2\2\13\35")
        buf.write("\3\2\2\2\r\"\3\2\2\2\17\'\3\2\2\2

In [19]:
%%mycell antlrmagic.antlrify
grammar calc2;

statements: (statement NL)* ;
statement: assign | expr ;
assign: ID '=' expr ;
expr: atom (op expr)? ;
atom: ID | NUM ;
op: '+' | '-' ;
NL: [\r\n]+ ;
ID: [A-Za-z]+ ;
NUM: [0-9]+ ;
WS: [ \t]+ -> skip ;

Imported calc2Lexer
Imported calc2Parser
Imported calc2Listener


In [22]:
import antlr4, importlib

importlib.reload(calcLexer)
importlib.reload(calcParser)

def parse_statements(src):
    """Parse ``src`` with the generated ``calc`` lexer and parser.

    The source string is wrapped in an ANTLR input stream, tokenized by
    ``calcLexer``, and fed to ``calcParser``. Returns the context object
    produced by the grammar's ``statements`` rule.
    """
    lexer = calcLexer.calcLexer(antlr4.InputStream(src))
    parser = calcParser.calcParser(antlr4.CommonTokenStream(lexer))
    return parser.statements()

antlr_tree = parse_statements(test_src)
antlr_tree.getText()

'1+2-3\nA=42\n'

In [24]:
def parse_statements2(src):
    """Parse ``src`` with the ``calc2`` lexer and parser.

    Follows the usual ANTLR pipeline: input stream -> ``calc2Lexer`` ->
    token stream -> ``calc2Parser``. Returns the context object produced by
    the grammar's ``statements`` rule.
    """
    chars = antlr4.InputStream(src)
    tokens = antlr4.CommonTokenStream(calc2Lexer.calc2Lexer(chars))
    return calc2Parser.calc2Parser(tokens).statements()

antlr_tree2 = parse_statements2(test_src)
antlr_tree2.getText()

'1+2-3\nA=42\n'

In [25]:
antlr_tree2

<calc2Parser.calc2Parser.StatementsContext at 0x10ea6e200>

In [28]:
list(antlr_tree2.getChildren()), antlr_tree2.getRuleIndex()

([<calc2Parser.calc2Parser.StatementContext at 0x10ea6e350>,
  <antlr4.tree.Tree.TerminalNodeImpl at 0x10e75dbc0>,
  <calc2Parser.calc2Parser.StatementContext at 0x10ea6e7b0>,
  <antlr4.tree.Tree.TerminalNodeImpl at 0x10e6fe7c0>],
 0)

In [32]:
# Maps each rule index constant on the generated calc2Parser class to the
# name of the corresponding nonterminal in the calc2 grammar, so a context's
# getRuleIndex() can be turned into a readable Tree label.
RULE_TO_NT = {
    calc2Parser.calc2Parser.RULE_statements : 'statements',
    calc2Parser.calc2Parser.RULE_statement : 'statement',
    calc2Parser.calc2Parser.RULE_assign : 'assign',
    calc2Parser.calc2Parser.RULE_expr : 'expr',
    calc2Parser.calc2Parser.RULE_atom : 'atom',
    calc2Parser.calc2Parser.RULE_op : 'op'
}
def ctx_to_tree(ctx):
    """Convert an ANTLR parse context into a Tree.

    The node label is looked up in RULE_TO_NT by the context's rule index
    (``None`` if the index is not in the table). Child rule contexts are
    converted recursively; every other child becomes a leaf Tree whose
    contents is the child node object itself, not its text.
    """
    def convert(child):
        if isinstance(child, antlr4.ParserRuleContext):
            return ctx_to_tree(child)
        return Tree(child, [])

    label = RULE_TO_NT.get(ctx.getRuleIndex())
    return Tree(label, [convert(child) for child in ctx.getChildren()])
ctx_to_tree(antlr_tree2)

Tree(contents='statements', children=[Tree(contents='statement', children=[Tree(contents='expr', children=[Tree(contents='atom', children=[Tree(contents=<antlr4.tree.Tree.TerminalNodeImpl object at 0x10e874540>, children=[])]), Tree(contents='op', children=[Tree(contents=<antlr4.tree.Tree.TerminalNodeImpl object at 0x10e778880>, children=[])]), Tree(contents='expr', children=[Tree(contents='atom', children=[Tree(contents=<antlr4.tree.Tree.TerminalNodeImpl object at 0x10e704bc0>, children=[])]), Tree(contents='op', children=[Tree(contents=<antlr4.tree.Tree.TerminalNodeImpl object at 0x10e7b84c0>, children=[])]), Tree(contents='expr', children=[Tree(contents='atom', children=[Tree(contents=<antlr4.tree.Tree.TerminalNodeImpl object at 0x10e732540>, children=[])])])])])]), Tree(contents=<antlr4.tree.Tree.TerminalNodeImpl object at 0x10e75dbc0>, children=[]), Tree(contents='statement', children=[Tree(contents='assign', children=[Tree(contents=<antlr4.tree.Tree.TerminalNodeImpl object at 0x1

In [33]:
tree_to_tuple(_)

('statements',
 [('statement',
   [('expr',
     [('atom', [(<antlr4.tree.Tree.TerminalNodeImpl at 0x10e874540>, [])]),
      ('op', [(<antlr4.tree.Tree.TerminalNodeImpl at 0x10e778880>, [])]),
      ('expr',
       [('atom', [(<antlr4.tree.Tree.TerminalNodeImpl at 0x10e704bc0>, [])]),
        ('op', [(<antlr4.tree.Tree.TerminalNodeImpl at 0x10e7b84c0>, [])]),
        ('expr',
         [('atom',
           [(<antlr4.tree.Tree.TerminalNodeImpl at 0x10e732540>, [])])])])])]),
  (<antlr4.tree.Tree.TerminalNodeImpl at 0x10e75dbc0>, []),
  ('statement',
   [('assign',
     [(<antlr4.tree.Tree.TerminalNodeImpl at 0x10e724c00>, []),
      (<antlr4.tree.Tree.TerminalNodeImpl at 0x10e722440>, []),
      ('expr',
       [('atom',
         [(<antlr4.tree.Tree.TerminalNodeImpl at 0x10e6fec00>, [])])])])]),
  (<antlr4.tree.Tree.TerminalNodeImpl at 0x10e6fe7c0>, [])])