In [None]:
from IPython.core.display import HTML
# Read the style sheet of the notebook and wrap its text in an IPython HTML
# object; being the last expression of the cell, this object is displayed.
with open ("../style.css", "r") as file:
    css = file.read()
HTML(css)

# A Simple Compiler for a Fragment of `C`

This file shows how a simple compiler for a fragment of the programming language `C` can be implemented using `Ply`.

## Specification of the Scanner

The scanner that we implement here is similar to the scanner that we had used for our symbolic calculator.

In [None]:
import ply.lex as lex

In [None]:
# Names of all token types that are declared explicitly: numbers, identifiers,
# the operators consisting of two characters, and the keywords.
tokens = [ 'NUMBER', 'ID', 'EQ', 'NE', 'LE', 'GE', 'AND', 'OR',
           'INT', 'IF', 'ELSE', 'WHILE', 'RETURN'
         ]

The token `NUMBER` specifies a natural number.

In [None]:
# Either the single digit 0 or a non-zero digit followed by arbitrary digits,
# i.e. numbers must not have leading zeros.
t_NUMBER = r'0|[1-9][0-9]*'

Below, we define the tokens for operator symbols consisting of more than one character.

In [None]:
# Comparison operators and logical connectives that consist of two characters.
t_EQ  = r'=='
t_NE  = r'!='
t_LE  = r'<='
t_GE  = r'>='
t_AND = r'&&'
# The character | has to be escaped because it is a regular expression operator.
t_OR  = r'\|\|'

Our version of `C` allows both single line comments and multi line comments.
- The regular expression `//[^\n]*` recognizes single line comments.
  A single line comment starts with `//` and extends to the end of the line.
- The regular expression `\/\*([^*]|\*+[^*/])*\*+\/` recognizes multi line comments.
  Multi line comments start with the string `/*` and end with the string `*/`.
  Between these strings, the string `*/` must not occur.
  * `\/\*` matches the opening `/*`.
  * `[^*]|\*+[^*/]` matches any character that is different from the character `*` as
    well as non-empty sequences of `*`s that are not followed by a `/`.
  * `\*+\/` matches any non-empty sequence of `*`s that is followed by a `/`.
  The expression needs to be this complicated in order to match multi line comments of the
  following form:
  ```
  /*** abc *** xyz ***/
  ```

In [None]:
def t_COMMENT(t):
    r'//[^\n]*|\/\*([^*]|\*+[^*/])*\*+\/'
    # Comments do not produce a token (nothing is returned), but the newlines
    # inside a multi line comment still have to be added to the line counter.
    newlines = t.value.count('\n')
    t.lexer.lineno += newlines

The keywords `'int', 'if', 'else', 'while', 'return'` have to be dealt with separately as they are syntactically identical to identifiers.  The dictionary `Keywords` shown below maps every keyword to its *token type*.

In [None]:
# Maps the text of every keyword to the token type that is assigned to it.
Keywords = { 'int'   : 'INT', 
             'if'    : 'IF',
             'else'  : 'ELSE', 
             'while' : 'WHILE', 
             'return': 'RETURN'
           }

When an identifier is read, we first have to check whether the identifier is one of our keywords.  If so, we assign the corresponding token type that is stored in the dictionary `Keywords`.  Otherwise, the token type is set to `ID`.

In [None]:
def t_ID(t):
    r'[a-zA-Z][a-zA-Z0-9_]*'
    # A keyword has the same shape as an identifier.  Therefore the matched
    # text is looked up in Keywords; only if it is absent, the type is 'ID'.
    if t.value in Keywords:
        t.type = Keywords[t.value]
    else:
        t.type = 'ID'
    return t

Operators consisting of a single character do not need an associated token type.

In [None]:
# Single characters that are passed to the parser as they are.
literals = ['+', '-', '*', '/', '%', '(', ')', '{', '}', ';', '=', '<', '>', '!', ',']

White space, i.e. *space characters*, *tabulators*, and *carriage returns* are ignored. 

In [None]:
# Blank, tabulator, and carriage return are skipped by the scanner.
t_ignore  = ' \t\r'

Syntactically, newline characters are ignored. However, we still need to keep track of them in order to know which line we are in.  This information is needed later for error messages.

In [None]:
def t_newline(t):
    r'\n'
    # A newline produces no token (nothing is returned); it only advances the
    # line counter of the scanner.
    t.lexer.lineno = t.lexer.lineno + 1

Given a `token`, the function `find_column` returns the column where `token` starts.
This is possible, because `token.lexer.lexdata` stores the string that is given to the scanner and `token.lexpos` is the number of characters that precede `token`.

In [None]:
def find_column(token):
    """Return the column in which token starts, counting from 1.

    token.lexer.lexdata is the complete text that has been given to the
    scanner and token.lexpos is the index of the first character of token
    in this text.
    """
    position = token.lexpos
    # Index of the newline preceding the token; -1 if the token is in the
    # first line, which conveniently yields position + 1 below.
    last_newline = token.lexer.lexdata.rfind('\n', 0, position)
    return position - last_newline

The function `t_error` is called for any token `t` that can not be scanned by the lexer.  In this case, `t.value[0]` is the first character that can not be recognized by the scanner.

In [None]:
def t_error(t):
    # Report the first character that could not be scanned together with its
    # position, then skip exactly this character so that scanning continues.
    column  = find_column(t)
    message = f"Illegal character '{t.value[0]}' in line {t.lineno}, column {column}."
    print(message)
    t.lexer.skip(1)

The next assignment is necessary to make the lexer think that the code given above is part of some file.

In [None]:
# A notebook has no __file__; this name is set so that the lexer generator can
# treat the definitions above as if they were part of a file.
__file__ = 'main'

In [None]:
# Generate the scanner from the token definitions given above.
lexer = lex.lex()

In [None]:
def test_scanner(file_name):
    """Print the file file_name and return the list of all its tokens.

    The line counter of the scanner is reset to 1 before the tokens are read.
    """
    with open(file_name, 'r') as source:
        text = source.read()
    print(text)
    lexer.input(text)
    lexer.lineno = 1
    # Iterating over the lexer yields the tokens one after the other.
    return list(lexer)

In [None]:
# Scan the example program and print every token on a line of its own.
for t in test_scanner('Examples/MySum.c'):
    print(t)

## Specification of the Parser

We will use the following grammar to specify the language that our compiler can translate:
```
program
    : function
    | function program

function 
    : INT ID '(' param_list ')' '{' decl_list stmnt_list '}'

param_list 
    : /* epsilon */
    | INT ID 
    | INT ID ',' ne_param_list

ne_param_list
    : INT ID
    | INT ID ',' ne_param_list
    
decl_list
    : /* epsilon */
    | INT ID ';' decl_list

stmnt_list
    : /* epsilon */
    | stmnt stmnt_list
    
stmnt 
    : IF '(' bool_expr ')' stmnt                 
    | IF '(' bool_expr ')' stmnt ELSE stmnt
    | WHILE '(' bool_expr ')' stmnt
    | '{' stmnt_list '}' 
    | ID '=' expr ';'  
    | RETURN expr ';'   
    | expr ';'       

bool_expr 
    : bool_expr '||' bool_expr
    | bool_expr '&&' bool_expr 
    | '!' bool_expr   
    | '(' bool_expr ')'
    | expr '==' expr     
    | expr '!=' expr     
    | expr '<=' expr     
    | expr '>=' expr     
    | expr '<'  expr      
    | expr '>'  expr     
 
expr: expr '+'  expr                 
    | expr '-' expr               
    | expr '*' expr               
    | expr '/' expr 
    | expr '%' expr 
    | '(' expr ')' 
    | NUMBER
    | ID                      
    | ID '(' expr_list ')'       

expr_list
    : /* epsilon */
    | expr
    | expr ',' ne_expr_list

ne_expr_list
    : expr
    | expr ',' ne_expr_list

```
We will use precedence declarations to resolve the ambiguity that is inherent in this grammar.

In [None]:
import ply.yacc as yacc

The *start variable* of our grammar is `program`.

In [None]:
# Grammar symbol that the parser has to derive from the complete input.
start = 'program'

In [None]:
# Precedence declarations, listed from lowest to highest precedence.
# The entries for IF and ELSE exist only to resolve the dangling-else
# ambiguity; the remaining entries order the Boolean and arithmetic operators.
precedence = (
    ('nonassoc', 'IF'),
    ('nonassoc', 'ELSE'),
    ('left', 'OR'),
    ('left', 'AND'),
    ('right', '!'),
    ('nonassoc', 'EQ', 'NE', 'LE', 'GE', '<', '>'),
    ('left', '+', '-'),
    ('left', '*', '/', '%')
)

In [None]:
# A program is represented as the tuple ('program', f1, ..., fn) where the fi
# are the abstract syntax trees of its functions.
def p_program_one(p):
    "program : function"
    fct  = p[1]
    p[0] = ('program', fct)

def p_program_more(p):
    "program : function program"
    # p[2] is itself of the form ('program', ...); its tag is dropped.
    p[0] = ('program', p[1], *p[2][1:])

In [None]:
def p_function(p):
    "function : INT ID '(' param_list ')' '{' decl_list stmnt_list '}'"
    # Only the name, the parameters, the declarations, and the statements are
    # kept; the keyword and the punctuation are dropped.
    name, params, decls, stmnts = p[2], p[4], p[7], p[8]
    p[0] = ('fct', name, params, decls, stmnts)

In [None]:
# Parameter lists are represented as ('.', x1, ..., xn) where the xi are the
# names of the parameters.
def p_param_list_empty(p):
    "param_list :"
    p[0] = tuple('.')

def p_param_list_one(p):
    "param_list : INT ID"
    name = p[2]
    p[0] = ('.', name)

def p_param_list_more(p):
    "param_list : INT ID ',' ne_param_list"
    # The tag '.' of the remaining list is dropped before appending.
    p[0] = ('.', p[2], *p[4][1:])

def p_ne_param_list_one(p):
    "ne_param_list : INT ID"
    name = p[2]
    p[0] = ('.', name)

def p_ne_param_list_more(p):
    "ne_param_list : INT ID ',' ne_param_list"
    p[0] = ('.', p[2], *p[4][1:])

In [None]:
# Declarations are represented as ('.', x1, ..., xn) where the xi are the
# names of the declared variables.
def p_decl_list_one(p):
    "decl_list :"
    p[0] = tuple('.')

def p_decl_list_more(p):
    "decl_list : INT ID ';' decl_list"
    p[0] = ('.', p[2], *p[4][1:])

In [None]:
# Statement lists are represented as ('.', s1, ..., sn) where the si are the
# abstract syntax trees of the statements.
def p_stmnt_list_one(p):
    "stmnt_list :"
    p[0] = tuple('.')

def p_stmnt_list_more(p):
    "stmnt_list : stmnt stmnt_list"
    p[0] = ('.', p[1], *p[2][1:])

In the grammar rule
$$ \texttt{stmnt} \rightarrow \texttt{'if'}\;\texttt{'('}\; \texttt{bool_expr}\; \texttt{')'}\; \texttt{stmnt}$$
the rightmost token is `')'`.  However, this token does not have a precedence.  Therefore, the grammar rule
does not have a precedence either.  Hence, we manually assign the precedence of the token `IF` to this rule via 
the keyword `%prec`.  This way, the shift/reduce conflict resulting from the *dangling-else ambiguity* is resolved.

In [None]:
def p_stmnt_if(p):
    "stmnt : IF '(' bool_expr ')' stmnt %prec IF"
    condition, body = p[3], p[5]
    p[0] = ('if', condition, body)

def p_stmnt_if_else(p):
    "stmnt : IF '(' bool_expr ')' stmnt ELSE stmnt"
    condition, then_part, else_part = p[3], p[5], p[7]
    p[0] = ('if-else', condition, then_part, else_part)

def p_stmnt_while(p):
    "stmnt : WHILE '(' bool_expr ')' stmnt"
    condition, body = p[3], p[5]
    p[0] = ('while', condition, body)

def p_stmnt_block(p):
    "stmnt : '{' stmnt_list '}'"
    # A block is represented by its statement list; the braces are dropped.
    stmnt_list = p[2]
    p[0] = stmnt_list

def p_stmnt_assign(p):
    "stmnt : ID '=' expr ';'"
    var, value = p[1], p[3]
    p[0] = ('=', var, value)

def p_stmnt_return(p):
    "stmnt : RETURN expr ';'"
    value = p[2]
    p[0] = ('return', value)

def p_stmnt_expr(p):
    "stmnt : expr ';'"
    # An expression statement is represented by the expression itself.
    expr = p[1]
    p[0] = expr

In [None]:
# Apart from parenthesized expressions, every Boolean expression is
# represented as a tuple whose first component is the operator.
def p_bool_expr_or(p):
    "bool_expr : bool_expr OR bool_expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('||', lhs, rhs)

def p_bool_expr_and(p):
    "bool_expr : bool_expr AND bool_expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('&&', lhs, rhs)

def p_bool_expr_neg(p):
    "bool_expr : '!' bool_expr"
    arg  = p[2]
    p[0] = ('!', arg)

def p_bool_expr_paren(p):
    "bool_expr : '(' bool_expr ')'"
    # Parentheses only group; they leave no trace in the syntax tree.
    inner = p[2]
    p[0]  = inner

def p_bool_expr_eq(p):
    "bool_expr : expr EQ expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('==', lhs, rhs)

def p_bool_expr_ne(p):
    "bool_expr : expr NE expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('!=', lhs, rhs)

def p_bool_expr_le(p):
    "bool_expr : expr LE expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('<=', lhs, rhs)

def p_bool_expr_ge(p):
    "bool_expr : expr GE expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('>=', lhs, rhs)

def p_bool_expr_lt(p):
    "bool_expr : expr '<' expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('<', lhs, rhs)

def p_bool_expr_gt(p):
    "bool_expr : expr '>' expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('>', lhs, rhs)

In [None]:
# Arithmetic expressions are represented as follows:
#   - a variable is represented by its name, i.e. by a string,
#   - a number n is represented as ('Number', n),
#   - a binary expression is represented as (op, lhs, rhs),
#   - a function call is represented as ('call', f, arg1, ..., argn).
def p_expr_plus(p):
    "expr : expr '+' expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('+', lhs, rhs)

def p_expr_minus(p):
    "expr : expr '-' expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('-', lhs, rhs)

def p_expr_times(p):
    "expr : expr '*' expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('*', lhs, rhs)

def p_expr_divide(p):
    "expr : expr '/' expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('/', lhs, rhs)

def p_expr_modulo(p):
    "expr : expr '%' expr"
    lhs, rhs = p[1], p[3]
    p[0] = ('%', lhs, rhs)

def p_expr_group(p):
    "expr : '(' expr ')'"
    # Parentheses only group; they leave no trace in the syntax tree.
    inner = p[2]
    p[0]  = inner

def p_expr_number(p):
    "expr : NUMBER"
    digits = p[1]
    p[0]   = ('Number', digits)

def p_expr_id(p):
    "expr : ID"
    name = p[1]
    p[0] = name

def p_expr_fct_call(p):
    "expr : ID '(' expr_list ')'"
    # The tag '.' of the argument list is dropped.
    p[0] = ('call', p[1], *p[3][1:])

In [None]:
# Argument lists are represented as ('.', e1, ..., en) where the ei are the
# abstract syntax trees of the arguments.
def p_expr_list_empty(p):
    "expr_list :"
    p[0] = tuple('.')

def p_expr_list_one(p):
    "expr_list : expr"
    arg  = p[1]
    p[0] = ('.', arg)

def p_expr_list_more(p):
    "expr_list : expr ',' ne_expr_list"
    # The tag '.' of the remaining list is dropped before appending.
    p[0] = ('.', p[1], *p[3][1:])

def p_ne_expr_list_one(p):
    "ne_expr_list : expr"
    arg  = p[1]
    p[0] = ('.', arg)

def p_ne_expr_list_more(p):
    "ne_expr_list : expr ',' ne_expr_list"
    p[0] = ('.', p[1], *p[3][1:])

In [None]:
def p_error(p):
    # The parser calls this function with the offending token, or with None if
    # the error is detected at the end of the input.  The column must only be
    # computed in the first case, since None has no position.
    if p is None:
        print('Syntax error at end of input.')
        return
    column = find_column(p)
    print(f'Syntax error at token "{p.value}" in line {p.lineno}, column {column}.')

Setting the optional argument `write_tables` to `False` **is required** to prevent an obscure bug where the parser generator tries to read an empty parse table.  

In [None]:
# Generate the parser from the grammar rules given above.  No table file is
# written; debug=True requests the log file parser.out that is shown below.
parser = yacc.yacc(write_tables=False, debug=True)

As we have used *precedence declarations* to resolve all shift/reduce conflicts, the action table contains no conflict.  

In [None]:
!cat parser.out

The notebook `AST-2-Dot.ipynb` provides the function `tuple2dot`.  This function can be used to visualize the abstract syntax tree that is generated by the function `yacc.parse`.

In [None]:
%run ../ANTLR4-Python/AST-2-Dot.ipynb

The function `parse` takes a `file_name` as its sole argument.  The file is read and parsed. 
The resulting parse tree is visualized using `graphviz`.  It is important to reset the
attribute `lineno` of the scanner, for otherwise error messages will not have the correct line numbers.

In [None]:
def parse(file_name):
    """Parse the file file_name, print its syntax tree, and visualize it.

    The line counter of the scanner is reset first so that error messages
    refer to the lines of this file.  The value returned is the result of
    applying tuple2dot to the syntax tree.
    """
    lexer.lineno = 1
    with open(file_name, 'r') as source:
        text = source.read()
    tree = yacc.parse(text)
    print(tree)
    return tuple2dot(tree)

In [None]:
!cat -n Examples/MySum.c

In [None]:
# Parse the example program shown above and display its syntax tree.
parse('Examples/MySum.c')

The function `indent` is used to indent the generated assembler commands by preceding them with 8 space characters. 

In [None]:
def indent(s):
    """Return the string s preceded by 8 space characters."""
    padding = ' ' * 8
    return padding + s

The method `compile_expr(expr, st, class_name)` takes three arguments:
- `expr` is an *abstract syntax tree* that represents an expression.  
  This abstract syntax tree is in turn represented as a nested tuple.  
- `st` is short for *symbol table*.  This is a dictionary that maps variable
  names to natural numbers.  Given a variable `x`, the number `st[x]` specifies
  the location where the variable `x` is stored on the stack with respect to the 
  local stack frame.
- `class_name` is the name of the class that is to be generated.

The function returns a pair of the form `(cmds, size)`.
- `cmds` is a list of assembler commands,
- `size` is the maximum size of the stack that is needed. 

In [None]:
def compile_expr(expr, st, class_name):
    """Translate an arithmetic expression into a list of assembler commands.

    Parameters:
        expr:       abstract syntax tree of the expression.  It is either
                    - a string, which is interpreted as a variable name,
                    - a pair ('Number', n),
                    - a triple (op, lhs, rhs) where op is one of + - * / %,
                    - a tuple ('call', f, arg1, ..., argn).
        st:         symbol table; st[x] is the index that is used in the
                    iload command generated for the variable x.
        class_name: name of the generated class.  It qualifies the name of
                    every called function other than println.

    Returns:
        A pair (cmds, size) where cmds is the list of generated commands and
        size is the stack size needed to execute them.

    Raises:
        AssertionError: if expr has none of the forms listed above.
        KeyError:       if a variable does not occur in st.
    """
    if isinstance(expr, str):
        return [indent(f'iload {st[expr]}')], 1
    if expr[0] == 'Number':
        _, n = expr
        return [indent(f'ldc {n}')], 1
    if expr[0] in ('+', '-', '*', '/', '%'):
        op, lhs, rhs = expr
        L1, sz1 = compile_expr(lhs, st, class_name)
        L2, sz2 = compile_expr(rhs, st, class_name)
        OpToCmd = { '+': 'iadd', '-': 'isub', '*': 'imul', '/': 'idiv', '%': 'irem' }
        # While the right operand is evaluated, the value of the left operand
        # already occupies one stack entry.
        return L1 + L2 + [indent(OpToCmd[op])], max(sz1, 1 + sz2)
    if expr[0] == 'call':
        _, f, *args = expr
        is_println = (f == 'println')
        if is_println:
            # The reference to System.out lies below all arguments.
            CmdLst = [indent('getstatic java/lang/System/out Ljava/io/PrintStream;')]
            offset = 1
        else:
            CmdLst = []
            offset = 0
        # At least one entry is always needed: println pushes the reference to
        # System.out even if it has no arguments, and every other function
        # leaves its result on the stack.
        stck_size = 1
        for cnt, arg in enumerate(args):
            L, sz_arg = compile_expr(arg, st, class_name)
            # The cnt arguments evaluated so far are still on the stack.
            stck_size = max(stck_size, offset + cnt + sz_arg)
            CmdLst   += L
        signature = 'I' * len(args)
        if is_println:
            CmdLst.append(indent(f'invokevirtual java/io/PrintStream/println({signature})V'))
        else:
            CmdLst.append(indent(f'invokestatic {class_name}/{f}({signature})I'))
        return CmdLst, stck_size
    # raise instead of assert, so that the check survives python -O
    raise AssertionError(f'Error in compile_expr({expr}, {st}, {class_name})')

The following is a test of the function `compile_expr`.

In [None]:
# Test case: a call of println with two arguments, the second of which is a
# call of the function sum.
expr = ('call', 'println', 'x', ('call', 'sum', ('+', 'x', ('*', 'y', ('Number','2')))))
st   = { 'x': 0, 'y': 1}
compile_expr(expr, st, 'Sum')

The variable `label_counter` is a global counter that is used to create unique label names.
Every call of `new_label` creates a new, unique label.

In [None]:
# Number of the label that has been created most recently.
label_counter = 0

def new_label():
    """Increment the global label counter and return the label 'l<counter>'."""
    global label_counter
    label_counter = label_counter + 1
    return f'l{label_counter}'

The method `compile_bool(expr, st, class_name)` takes three arguments:
- `expr` is an *abstract syntax tree* that represents a Boolean expression.  
  This abstract syntax tree is in turn represented as a nested tuple.  
- `st` is short for *symbol table*.  This is a dictionary that maps variable
  names to natural numbers.  Given a variable `x`, the number `st[x]` specifies
  the location where the variable `x` is stored on the stack with respect to the 
  local stack frame.
- `class_name` is the name of the class that is to be generated.

The function returns a pair of the form `(cmds, size)`.
- `cmds` is a list of assembler commands,
- `size` is the maximum size of the stack that is needed. 

In [None]:
def compile_bool(expr, st, class_name):
    """Translate a Boolean expression into a list of assembler commands.

    The generated commands leave the number 1 on the stack if the expression
    is true and the number 0 otherwise.

    Parameters:
        expr:       abstract syntax tree of the Boolean expression.  It is
                    - a triple (op, lhs, rhs) where op is a comparison
                      operator and lhs, rhs are arithmetic expressions,
                    - a triple (op, lhs, rhs) where op is '&&' or '||' and
                      lhs, rhs are Boolean expressions, or
                    - a pair ('!', arg) where arg is a Boolean expression.
        st:         symbol table, passed on to compile_expr.
        class_name: name of the generated class, passed on to compile_expr.

    Returns:
        A pair (cmds, size) where cmds is the list of generated commands and
        size is the stack size needed to execute them.

    Raises:
        AssertionError: if expr has none of the forms listed above.
    """
    if expr[0] in ['==', '!=', '<=', '>=', '<', '>']:
        OpToCmd = { '==': 'if_icmpeq',
                    '!=': 'if_icmpne',
                    '<=': 'if_icmple',
                    '>=': 'if_icmpge',
                    '<' : 'if_icmplt',
                    '>' : 'if_icmpgt'
                  }
        op, lhs, rhs = expr
        L1, sz1    = compile_expr(lhs, st, class_name)
        L2, sz2    = compile_expr(rhs, st, class_name)
        true_label = new_label()
        next_label = new_label()
        # The comparison jumps to true_label, where 1 is pushed; otherwise 0
        # is pushed and the code for the true case is skipped.
        CmdLst = L1 + L2 + [
            indent(OpToCmd[op] + ' ' + true_label),
            indent('bipush 0'),
            indent('goto ' + next_label),
            ' ' * 4 + true_label + ':',
            indent('bipush 1'),
            ' ' * 4 + next_label + ':'
        ]
        return CmdLst, max(sz1, 1 + sz2)
    if expr[0] in ['&&', '||']:
        op, lhs, rhs = expr
        OpToCmd      = { '&&': 'iand', '||': 'ior' }
        L1, sz1      = compile_bool(lhs, st, class_name)
        L2, sz2      = compile_bool(rhs, st, class_name)
        # Both operands are always evaluated; their values 0 / 1 are then
        # combined bitwise.
        CmdLst       = L1 + L2 + [indent(OpToCmd[op])]
        return CmdLst, max(sz1, 1 + sz2)
    if expr[0] == '!':
        _, arg = expr
        # The argument of '!' is a Boolean expression, hence compile_bool.
        L, sz  = compile_bool(arg, st, class_name)
        # The negation of b is computed as 1 - b.  The constant 1 is on the
        # stack while the argument is evaluated.
        CmdLst = [indent('bipush 1')] + L + [indent('isub')]
        return CmdLst, sz + 1
    # raise instead of assert, so that the check survives python -O
    raise AssertionError(f'Error in compile_bool({expr}, {st}, {class_name})')

Below is a test for the function `compile_bool`.

In [None]:
# Test case: the comparison of the variable x with the number 0.
expr = ('==', 'x', ('Number', '0'))
st   = { 'x': 0, 'y': 1}
compile_bool(expr, st, 'Sum')

The method `compile_stmnt(stmnt, st, class_name)` takes three arguments:
- `stmnt` is an *abstract syntax tree* that represents a statement.  
  This abstract syntax tree is in turn represented as a nested tuple.  
- `st` is short for *symbol table*.  This is a dictionary that maps variable
  names to natural numbers.  Given a variable `x`, the number `st[x]` specifies
  the location where the variable `x` is stored on the stack with respect to the 
  local stack frame.
- `class_name` is the name of the class that is to be generated.

The function returns a pair of the form `(cmds, size)`.
- `cmds` is a list of assembler commands,
- `size` is the maximum size of the stack that is needed. 

In [None]:
def compile_stmnt(stmnt, st, class_name):
    """Translate a statement into a list of assembler commands.

    Parameters:
        stmnt:      abstract syntax tree of the statement.  The following
                    forms are distinguished by their first component:
                    ('=', var, expr), ('if', cond, stmnt),
                    ('if-else', cond, then_stmnt, else_stmnt),
                    ('while', cond, body), ('return', expr), and
                    ('.', s1, ..., sn) for a list of statements.
                    Everything else is compiled as an expression.
        st:         symbol table; st[x] is the index that is used in the
                    istore command generated for an assignment to x.
        class_name: name of the generated class.

    Returns:
        A pair (cmds, size) where cmds is the list of generated commands and
        size is the maximum of the stack sizes of all parts of the statement.
    """
    def label_line(label):
        # Label definitions are indented by 4 blanks and end with a colon.
        return ' ' * 4 + label + ':'

    kind = stmnt[0]
    if kind == '=':
        _, var, value = stmnt
        code, depth = compile_expr(value, st, class_name)
        code.append(indent(f'istore {st[var]}'))
        return code, depth
    if kind == 'if':
        _, condition, body = stmnt
        cond_code, cond_depth = compile_bool(condition, st, class_name)
        body_code, body_depth = compile_stmnt(body, st, class_name)
        else_label = new_label()
        # The body is skipped if the condition has produced 0.
        code = cond_code + [indent(f'ifeq {else_label}')] + body_code
        code.append(label_line(else_label))
        return code, max(cond_depth, body_depth)
    if kind == 'if-else':
        _, condition, then_part, else_part = stmnt
        cond_code, cond_depth = compile_bool(condition, st, class_name)
        then_code, then_depth = compile_stmnt(then_part, st, class_name)
        else_code, else_depth = compile_stmnt(else_part, st, class_name)
        else_label = new_label()
        next_label = new_label()
        code  = cond_code + [indent(f'ifeq {else_label}')]
        # After the then-part, the else-part has to be jumped over.
        code += then_code + [indent(f'goto {next_label}'), label_line(else_label)]
        code += else_code + [label_line(next_label)]
        return code, max(cond_depth, then_depth, else_depth)
    if kind == 'while':
        _, condition, body = stmnt
        cond_code, cond_depth = compile_bool(condition, st, class_name)
        body_code, body_depth = compile_stmnt(body, st, class_name)
        loop_label = new_label()
        next_label = new_label()
        # The condition is evaluated again after every execution of the body.
        code  = [label_line(loop_label)] + cond_code
        code += [indent(f'ifeq {next_label}')] + body_code
        code += [indent(f'goto {loop_label}'), label_line(next_label)]
        return code, max(cond_depth, body_depth)
    if kind == 'return':
        _, value = stmnt
        code, depth = compile_expr(value, st, class_name)
        code.append(indent('ireturn'))
        return code, depth
    if kind == '.':
        code, depth = [], 0
        for sub_stmnt in stmnt[1:]:
            sub_code, sub_depth = compile_stmnt(sub_stmnt, st, class_name)
            code += sub_code
            depth = max(depth, sub_depth)
        return code, depth
    # Everything else is an expression that is used as a statement.
    return compile_expr(stmnt, st, class_name)

In [None]:
# Test case: an if statement without an else part.
stmnt = ('if', ('==', ('/', 'x', 'y'), ('Number', '0')), ('=', 'x', 'y'))
compile_stmnt(stmnt, st, 'Sum')

In [None]:
# Test case: an if statement with an else part.
stmnt = ('if-else', ('<', 'x', 'y'), ('=', 'x', 'y'), ('=', 'y', 'x'))
compile_stmnt(stmnt, st, 'Sum')

In [None]:
# Test case: a while loop that increments x as long as x is less than y.
stmnt = ('while', ('<', 'x', 'y'), ('=', 'x', ('+', 'x', ('Number', '1'))))
compile_stmnt(stmnt, st, 'Sum')

In [None]:
# Test case: a statement list that contains a nested statement list.
stmnt = ('.', ('=', 'x', 'y'), ('.', ('=', 'x', ('Number', '1')), ('=', 'y', 'x')))
compile_stmnt(stmnt, st, 'Sum')

In [None]:
def compile_fct(fct_def, class_name):
    """Translate a function definition into a list of assembler commands.

    Parameters:
        fct_def:    tuple of the form ('fct', name, parameters, variables,
                    stmnts) where parameters, variables, and stmnts are
                    tuples whose first component is the tag '.'.
        class_name: name of the generated class, passed on to compile_stmnt.

    Returns:
        A pair (cmds, size) where cmds is the complete method definition as a
        list of lines and size is the maximum stack size needed by any of
        the statements of the function (0 if there are no statements).

    The function with the name 'main' is translated into the method
    main([Ljava/lang/String;)V that ends with a return command; every other
    function is translated into a static method that takes and returns
    integers.
    """
    global label_counter
    label_counter = 0  # the numbering of labels restarts for every function
    _, name, parameters, variables, stmnts = fct_def
    _, *parameters = parameters
    _, *variables  = variables
    _, *stmnts     = stmnts
    m = len(parameters)
    n = len(variables)
    # Parameters are numbered first, the declared variables follow.
    st = { var: idx for idx, var in enumerate(parameters + variables) }
    CmdLst = []
    size   = 0
    for stmnt in stmnts:
        L, sz = compile_stmnt(stmnt, st, class_name)
        CmdLst += L
        size = max(size, sz)
    limit_stack = f'.limit stack  {size}'
    if name != 'main':
        method       = f'.method public static {name}({"I"*m})I'
        limit_locals = f'.limit locals {m+n}'
        CmdLst = [method, limit_locals, limit_stack] + CmdLst + ['.end method']
        return CmdLst, size
    method = '.method public static main([Ljava/lang/String;)V'
    # The generated main method has one argument of its own, which occupies a
    # local variable even if the source declares no parameters or variables.
    limit_locals = f'.limit locals {max(m+n, 1)}'
    return_stmnt = indent('return')
    CmdLst = [method, limit_locals, limit_stack] + CmdLst + [return_stmnt, '.end method']
    return CmdLst, size

In [None]:
# Test case: a function with one parameter and two local variables that
# returns its parameter.
f = ('fct', 'sum', ('.', 'x'), ('.', 'y', 'z'), ('.', ('return', 'x')))
compile_fct(f, 'Sum')

In [None]:
import os

In [None]:
# Demonstrates how os.path splits a path into its directory and its file name.
file = "~/Dropbox/Kurse/Formal-Languages/Ply/Examples/Test.c"
print(os.path.dirname(file))
print(os.path.basename(file))

In [None]:
def compile_program(file_name):
    """Compile the C file file_name into a Jasmin assembler file.

    The result is written into the directory of file_name.  Its name is the
    name of the source file with the extension replaced by '.jas'; the name
    without extension also serves as the name of the generated class.

    Raises:
        ValueError: if the source file can not be parsed.  The syntax error
                    itself has already been reported by the parser.
    """
    directory = os.path.dirname(file_name)
    # splitext removes the extension regardless of its length.
    base, _   = os.path.splitext(os.path.basename(file_name))
    outfile   = os.path.join(directory, base + '.jas')
    with open(file_name, 'r') as handle:
        program = handle.read()
    lexer.lineno = 1  # lines are counted from 1, as in the function parse
    ast = yacc.parse(program)
    if ast is None:
        raise ValueError(f'{file_name} could not be parsed.')
    _, *fct_lst = ast
    CmdLst = []
    for fct in fct_lst:
        L, _ = compile_fct(fct, base)
        CmdLst += L + ['\n']
    # Class header and default constructor that every generated class needs.
    header = [ '.class public ' + base + '\n',
               '.super java/lang/Object\n\n',
               '.method public <init>()V\n',
               '    aload 0\n',
               '    invokenonvirtual java/lang/Object/<init>()V\n',
               '    return\n',
               '.end method\n\n'
             ]
    with open(outfile, 'w') as handle:
        handle.writelines(header)
        handle.writelines(cmd + '\n' for cmd in CmdLst)

In [None]:
%cd Examples

In [None]:
%cat Primes.c

In [None]:
# Compile the example program shown above.
compile_program('Primes.c')

In [None]:
!ls

In [None]:
%cat 'Primes.jas'

Next, we generate Java byte code using
[jasmin](http://jasmin.sourceforge.net).

In [None]:
!jasmin Primes.jas

Finally, we run the generated byte code.

In [None]:
!java Primes

In [None]:
!rm *.jas *.class

In [None]:
!ls -al