# Episode 16: Handmade Parsers

Calculators are the wave of the past...long live calculators!

To make matters even more old school, let's write a simple calculator language by hand!

Grammar:

```
statements : EMPTY | statement statements
statement  : (assign | expression) '\n'
assign     : ID '=' expression
expression : atom (op expression)?
atom       : ID | NUM
op         : '+' | '-'
```

## Step 1: Lexical Analysis

In [12]:
import typing

EMPTY = 0
NL = 1
ID = 2
EQ = 3
NUM = 4
PLUS = 5
MINUS = 6

Token = typing.Tuple[int, str]

def calc_lexer(source: str) -> typing.Iterator[Token]:
    index = 0
    while index < len(source):
        char = source[index]
        if char in ' \t':
            pass
        elif char == '\n':
            yield (NL, char)
        elif char.isalpha():
            result = char
            index += 1
            if index < len(source):
                char = source[index]
                while index < len(source) and char.isalpha():
                    result += char
                    index += 1
                    char = source[index]
            yield (ID, result)
            continue
        elif char == '=':
            yield (EQ, char)
        elif char.isdigit():
            result = char
            index += 1
            if index < len(source):
                char = source[index]
                while index < len(source) and char.isdigit():
                    result += char
                    index += 1
                    char = source[index]
            yield(NUM, result)
            continue
        elif char == '+':
            yield (PLUS, char)
        elif char == '-':
            yield (MINUS, char)
        else:
            raise SyntaxError(f'Unexpected character in input string: {char}')
        index += 1
    yield (EMPTY, '')

In [13]:
list(calc_lexer('A = 2\nB = 3\nA + B'))

[(2, 'A'),
 (3, '='),
 (4, '2'),
 (1, '\n'),
 (2, 'B'),
 (3, '='),
 (4, '3'),
 (1, '\n'),
 (2, 'A'),
 (5, '+'),
 (2, 'B'),
 (0, '')]

## Step 2: Syntactic Analysis

In [17]:
class Tokens(typing.NamedTuple):
    lookahead: typing.List[Token]
    stream: typing.Iterator[Token]

class Tree(typing.NamedTuple):
    contents: typing.Union[str, Token]
    children: typing.List['Tree']

def peek(tokens: Tokens) -> Token:
    next_token = next(tokens.stream)
    tokens.lookahead.push(next_token)
    return tokens.lookahead[-1]

def next_token(tokens: Tokens) -> Token:
    if len(tokens.lookahead) > 0:
        return tokens.lookahead.pop()
    return next(tokens.stream)

def parse_statements(tokens: Tokens) -> Tree:
    token = next_token(tokens)
    if token[0] == EMPTY:
        result = Tree(token, [])
    else:
        child_0 = parse_statement(tokens)
        child_1 = parse_statements(tokens)
        result = Tree('statements', [child_0, child_1])
    return result

def parse_statement(tokens: Tokens) -> Tree:
    token = peek(tokens)
    if token[0] == ID:
        token = peek(tokens)
        if token[0] == EQ:
            child_result = parse_assign(tokens)
        else:
            child_result = parse_expression(tokens)
    else:
        child_result = parse_expression(tokens)
    nl_token = next_token(tokens)
    assert nl_token[0] == NL
    return Tree('statement', [child_result, nl_token])

def parse_assign(tokens: Tokens) -> Tree:
    identifier = next_token(tokens)
    eq = next_token(tokens)
    assert eq[0] == EQ
    rhs = parse_expression(tokens)
    return Tree('assign', [identifier, eq, rhs])

def parse_expression(tokens: Tokens) -> Tree:
    atom = parse_atom(tokens)
    token = peek(tokens)
    if token[0] in (PLUS, MINUS):
        op = parse_op(tokens)
        rhs = parse_expression(tokens)
        result = Tree('expression', [atom, op, rhs])
    else:
        result = atom
    return result

def parse_atom(tokens: Tokens) -> Tree:
    token = next_token(tokens)
    assert token[0] in (ID, NUM)
    return Tree('atom', [Tree(token, [])])

def parse_op(tokens: Tokens) -> Tree:
    token = next_token(tokens)
    assert token[0] in (PLUS, MINUS)
    return Tree('op', [Tree(token, [])])

def calc_parse(tokens: Token) -> Tree:
    return parse_statements(tokens)

## Step 3: Tie Everything Together

In [18]:
def calc_frontend(source: str) -> Tree:
    return calc_parse(Tokens([], calc_lexer(source)))

In [19]:
calc_frontend('1 + 3 - 5')

AttributeError: 'list' object has no attribute 'push'