Let's first define the available tokens.

In [10]:
from enum import Enum

class Token(Enum):
    """Kinds of lexical tokens recognised by the lexer."""
    EOI       = 0  # End of input
    SEMI      = 1  # ';'
    PLUS      = 2  # '+'
    TIMES     = 3  # '*'
    LP        = 4  # '(' (left parenthesis)
    RP        = 5  # ')' (right parenthesis)
    NUM_OR_ID = 6  # decimal number or identifier

Define the character-to-token mapping.

In [11]:
# Single-character lexemes and the token type each one maps to.
# NUM_OR_ID and EOI have no entry here because they are not tied to one
# fixed character.
character_to_token = {
    ';': Token.SEMI,
    '+': Token.PLUS,
    '*': Token.TIMES,
    '(': Token.LP,
    ')': Token.RP
}

In [12]:
from collections import namedtuple
# A lexeme pairs a token kind (`type`) with its optional payload (`value`).
Lexeme = namedtuple('Lexeme', 'type value')

In [13]:
def lex(chars):
    """Consume and return the next lexeme from the front of ``chars``.

    Args:
        chars: list of single-character strings. It is consumed in place:
            every character read by this call is popped from the front.

    Returns:
        * ``Lexeme(Token.EOI, None)`` when the input is exhausted, including
          the case where only whitespace was left.
        * ``Lexeme(<token>, None)`` for a character listed in
          ``character_to_token``.
        * ``Lexeme(Token.NUM_OR_ID, value)`` for a run of alphanumeric
          characters, where ``value`` is the list of those characters.
        * ``None`` when nothing was recognised on this call (a newline or
          tab followed by more input, or an illegal character, which is
          also reported on stdout). The caller is expected to call again.
    """
    if not chars:
        return Lexeme(Token.EOI, None)

    current = chars.pop(0)

    # Skip a run of blanks preceding the next token.
    while current == ' ' and chars:
        current = chars.pop(0)

    # NOTE: the character just popped must be classified even when it was the
    # last one in ``chars``; otherwise the final token of the input is lost.
    if current in character_to_token:
        return Lexeme(character_to_token[current], None)

    if current in ('\n', '\t', ' '):
        # Whitespace at the very end of the input means end of input;
        # elsewhere the caller simply asks for the next lexeme again.
        return None if chars else Lexeme(Token.EOI, None)

    if not current.isalnum():
        print(f"Ignoring illegal input {current}")
        return None

    # Collect the remaining characters of the number / identifier.
    value = [current]
    while chars and chars[0].isalnum():
        value.append(chars.pop(0))
    return Lexeme(Token.NUM_OR_ID, value)

In [14]:
class Lexer:
    """Keeps a single lexeme of lookahead over a list of input characters."""

    def __init__(self, chars):
        """Store the character list; no lexeme is read until it is needed."""
        self.chars = chars
        self.lookahead = None

    def match(self, token):
        """Return True if ``token`` equals the type of the lookahead lexeme.

        If there is no lookahead yet, lexemes are pulled from ``lex`` until
        one is produced; each attempt is echoed to stdout.
        """
        while not self.lookahead:
            self.lookahead = lex(self.chars)
            print(f"New lookahead: {self.lookahead}")

        current = self.lookahead
        return token == current.type

    def advance(self):
        """Replace the lookahead with the next result of ``lex`` and echo it."""
        upcoming = lex(self.chars)
        self.lookahead = upcoming
        print(f"Changed lookahead: {self.lookahead}")

In [19]:
class ParserException(Exception):
    """Raised by the parser for syntax errors it does not recover from."""

class Parser:
    """Recursive-descent recogniser for semicolon-terminated expressions.

    Grammar implemented by the methods below:

        statements  -> expression SEMI
                    |  expression SEMI statements
        expression  -> term expression'
        expression' -> PLUS term expression' | epsilon
        term        -> factor term'
        term'       -> TIMES factor term' | epsilon
        factor      -> NUM_OR_ID | LP expression RP

    The parser only validates the input; it builds no tree and returns no
    value. Progress and diagnostics are printed to stdout.
    """

    def __init__(self, lexer):
        """Store the lexer that supplies ``match`` and ``advance``."""
        self.lexer = lexer

    def legal_lookahead(self, lookaheads):
        """Check the lookahead against ``lookaheads``, skipping bad tokens.

        Tokens that are not in ``lookaheads`` are discarded (after a single
        syntax-error message) until a legal token, a ``SEMI`` or the end of
        input is reached.

        Args:
            lookaheads: iterable of ``Token`` values acceptable at this point.

        Returns:
            True if the lookahead is now one of ``lookaheads``; False if a
            ``SEMI`` or the end of input was reached first.
        """
        error_printed = False
        while not self.lexer.match(Token.SEMI):
            if any(self.lexer.match(candidate) for candidate in lookaheads):
                return True
            if not error_printed:
                print("Line no TODO: Syntax error")
                error_printed = True
            if self.lexer.match(Token.EOI):
                # Input is exhausted: advancing again would only yield EOI,
                # so give up instead of looping forever.
                return False
            self.lexer.advance()
        return False

    def factor(self):
        """
        factor -> NUM_OR_ID | LP expression RP

        Raises:
            ParserException: if an opening parenthesis is not closed.
        """
        if not self.legal_lookahead([Token.NUM_OR_ID, Token.LP]):
            return

        if self.lexer.match(Token.NUM_OR_ID):
            self.lexer.advance()
        elif self.lexer.match(Token.LP):
            self.lexer.advance()
            self.expression()
            if self.lexer.match(Token.RP):
                self.lexer.advance()
            else:
                raise ParserException("Mismatch parenthesis TODO: Lineno")
        else:
            # Defensive: legal_lookahead() already guarantees one of the two
            # branches above applies.
            raise ParserException("Number or identifier expected TODO: lineno")

    def term(self):
        """
        term  -> factor term'
        term' -> TIMES factor term' | epsilon
        """
        if not self.legal_lookahead([Token.NUM_OR_ID, Token.LP]):
            return

        self.factor()

        while self.lexer.match(Token.TIMES):
            self.lexer.advance()
            self.factor()

    def expression(self):
        """
        expression  -> term expression'
        expression' -> PLUS term expression' | epsilon
        """
        print(f"expression {self.lexer.chars}")

        if not self.legal_lookahead([Token.NUM_OR_ID, Token.LP]):
            return

        self.term()

        while self.lexer.match(Token.PLUS):
            self.lexer.advance()
            self.term()

    def statements(self):
        """
        statements -> expression SEMI
                |     expression SEMI statements

        Implemented as a loop rather than by recursion: expressions are
        parsed until the end of input, and a missing terminating semicolon
        is reported but tolerated.
        """
        print(f"statements {self.lexer.chars}")

        while not self.lexer.match(Token.EOI):
            self.expression()

            if self.lexer.match(Token.SEMI):
                self.lexer.advance()
            else:
                print("Inserting missing semicolon (TODO: line number)")

In [20]:
# Demo: run the parser over a single well-formed statement.
expressions = """
2 + 2;
"""

# The lexer pops characters off the front of this list as it reads them.
chars = list(expressions)

lexer = Lexer(chars)
parser = Parser(lexer)
parser.statements()

statements ['\n', '2', ' ', '+', ' ', '2', ';', '\n']
New lookahead: None
New lookahead: Lexeme(type=<Token.NUM_OR_ID: 6>, value=['2'])
expression [' ', '+', ' ', '2', ';', '\n']
Changed lookahead: Lexeme(type=<Token.PLUS: 2>, value=None)
Changed lookahead: Lexeme(type=<Token.NUM_OR_ID: 6>, value=['2'])
Changed lookahead: Lexeme(type=<Token.SEMI: 1>, value=None)
Changed lookahead: Lexeme(type=<Token.EOI: 0>, value=None)


In [21]:
# Demo: `*` appears where a number, identifier or `(` is expected, so the
# parser prints a syntax error and skips ahead to the `;`.
invalid_expressions = """
2 + *;
"""

# The lexer pops characters off the front of this list as it reads them.
chars = list(invalid_expressions)

lexer = Lexer(chars)
parser = Parser(lexer)
parser.statements()

statements ['\n', '2', ' ', '+', ' ', '*', ';', '\n']
New lookahead: None
New lookahead: Lexeme(type=<Token.NUM_OR_ID: 6>, value=['2'])
expression [' ', '+', ' ', '*', ';', '\n']
Changed lookahead: Lexeme(type=<Token.PLUS: 2>, value=None)
Changed lookahead: Lexeme(type=<Token.TIMES: 3>, value=None)
Line no TODO: Syntax error
Changed lookahead: Lexeme(type=<Token.SEMI: 1>, value=None)
Changed lookahead: Lexeme(type=<Token.EOI: 0>, value=None)
