Permalink
Find file
Fetching contributors…
Cannot retrieve contributors at this time
774 lines (601 sloc) 21.2 KB
""" SPI - Simple Pascal Interpreter. Part 11."""
from collections import OrderedDict
###############################################################################
# #
# LEXER #
# #
###############################################################################
# Token types
#
# EOF (end-of-file) token is used to indicate that
# there is no more input left for lexical analysis
INTEGER = 'INTEGER'
REAL = 'REAL'
INTEGER_CONST = 'INTEGER_CONST'
REAL_CONST = 'REAL_CONST'
PLUS = 'PLUS'
MINUS = 'MINUS'
MUL = 'MUL'
INTEGER_DIV = 'INTEGER_DIV'
FLOAT_DIV = 'FLOAT_DIV'
LPAREN = 'LPAREN'
RPAREN = 'RPAREN'
ID = 'ID'
ASSIGN = 'ASSIGN'
BEGIN = 'BEGIN'
END = 'END'
SEMI = 'SEMI'
DOT = 'DOT'
PROGRAM = 'PROGRAM'
VAR = 'VAR'
COLON = 'COLON'
COMMA = 'COMMA'
EOF = 'EOF'
class Token(object):
def __init__(self, type, value):
self.type = type
self.value = value
def __str__(self):
"""String representation of the class instance.
Examples:
Token(INTEGER, 3)
Token(PLUS, '+')
Token(MUL, '*')
"""
return 'Token({type}, {value})'.format(
type=self.type,
value=repr(self.value)
)
def __repr__(self):
return self.__str__()
RESERVED_KEYWORDS = {
'PROGRAM': Token('PROGRAM', 'PROGRAM'),
'VAR': Token('VAR', 'VAR'),
'DIV': Token('INTEGER_DIV', 'DIV'),
'INTEGER': Token('INTEGER', 'INTEGER'),
'REAL': Token('REAL', 'REAL'),
'BEGIN': Token('BEGIN', 'BEGIN'),
'END': Token('END', 'END'),
}
class Lexer(object):
def __init__(self, text):
# client string input, e.g. "4 + 2 * 3 - 6 / 2"
self.text = text
# self.pos is an index into self.text
self.pos = 0
self.current_char = self.text[self.pos]
def error(self):
raise Exception('Invalid character')
def advance(self):
"""Advance the `pos` pointer and set the `current_char` variable."""
self.pos += 1
if self.pos > len(self.text) - 1:
self.current_char = None # Indicates end of input
else:
self.current_char = self.text[self.pos]
def peek(self):
peek_pos = self.pos + 1
if peek_pos > len(self.text) - 1:
return None
else:
return self.text[peek_pos]
def skip_whitespace(self):
while self.current_char is not None and self.current_char.isspace():
self.advance()
def skip_comment(self):
while self.current_char != '}':
self.advance()
self.advance() # the closing curly brace
def number(self):
"""Return a (multidigit) integer or float consumed from the input."""
result = ''
while self.current_char is not None and self.current_char.isdigit():
result += self.current_char
self.advance()
if self.current_char == '.':
result += self.current_char
self.advance()
while (
self.current_char is not None and
self.current_char.isdigit()
):
result += self.current_char
self.advance()
token = Token('REAL_CONST', float(result))
else:
token = Token('INTEGER_CONST', int(result))
return token
def _id(self):
"""Handle identifiers and reserved keywords"""
result = ''
while self.current_char is not None and self.current_char.isalnum():
result += self.current_char
self.advance()
token = RESERVED_KEYWORDS.get(result, Token(ID, result))
return token
def get_next_token(self):
"""Lexical analyzer (also known as scanner or tokenizer)
This method is responsible for breaking a sentence
apart into tokens. One token at a time.
"""
while self.current_char is not None:
if self.current_char.isspace():
self.skip_whitespace()
continue
if self.current_char == '{':
self.advance()
self.skip_comment()
continue
if self.current_char.isalpha():
return self._id()
if self.current_char.isdigit():
return self.number()
if self.current_char == ':' and self.peek() == '=':
self.advance()
self.advance()
return Token(ASSIGN, ':=')
if self.current_char == ';':
self.advance()
return Token(SEMI, ';')
if self.current_char == ':':
self.advance()
return Token(COLON, ':')
if self.current_char == ',':
self.advance()
return Token(COMMA, ',')
if self.current_char == '+':
self.advance()
return Token(PLUS, '+')
if self.current_char == '-':
self.advance()
return Token(MINUS, '-')
if self.current_char == '*':
self.advance()
return Token(MUL, '*')
if self.current_char == '/':
self.advance()
return Token(FLOAT_DIV, '/')
if self.current_char == '(':
self.advance()
return Token(LPAREN, '(')
if self.current_char == ')':
self.advance()
return Token(RPAREN, ')')
if self.current_char == '.':
self.advance()
return Token(DOT, '.')
self.error()
return Token(EOF, None)
###############################################################################
# #
# PARSER #
# #
###############################################################################
class AST(object):
pass
class BinOp(AST):
def __init__(self, left, op, right):
self.left = left
self.token = self.op = op
self.right = right
class Num(AST):
def __init__(self, token):
self.token = token
self.value = token.value
class UnaryOp(AST):
def __init__(self, op, expr):
self.token = self.op = op
self.expr = expr
class Compound(AST):
"""Represents a 'BEGIN ... END' block"""
def __init__(self):
self.children = []
class Assign(AST):
def __init__(self, left, op, right):
self.left = left
self.token = self.op = op
self.right = right
class Var(AST):
"""The Var node is constructed out of ID token."""
def __init__(self, token):
self.token = token
self.value = token.value
class NoOp(AST):
pass
class Program(AST):
def __init__(self, name, block):
self.name = name
self.block = block
class Block(AST):
def __init__(self, declarations, compound_statement):
self.declarations = declarations
self.compound_statement = compound_statement
class VarDecl(AST):
def __init__(self, var_node, type_node):
self.var_node = var_node
self.type_node = type_node
class Type(AST):
def __init__(self, token):
self.token = token
self.value = token.value
class Parser(object):
def __init__(self, lexer):
self.lexer = lexer
# set current token to the first token taken from the input
self.current_token = self.lexer.get_next_token()
def error(self):
raise Exception('Invalid syntax')
def eat(self, token_type):
# compare the current token type with the passed token
# type and if they match then "eat" the current token
# and assign the next token to the self.current_token,
# otherwise raise an exception.
if self.current_token.type == token_type:
self.current_token = self.lexer.get_next_token()
else:
self.error()
def program(self):
"""program : PROGRAM variable SEMI block DOT"""
self.eat(PROGRAM)
var_node = self.variable()
prog_name = var_node.value
self.eat(SEMI)
block_node = self.block()
program_node = Program(prog_name, block_node)
self.eat(DOT)
return program_node
def block(self):
"""block : declarations compound_statement"""
declaration_nodes = self.declarations()
compound_statement_node = self.compound_statement()
node = Block(declaration_nodes, compound_statement_node)
return node
def declarations(self):
"""declarations : VAR (variable_declaration SEMI)+
| empty
"""
declarations = []
if self.current_token.type == VAR:
self.eat(VAR)
while self.current_token.type == ID:
var_decl = self.variable_declaration()
declarations.extend(var_decl)
self.eat(SEMI)
return declarations
def variable_declaration(self):
"""variable_declaration : ID (COMMA ID)* COLON type_spec"""
var_nodes = [Var(self.current_token)] # first ID
self.eat(ID)
while self.current_token.type == COMMA:
self.eat(COMMA)
var_nodes.append(Var(self.current_token))
self.eat(ID)
self.eat(COLON)
type_node = self.type_spec()
var_declarations = [
VarDecl(var_node, type_node)
for var_node in var_nodes
]
return var_declarations
def type_spec(self):
"""type_spec : INTEGER
| REAL
"""
token = self.current_token
if self.current_token.type == INTEGER:
self.eat(INTEGER)
else:
self.eat(REAL)
node = Type(token)
return node
def compound_statement(self):
"""
compound_statement: BEGIN statement_list END
"""
self.eat(BEGIN)
nodes = self.statement_list()
self.eat(END)
root = Compound()
for node in nodes:
root.children.append(node)
return root
def statement_list(self):
"""
statement_list : statement
| statement SEMI statement_list
"""
node = self.statement()
results = [node]
while self.current_token.type == SEMI:
self.eat(SEMI)
results.append(self.statement())
return results
def statement(self):
"""
statement : compound_statement
| assignment_statement
| empty
"""
if self.current_token.type == BEGIN:
node = self.compound_statement()
elif self.current_token.type == ID:
node = self.assignment_statement()
else:
node = self.empty()
return node
def assignment_statement(self):
"""
assignment_statement : variable ASSIGN expr
"""
left = self.variable()
token = self.current_token
self.eat(ASSIGN)
right = self.expr()
node = Assign(left, token, right)
return node
def variable(self):
"""
variable : ID
"""
node = Var(self.current_token)
self.eat(ID)
return node
def empty(self):
"""An empty production"""
return NoOp()
def expr(self):
"""
expr : term ((PLUS | MINUS) term)*
"""
node = self.term()
while self.current_token.type in (PLUS, MINUS):
token = self.current_token
if token.type == PLUS:
self.eat(PLUS)
elif token.type == MINUS:
self.eat(MINUS)
node = BinOp(left=node, op=token, right=self.term())
return node
def term(self):
"""term : factor ((MUL | INTEGER_DIV | FLOAT_DIV) factor)*"""
node = self.factor()
while self.current_token.type in (MUL, INTEGER_DIV, FLOAT_DIV):
token = self.current_token
if token.type == MUL:
self.eat(MUL)
elif token.type == INTEGER_DIV:
self.eat(INTEGER_DIV)
elif token.type == FLOAT_DIV:
self.eat(FLOAT_DIV)
node = BinOp(left=node, op=token, right=self.factor())
return node
def factor(self):
"""factor : PLUS factor
| MINUS factor
| INTEGER_CONST
| REAL_CONST
| LPAREN expr RPAREN
| variable
"""
token = self.current_token
if token.type == PLUS:
self.eat(PLUS)
node = UnaryOp(token, self.factor())
return node
elif token.type == MINUS:
self.eat(MINUS)
node = UnaryOp(token, self.factor())
return node
elif token.type == INTEGER_CONST:
self.eat(INTEGER_CONST)
return Num(token)
elif token.type == REAL_CONST:
self.eat(REAL_CONST)
return Num(token)
elif token.type == LPAREN:
self.eat(LPAREN)
node = self.expr()
self.eat(RPAREN)
return node
else:
node = self.variable()
return node
def parse(self):
"""
program : PROGRAM variable SEMI block DOT
block : declarations compound_statement
declarations : VAR (variable_declaration SEMI)+
| empty
variable_declaration : ID (COMMA ID)* COLON type_spec
type_spec : INTEGER
compound_statement : BEGIN statement_list END
statement_list : statement
| statement SEMI statement_list
statement : compound_statement
| assignment_statement
| empty
assignment_statement : variable ASSIGN expr
empty :
expr : term ((PLUS | MINUS) term)*
term : factor ((MUL | INTEGER_DIV | FLOAT_DIV) factor)*
factor : PLUS factor
| MINUS factor
| INTEGER_CONST
| REAL_CONST
| LPAREN expr RPAREN
| variable
variable: ID
"""
node = self.program()
if self.current_token.type != EOF:
self.error()
return node
###############################################################################
# #
# AST visitors (walkers) #
# #
###############################################################################
class NodeVisitor(object):
def visit(self, node):
method_name = 'visit_' + type(node).__name__
visitor = getattr(self, method_name, self.generic_visit)
return visitor(node)
def generic_visit(self, node):
raise Exception('No visit_{} method'.format(type(node).__name__))
###############################################################################
# #
# SYMBOLS and SYMBOL TABLE #
# #
###############################################################################
class Symbol(object):
def __init__(self, name, type=None):
self.name = name
self.type = type
class VarSymbol(Symbol):
def __init__(self, name, type):
super(VarSymbol, self).__init__(name, type)
def __str__(self):
return '<{name}:{type}>'.format(name=self.name, type=self.type)
__repr__ = __str__
class BuiltinTypeSymbol(Symbol):
def __init__(self, name):
super(BuiltinTypeSymbol, self).__init__(name)
def __str__(self):
return self.name
__repr__ = __str__
class SymbolTable(object):
def __init__(self):
self._symbols = OrderedDict()
self._init_builtins()
def _init_builtins(self):
self.define(BuiltinTypeSymbol('INTEGER'))
self.define(BuiltinTypeSymbol('REAL'))
def __str__(self):
s = 'Symbols: {symbols}'.format(
symbols=[value for value in self._symbols.values()]
)
return s
__repr__ = __str__
def define(self, symbol):
print('Define: %s' % symbol)
self._symbols[symbol.name] = symbol
def lookup(self, name):
print('Lookup: %s' % name)
symbol = self._symbols.get(name)
# 'symbol' is either an instance of the Symbol class or 'None'
return symbol
class SymbolTableBuilder(NodeVisitor):
def __init__(self):
self.symtab = SymbolTable()
def visit_Block(self, node):
for declaration in node.declarations:
self.visit(declaration)
self.visit(node.compound_statement)
def visit_Program(self, node):
self.visit(node.block)
def visit_BinOp(self, node):
self.visit(node.left)
self.visit(node.right)
def visit_Num(self, node):
pass
def visit_UnaryOp(self, node):
self.visit(node.expr)
def visit_Compound(self, node):
for child in node.children:
self.visit(child)
def visit_NoOp(self, node):
pass
def visit_VarDecl(self, node):
type_name = node.type_node.value
type_symbol = self.symtab.lookup(type_name)
var_name = node.var_node.value
var_symbol = VarSymbol(var_name, type_symbol)
self.symtab.define(var_symbol)
def visit_Assign(self, node):
var_name = node.left.value
var_symbol = self.symtab.lookup(var_name)
if var_symbol is None:
raise NameError(repr(var_name))
self.visit(node.right)
def visit_Var(self, node):
var_name = node.value
var_symbol = self.symtab.lookup(var_name)
if var_symbol is None:
raise NameError(repr(var_name))
###############################################################################
# #
# INTERPRETER #
# #
###############################################################################
class Interpreter(NodeVisitor):
def __init__(self, tree):
self.tree = tree
self.GLOBAL_MEMORY = OrderedDict()
def visit_Program(self, node):
self.visit(node.block)
def visit_Block(self, node):
for declaration in node.declarations:
self.visit(declaration)
self.visit(node.compound_statement)
def visit_VarDecl(self, node):
# Do nothing
pass
def visit_Type(self, node):
# Do nothing
pass
def visit_BinOp(self, node):
if node.op.type == PLUS:
return self.visit(node.left) + self.visit(node.right)
elif node.op.type == MINUS:
return self.visit(node.left) - self.visit(node.right)
elif node.op.type == MUL:
return self.visit(node.left) * self.visit(node.right)
elif node.op.type == INTEGER_DIV:
return self.visit(node.left) // self.visit(node.right)
elif node.op.type == FLOAT_DIV:
return float(self.visit(node.left)) / float(self.visit(node.right))
def visit_Num(self, node):
return node.value
def visit_UnaryOp(self, node):
op = node.op.type
if op == PLUS:
return +self.visit(node.expr)
elif op == MINUS:
return -self.visit(node.expr)
def visit_Compound(self, node):
for child in node.children:
self.visit(child)
def visit_Assign(self, node):
var_name = node.left.value
var_value = self.visit(node.right)
self.GLOBAL_MEMORY[var_name] = var_value
def visit_Var(self, node):
var_name = node.value
var_value = self.GLOBAL_MEMORY.get(var_name)
return var_value
def visit_NoOp(self, node):
pass
def interpret(self):
tree = self.tree
if tree is None:
return ''
return self.visit(tree)
def main():
import sys
text = open(sys.argv[1], 'r').read()
lexer = Lexer(text)
parser = Parser(lexer)
tree = parser.parse()
symtab_builder = SymbolTableBuilder()
symtab_builder.visit(tree)
print('')
print('Symbol Table contents:')
print(symtab_builder.symtab)
interpreter = Interpreter(tree)
result = interpreter.interpret()
print('')
print('Run-time GLOBAL_MEMORY contents:')
for k, v in sorted(interpreter.GLOBAL_MEMORY.items()):
print('%s = %s' % (k, v))
if __name__ == '__main__':
main()