Skip to content

Commit

Permalink
fix two bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
onesuper committed Apr 3, 2015
1 parent 20d0fb7 commit cb3d144
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 60 deletions.
22 changes: 11 additions & 11 deletions scheme/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

class Token:
def __init__(self, type):
self.type = type # supposed to be a string
self.raw = None
self.value = None
self.type = type # string e.g. 'INT'
self.raw = None # string e.g. 'name123'
self.value = None # python type, e.g. True
self.lineno = 0
self.colno = 0

Expand All @@ -15,38 +15,38 @@ def __str__(self):
% (self.type, repr(self.value), self.lineno, self.colno)

##
# @brief Handlers to get the value of a type
def t_INTEGER(t):
# @brief Handlers that set a token's value from its raw text
def t_INT(t):
t.value = int(t.raw)
return t

def t_ID(t):
    """Set an identifier token's value to its raw text and return the token."""
    # Identifiers need no conversion: the raw lexeme is used as the value.
    t.value = t.raw
    return t

def t_BOOLEAN(t):
def t_BOOL(t):
if t.raw == '#t':
t.value = True
else:
t.value = False
return t

class Lexer:

letter = r'([A-Za-z])'
digit = r'([0-9])'
initial = r'(\.|\_|\+|\-|\!|\$|\%|\&|\*|\/|:|<|=|>|\?|~|\'|' + letter + r'|' + digit + r')'
subsequent = r'(' + initial + r'|#)'

# Regexes
integer_rex = re.compile(r'\d+')
ident_rex = re.compile(r'(' + initial + r'(' + subsequent + r')*)')
interger_rex = re.compile(r'\d+')
boolean_rex = re.compile(r'\#t|\#f')

tokens = [
# regex, type, handler
(interger_rex, 'INT', t_INTEGER),
(ident_rex, 'ID', t_ID),
(boolean_rex, 'BOOL', t_BOOLEAN),
(integer_rex, 'INT', t_INT),
(ident_rex, 'ID', t_ID),
(boolean_rex, 'BOOL', t_BOOL),
]

def __init__(self, s):
Expand Down
129 changes: 84 additions & 45 deletions scheme/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,86 +4,125 @@

from utils import unique_id

class SExpr:
def __init__(self, tok):
self.id = unique_id()()
class SExp:
ident = ' '
newline = '\n'

def __init__(self, tok, id):
self.id = id
self.lineno = tok.lineno
self.colno = tok.colno
self.children = []
self.ident = ' '
self.newline = '\n'
self.children = None

def append(self, subs):
'append a sub-sexpr to myself'
def append(self, sexp):
'append a sub-sexp to me'
if self.children is None:
self.children = []
self.children.append(subs)
self.children.append(sexp)

def isEmptyList(self):
    """Return True when this node has no children (``None`` or empty)."""
    return not self.children

def to_lisp_str(self):
    """Render this node as a parenthesised, space-separated Lisp string.

    Each child contributes its own ``to_lisp_str()`` output; a node whose
    ``children`` is ``None`` or empty renders as ``()``.
    """
    # ``children`` may be None, so fall back to an empty sequence.
    rendered = [child.to_lisp_str() for child in self.children or []]
    return '(' + ' '.join(rendered) + ')'

def __str__(self, level=0):
'recursively generate a S-expression node'
s = self.ident * level
s += '`-Sexp %d <line:%d, col:%d>' % (self.id, self.lineno, self.colno)
s += '`-SExp %d <line:%d, col:%d>' % (self.id, self.lineno, self.colno)
s += self.newline
for x in self.children:
s += x.__str__(level + 1)
if self.children:
for x in self.children:
s += x.__str__(level + 1)
return s


# class SNil(SExpr):
# def __init__(self, tok):
# SExpr.__init__(self, tok)
# self.children = None

# def __str__(self, level=0):
# s = self.ident * level
# s += '`-SNil %d <line:%d, col:%d>' % (self.id, self.lineno, self.colno)
# s += self.newline
# return s


class SAtom(SExpr):
def __init__(self, tok):
SExpr.__init__(self, tok)
class SAtom(SExp):
def __init__(self, tok, id):
SExp.__init__(self, tok, id)
self.type = tok.type
self.value = tok.value
self.children = None

def __str__(self, level=0):
s = self.ident * level
s += '`-SAtom %d <line:%d, col:%d> %s %s' % (self.id, self.lineno,
s += '`-%s %d <line:%d, col:%d> %s %s' % (
self.__class__.__name__, self.id, self.lineno,
self.colno, repr(self.value), self.type)
s += self.newline
return s


class SSymbol(SAtom):
    """Atom node for a symbol; the parser creates it for 'ID' tokens."""

    def __init__(self, tok, id):
        # tok: the lexer token this atom wraps; id: this node's identifier.
        SAtom.__init__(self, tok, id)

    def to_lisp_str(self):
        'Convert the SSymbol to a Lisp-readable string.'
        # The stored value is returned unchanged.
        return self.value


class SInt(SAtom):
    """Atom node for an integer; the parser creates it for 'INT' tokens."""

    def __init__(self, tok, id):
        # tok: the lexer token this atom wraps; id: this node's identifier.
        SAtom.__init__(self, tok, id)

    def to_lisp_str(self):
        """Return ``str()`` of the stored value as the Lisp-readable form."""
        return str(self.value)


class SBool(SAtom):
    """Atom node for a boolean; the parser creates it for 'BOOL' tokens."""

    def __init__(self, tok, id):
        # tok: the lexer token this atom wraps; id: this node's identifier.
        SAtom.__init__(self, tok, id)

    def to_lisp_str(self):
        """Return '#t' when the stored value is truthy, otherwise '#f'."""
        return '#t' if self.value else '#f'



class Parser:
def __init__(self, lexer):
'''init a token list from a lexer'''
self.tokens = []
self._tokens = []
while True:
t = lexer.next_token()
if t is None:
break
self.tokens.append(t)
self._tokens.append(t)
# increasing unique id for each S-expression.
self.new_id = unique_id()

# Form an S-expression from lexical tokens
def form_sexpr(self):

if len(self.tokens) == 0:
def form_sexp(self):
if len(self._tokens) == 0:
raise ParserError("expected an (' but end of string")

token = self.tokens.pop(0)

if token.type == 'LPAR': # S-expression
L = SExpr(token)
while self.tokens[0].type != 'RPAR':
L.append(self.form_sexpr())
self.tokens.pop(0) # pop off ')'
tok = self._tokens.pop(0)
if tok.type == 'LPAR': # S-expression
L = SExp(tok, self.new_id())
while self._tokens[0].type != 'RPAR':
L.append(self.form_sexp())
self._tokens.pop(0) # pop off ')'
return L
elif token.type in ['ID', 'INT', 'BOOL']: # Atom
return SAtom(token)
elif tok.type == 'ID':
return SSymbol(tok, self.new_id())
elif tok.type == 'INT':
return SInt(tok, self.new_id())
elif tok.type == 'BOOL':
return SBool(tok, self.new_id())
else:
raise ParserError("Unrecognized token '%s' at line %d, col %d" % (token.raw, token.lineno, token.colno))
raise ParserError("Unrecognized token '%s' at line %d, col %d" % (tok.raw, tok.lineno, tok.colno))



3 changes: 1 addition & 2 deletions scheme/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,8 @@ def to_string(exp):
else: # number
return str(exp)


def ordinal(n):
# ordinal number decoration
# returns an ordinal number decorated string
if 4 <= n <= 20 or 24 <= n <= 30:
suffix = "th"
else:
Expand Down
5 changes: 3 additions & 2 deletions test/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@
def parse_it(s):
lexer = Lexer(s)
parser = Parser(lexer)
print parser.form_sexpr()

sexp = parser.form_sexp()
print sexp
print sexp.to_lisp_str()

for code in (c1, c2, c3):
try:
Expand Down

0 comments on commit cb3d144

Please sign in to comment.