fix two bugs

onesuper · Apr 3, 2015 · cb3d144 · cb3d144
1 parent 20d0fb7
commit cb3d144
Show file tree

Hide file tree

Showing 4 changed files with 99 additions and 60 deletions.
diff --git a/scheme/lexer.py b/scheme/lexer.py
@@ -4,9 +4,9 @@
 
 class Token:
     def __init__(self, type):
-        self.type = type  # supposed to be a string
-        self.raw = None
-        self.value = None
+        self.type = type    # string e.g. 'INT'
+        self.raw = None     # string e.g. 'name123'
+        self.value = None   # python type, e.g. True
         self.lineno = 0
         self.colno = 0
 
@@ -15,38 +15,38 @@ def __str__(self):
                 % (self.type, repr(self.value), self.lineno, self.colno)
 
 ##
-# @brief Handlers to get the value of a type 
-def t_INTEGER(t):
+# @brief Handlers that set a token's value from its raw text
+def t_INT(t):
     t.value = int(t.raw)
     return t
 
 def t_ID(t):
+    t.value = t.raw
     return t
 
-def t_BOOLEAN(t):
+def t_BOOL(t):
     if t.raw == '#t':
         t.value = True
     else:
         t.value = False
     return t
 
 class Lexer:
-
     letter = r'([A-Za-z])'
     digit = r'([0-9])'
     initial = r'(\.|\_|\+|\-|\!|\$|\%|\&|\*|\/|:|<|=|>|\?|~|\'|' + letter + r'|' + digit + r')'
     subsequent = r'(' + initial + r'|#)'
 
     # Regexes
+    integer_rex = re.compile(r'\d+')
     ident_rex = re.compile(r'(' + initial + r'(' + subsequent + r')*)')
-    interger_rex = re.compile(r'\d+')
     boolean_rex = re.compile(r'\#t|\#f')
 
     tokens = [
         # regex,    type,   handler
-        (interger_rex,  'INT',   t_INTEGER),
-        (ident_rex,     'ID',       t_ID),
-        (boolean_rex,   'BOOL', t_BOOLEAN),
+        (integer_rex,   'INT',  t_INT),
+        (ident_rex,     'ID',   t_ID),
+        (boolean_rex,   'BOOL', t_BOOL),
     ]
 
     def __init__(self, s):

diff --git a/scheme/parser.py b/scheme/parser.py
@@ -4,86 +4,125 @@
 
 from utils import unique_id
 
-class SExpr:
-    def __init__(self, tok):
-        self.id = unique_id()()
+class SExp:
+    ident = '  '
+    newline = '\n'
+
+    def __init__(self, tok, id):
+        self.id = id
         self.lineno = tok.lineno
         self.colno = tok.colno
-        self.children = []
-        self.ident = '  '
-        self.newline = '\n'
+        self.children = None
 
-    def append(self, subs):
-        'append a sub-sexpr to myself'
+    def append(self, sexp):
+        'append a sub-sexp to me'
         if self.children is None:
             self.children = []
-        self.children.append(subs)
+        self.children.append(sexp)
+
+    def isEmptyList(self):
+        if self.children:
+            return False
+        else:
+            return True
+
+    def to_lisp_str(self):
+        'Convert the SExp to a Lisp-readable string.'
+        s = '('
+        if self.children:
+            for x in self.children:
+                s += x.to_lisp_str()
+                s += ' '
+        if s[-1] == ' ': s = s[:-1] + ')'
+        else: s += ')'
+        return s
 
     def __str__(self, level=0):
         'recursively generate a S-expression node'
         s = self.ident * level
-        s += '`-Sexp %d <line:%d, col:%d>' % (self.id, self.lineno, self.colno)
+        s += '`-SExp %d <line:%d, col:%d>' % (self.id, self.lineno, self.colno)
         s += self.newline
-        for x in self.children:
-            s += x.__str__(level + 1)
+        if self.children:
+            for x in self.children:
+                s += x.__str__(level + 1)
         return s
 
 
-# class SNil(SExpr):
-#     def __init__(self, tok):
-#         SExpr.__init__(self, tok)
-#         self.children = None
-
-#     def __str__(self, level=0):
-#         s = self.ident * level
-#         s += '`-SNil %d <line:%d, col:%d>' % (self.id, self.lineno, self.colno)
-#         s += self.newline
-#         return s
-
-
-class SAtom(SExpr):
-    def __init__(self, tok):
-        SExpr.__init__(self, tok)
+class SAtom(SExp):
+    def __init__(self, tok, id):
+        SExp.__init__(self, tok, id)
         self.type = tok.type
         self.value = tok.value
         self.children = None
 
     def __str__(self, level=0):
         s = self.ident * level
-        s += '`-SAtom %d <line:%d, col:%d> %s %s' % (self.id, self.lineno,
+        s += '`-%s %d <line:%d, col:%d> %s %s' % (
+            self.__class__.__name__, self.id, self.lineno, 
             self.colno, repr(self.value), self.type)
         s += self.newline
         return s
 
 
+class SSymbol(SAtom):
+    def __init__(self, tok, id):
+        SAtom.__init__(self, tok, id)
+
+    def to_lisp_str(self):
+        'Convert the SSymbol to a Lisp-readable string.'
+        return self.value
+
+
+class SInt(SAtom):
+    def __init__(self, tok, id):
+        SAtom.__init__(self, tok, id)
+
+    def to_lisp_str(self):
+        'Convert the SInt to a Lisp-readable string.'
+        return str(self.value)
+
+
+class SBool(SAtom):
+    def __init__(self, tok, id):
+        SAtom.__init__(self, tok, id)
+
+    def to_lisp_str(self):
+        'Convert the SBool to a Lisp-readable string.'
+        if self.value: return '#t'
+        else: return '#f'
+
+
+
 class Parser:
     def __init__(self, lexer):
         '''init a token list from a lexer'''
-        self.tokens = []
+        self._tokens = []
         while True:
             t = lexer.next_token()
             if t is None:
                 break
-            self.tokens.append(t)
+            self._tokens.append(t)
+        # increasing unique id for each S-expression.
+        self.new_id = unique_id()
 
     # Form an S-expression from lexical tokens
-    def form_sexpr(self):
-
-        if len(self.tokens) == 0:
+    def form_sexp(self):
+        if len(self._tokens) == 0:
             raise ParserError("expected an (' but end of string")
 
-        token = self.tokens.pop(0)
-
-        if token.type == 'LPAR':  # S-expression
-            L = SExpr(token)
-            while self.tokens[0].type != 'RPAR':
-                L.append(self.form_sexpr())
-            self.tokens.pop(0) # pop off ')'
+        tok = self._tokens.pop(0)
+        if tok.type == 'LPAR':  # S-expression
+            L = SExp(tok, self.new_id())
+            while self._tokens[0].type != 'RPAR':
+                L.append(self.form_sexp())
+            self._tokens.pop(0) # pop off ')'
             return L
-        elif token.type in ['ID', 'INT', 'BOOL']:  # Atom
-            return SAtom(token)
+        elif tok.type == 'ID':
+            return SSymbol(tok, self.new_id())
+        elif tok.type == 'INT':
+            return SInt(tok, self.new_id())
+        elif tok.type == 'BOOL':
+            return SBool(tok, self.new_id())
         else:
-            raise ParserError("Unrecognized token '%s' at line %d, col %d" % (token.raw, token.lineno, token.colno))
+            raise ParserError("Unrecognized token '%s' at line %d, col %d" % (tok.raw, tok.lineno, tok.colno))
 
-
-
diff --git a/scheme/utils.py b/scheme/utils.py
@@ -30,9 +30,8 @@ def to_string(exp):
     else:                           # number
         return str(exp)
 
-
 def ordinal(n):
-    # ordinal number decoration
+    # returns an ordinal number decorated string
     if 4 <= n <= 20 or 24 <= n <= 30:
         suffix = "th"
     else:

diff --git a/test/test_parser.py b/test/test_parser.py
@@ -30,8 +30,9 @@
 def parse_it(s):
     lexer = Lexer(s)
     parser = Parser(lexer)
-    print parser.form_sexpr()
-
+    sexp = parser.form_sexp()
+    print sexp
+    print sexp.to_lisp_str()
 
 for code in (c1, c2, c3):
     try: