In [10]:
pip install sly

Note: you may need to restart the kernel to use updated packages.


In [24]:
#Use sly library for lexer and parser
from sly import Lexer

In [25]:
#Building a Lexer
class myLexer(Lexer):
    tokens = { NAME, NUMBER, STRING } #set of token names
    ignore = '\t ' #ignore space between tokens
    literals = {'=','+','-','/','*','(',')',',',';'}
    
    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
    STRING = r'\".*?"'
    
    #Number Token
    @_(r'\d+')
    def NUMBER(self, t):
       t.value = int(t.value) #convert to numeric value
       return t
       
    #Comment Token
    @_(r'\#.*')
    def COMMENT(self, t):
       pass
       
    #New Line Token
    @_(r'\n+')
    def newline(self, t):
       self.lineno = t.value.count('\n')
       
       
if __name__ == '__main__':
    data = """
    #This should be ignore
    x = 10
    y = 15
    z = x + y
    """
    
    lexer = myLexer()
    for tok in lexer.tokenize(data):
        print(tok)
       

Token(type='NAME', value='x', lineno=1, index=32)
Token(type='=', value='=', lineno=1, index=34)
Token(type='NUMBER', value=10, lineno=1, index=36)
Token(type='NAME', value='y', lineno=1, index=43)
Token(type='=', value='=', lineno=1, index=45)
Token(type='NUMBER', value=15, lineno=1, index=47)
Token(type='NAME', value='z', lineno=1, index=54)
Token(type='=', value='=', lineno=1, index=56)
Token(type='NAME', value='x', lineno=1, index=58)
Token(type='+', value='+', lineno=1, index=60)
Token(type='NAME', value='y', lineno=1, index=62)


In [26]:
from sly import Parser

In [27]:
#Build a Parser
class myParser(Parser):
    tokens = myLexer.tokens
    
    #Precedence rule
    precedence = (
        ('left','+','-'),
        ('left','*','/'),
        ('right','UMINUS'), #Unary minus operator
    )
    
    def __init__(self):
        self.env = { }
        
    #grammar rules and actions
    @_('')
    def statement(self,p):
        pass
    
    @_('value')
    def statement(self,p):
        return p.value
    
    @_('NAME "=" expr')
    def value(self,p):
        return('value',p.NAME, p.expr)
    
    @_('NAME "=" STRING')
    def value(self,p):
        return('value',p.NAME, p.STRING)
    
    @_('expr')
    def statement(self,p):
        return(p.expr)
    
    @_('expr "+" expr')
    def expr(self,p):
        return('add', p.expr0, p.expr1)
    
    @_('expr "-" expr')
    def expr(self,p):
        return('sub', p.expr0, p.expr1)
    
    @_('expr "*" expr')
    def expr(self,p):
        return('mul',p.expr0, p.expr1)
    
    @_('expr "/" expr')
    def expr(self,p):
        return('div',p.expr0, p.expr1)
    
    @_('"-" expr %prec UMINUS')
    def expr(self,p):
        return p.expr
    
    @_('NAME')
    def expr(self,p):
        return('var',p.NAME)
    
    @_('NUMBER')
    def expr(self,p):
        return('num',p.NUMBER)
        
if __name__ == '__main__':
    lexer = myLexer()
    parser = myParser()

    while True:
        try:
            text = input('test string > ')
            result = parser.parse(lexer.tokenize(text))
            print(result)
        except EOFError:
            break
 

test string > a = 10
('value', 'a', ('num', 10))
test string > b = 5
('value', 'b', ('num', 5))


KeyboardInterrupt: Interrupted by user

In [28]:
#Implementation
#Build an Abstract Syntax Tree
class myTest:
    def __init__(self, tree, env):
        self.env = env
        result = self.myTree(tree)
        if result is not None and isinstance(result, int):
            print(result)
        if isinstance(result, str) and result[0] == '"':
            print(result)
            
    def myTree(self, node):
        if isinstance(node, int):
            return node
        if isinstance(node, str):
            return node
        
        if node is None:
            return None
    
        if node[0] == 'program':
            if node[1] == None:
                self.myTree(node[2])
            else:
                self.myTree(node[1])
                self.myTree(node[2])
            
        if node[0] == 'num':
            return node[1]
    
        if node[0] == 'str':
            return node[1]
    
        if node[0] == 'add':
            return self.myTree(node[1]) + self.myTree(node[2])
        elif node[0] == 'sub':
            return self.myTree(node[1]) - self.myTree(node[2])
        elif node[0] == 'mul':
            return self.myTree(node[1]) * self.myTree(node[2])
        elif node[0] == 'div':
            return self.myTree(node[1]) / self.myTree(node[2])
    
        if node[0] == 'value':
            self.env[node[1]] = self.myTree(node[2])
            return node[1]
    
        if node[0] == 'var':
            try:
                return self.env[node[1]]
            except LookupError:
                print("Undefined variable '"+node[1]+"' found!")
                return 0
    
    
    
    

In [None]:
if __name__ == '__main__':
    lexer = myLexer()
    parser = myParser()
    env = {}
    
    while True:
        
        try:
            text = input('my Language > ')
            
        except EOFError:
            break
            
        if text:
            tree = parser.parse(lexer.tokenize(text))
            myTest(tree, env)

my Language > a = 10
my Language > b = 5
my Language > c = a + b
my Language > c
15
my Language > a = 10 
my Language > b = 5
my Language > c = a * b
my Language > c
50
