In [1]:
#
### CONSTANTS
# 

import string

# Character classes the lexer uses to decide what kind of token starts here.
LETTERS = string.ascii_lowercase + string.ascii_uppercase  # a-z followed by A-Z
DIGITS = string.digits                                     # '0' through '9'
LETTERS_DIGITS = ''.join((LETTERS, DIGITS))                # valid identifier body characters





#
### ERRORS
#

class Error:
    """Base class for lexer and parser diagnostics.

    An error is a short category name plus free-form details; it is returned
    to the caller as a value rather than raised as an exception.
    """

    def __init__(self, error_name, details):
        self.error_name = error_name
        self.details = details

    def as_string(self):
        """Return the printable message in the form ``'<error_name> : <details>'``."""
        return '{} : {}'.format(self.error_name, self.details)

class IllegalCharError(Error):
    """Returned by the lexer when it meets a character that starts no token."""

    def __init__(self, details):
        Error.__init__(self, 'Illegal Character', details)
        

class IllegalNumberError(Error):
    """Returned by the lexer for a malformed number literal (e.g. a second '.')."""

    def __init__(self, details):
        # No trailing ': ' in the name: Error.as_string() already inserts the
        # ' : ' separator, so the old name rendered as 'Illegal Number:  : ...'.
        super().__init__('Illegal Number', details)
        

class IllegalNumberOrIdentError(Error):
    """Returned by the lexer when a number is immediately followed by a letter."""

    def __init__(self, details):
        Error.__init__(self, 'Illegal Number or Identifier ', details)
        
#        
### PARSER ERRORS
#

class ExpectedIdentError(Error):
    """Returned by the parser when an identifier (or similar operand) is missing."""

    def __init__(self, details):
        # No trailing ' :' in the name: Error.as_string() already inserts the
        # ' : ' separator, so the old name rendered as 'Expected Identifier : : ...'.
        super().__init__('Expected Identifier', details)
        
        
class ExpectedSymbolError(Error):
    """Returned by the parser when a required symbol such as '(' or '}' is missing."""

    def __init__(self, details):
        # No trailing ' :' in the name: Error.as_string() already inserts the
        # ' : ' separator, so the old name rendered as 'Expected Symbol : : ...'.
        super().__init__('Expected Symbol', details)
        
        
class IllegalMoveError(Error):
    """Returned by the parser for a construct that is syntactically forbidden.

    ``details`` completes the sentence started by the error name.
    """

    def __init__(self, details):
        Error.__init__(self, 'You can not  ', details)
        
        
class UnknownError(Error):
    """Fallback parser error for a token no grammar rule knows how to handle."""

    def __init__(self, details):
        Error.__init__(self, 'Error on', details)
        
        
        



#
### TOKENS
#

# Token type tags. Each constant is the value stored in Token.type and is what
# Token.__repr__ prints, so the strings themselves are part of the output.

# Literal value types
TT_INT = 'TT_INT'
TT_FLOAT = 'FLOAT'

# Arithmetic operators
TT_PLUS = 'PLUS'
TT_MINUS = 'MINUS'
TT_MUL = 'MUL'
TT_DIV = 'DIV'

# Grouping symbols
TT_LPAREN = 'LPAREN'
TT_RPAREN = 'RPAREN'
TT_LBRACE = "LEFT_BRACE"
TT_RBRACE = "RIGHT_BRACE"

# Assignment and comparison operators
TT_ASSIGN = '='
TT_LT = "<"
TT_GT = ">"
TT_EQ = "=="
TT_NOT_EQ = "!="

# Separator between parameters / arguments
TT_COMMA = ","

# Names: user identifiers and reserved words (see KEYWORDS)
TT_INDENTIFIER = 'INDENTIFIER'
TT_KEYWORD = 'KEYWORD'

# Statement terminator and end-of-input marker.
# TT_EOF used to be assigned twice with the same value; it is defined once here.
TT_NEWLINE = 'NEWLINE'
TT_EOF = 'EOF'





# Reserved words: an identifier spelled like one of these is lexed as a
# TT_KEYWORD token instead of TT_INDENTIFIER.
# The duplicate 'var' and 'return' entries were dropped, and 'float' was added
# because the parser's data-type check tests for a 'float' keyword that the
# lexer could otherwise never produce.
KEYWORDS = [
    "var",
    "END",
    "def",
    "True",
    "False",
    "if",
    "else",
    "return",
    "bool",
    "int",
    "float",
]

class Token:
    """A lexical token: a type tag plus an optional value.

    Attributes:
        type: one of the TT_* type tags.
        value: payload for tokens that carry one (numbers, identifiers,
            keywords); None for pure symbols.
    """

    def __init__(self, type_, value=None):
        self.type = type_
        self.value = value

    def __repr__(self):
        # Test against None rather than truthiness so that falsy payloads such
        # as the integer 0 or the float 0.0 are still shown.
        if self.value is not None:
            return f'{self.type} : {self.value}'
        return f'{self.type}'

    def matches(self, type_, value):
        """Return True when both the type tag and the value are equal to the given ones."""
        return self.type == type_ and self.value == value
    
#
### LEXER
# 

class Lexer:
    """Turns source text into a flat list of ``Token`` objects.

    The lexer walks ``text`` one character at a time. ``pos`` is the index of
    ``current_char``, which becomes ``None`` once the end of the text is passed.
    """

    def __init__(self, text):
        self.text = text
        self.pos = -1
        self.current_char = None
        # Load the first character (or None when the text is empty).
        self.advance()

    def advance(self):
        """Move one character forward; ``current_char`` is None past the end."""
        self.pos += 1
        if self.pos < len(self.text):
            self.current_char = self.text[self.pos]
        else:
            self.current_char = None

    def make_tokens(self):
        """Tokenize the whole text.

        Returns:
            ``(tokens, None)`` on success, where the list ends with a TT_EOF
            token, or ``([], error)`` as soon as an illegal character or a
            malformed number is met.
        """
        # Characters that always become exactly one value-less token.
        simple_tokens = {
            '+': TT_PLUS,
            '-': TT_MINUS,
            '*': TT_MUL,
            '/': TT_DIV,
            ',': TT_COMMA,
            '(': TT_LPAREN,
            ')': TT_RPAREN,
            '{': TT_LBRACE,
            '}': TT_RBRACE,
            '>': TT_GT,
            '<': TT_LT,
        }
        tokens = []

        while self.current_char is not None:
            ch = self.current_char

            if ch in (' ', '\t'):
                # Blanks only separate tokens.
                self.advance()
            elif ch in (';', '\n'):
                # Both ';' and a line break terminate a statement.
                tokens.append(Token(TT_NEWLINE))
                self.advance()
            elif ch in LETTERS:
                tokens.append(self.make_identifier())
            elif ch in DIGITS:
                number, err = self.make_number()
                if err:
                    return number, err
                tokens.append(number)
            elif ch in simple_tokens:
                tokens.append(Token(simple_tokens[ch]))
                self.advance()
            elif ch == '!' and self.peek_char('='):
                # '!=' spans two characters; a lone '!' falls through to the
                # illegal-character branch below.
                self.advance()
                tokens.append(Token(TT_NOT_EQ))
                self.advance()
            elif ch == '=':
                if self.peek_char('='):
                    tokens.append(Token(TT_EQ))
                    self.advance()  # skip the first '=' of '=='
                else:
                    tokens.append(Token(TT_ASSIGN))
                self.advance()
            else:
                self.advance()
                return [], IllegalCharError("'" + ch + "'")

        tokens.append(Token(TT_EOF))
        return tokens, None

    def peek_char(self, char=None, array=None):
        """Look at the character after ``current_char`` without consuming it.

        Returns True when that character equals ``char`` or is contained in
        ``array``; False otherwise, including at the end of the text.
        """
        next_pos = self.pos + 1
        if next_pos >= len(self.text):
            return False

        upcoming = self.text[next_pos]
        if char and upcoming == char:
            return True
        if array and upcoming in array:
            return True
        return False

    def make_number(self):
        """Read an integer or float literal starting at ``current_char``.

        Returns:
            ``(Token, None)`` with a TT_INT or TT_FLOAT token, or ``([], error)``
            when the literal runs straight into a letter or has a second '.'.
        """
        number_chars = DIGITS + '.'
        pieces = []
        seen_dot = False

        while self.current_char is not None and self.current_char in number_chars:
            if self.peek_char(array=LETTERS):
                # Something like '12a': report the text up to and including
                # the offending letter.
                pieces.append(self.current_char)
                self.advance()
                pieces.append(self.current_char)
                return [], IllegalNumberOrIdentError("'" + ''.join(pieces) + "'")

            if self.current_char == '.':
                if seen_dot:
                    pieces.append('.')
                    return [], IllegalNumberError("'" + ''.join(pieces) + "'")
                seen_dot = True

            pieces.append(self.current_char)
            self.advance()

        literal = ''.join(pieces)
        if seen_dot:
            return Token(TT_FLOAT, float(literal)), None
        return Token(TT_INT, int(literal)), None

    def make_identifier(self):
        """Read an identifier or keyword starting at ``current_char``.

        Letters, digits and '_' are consumed; the word becomes a TT_KEYWORD
        token when it is listed in KEYWORDS and a TT_INDENTIFIER token otherwise.
        """
        allowed = LETTERS_DIGITS + '_'
        chars = []

        while self.current_char is not None and self.current_char in allowed:
            chars.append(self.current_char)
            self.advance()

        word = ''.join(chars)
        if word in KEYWORDS:
            return Token(TT_KEYWORD, word)
        return Token(TT_INDENTIFIER, word)
            
        

def run(text):
    """Tokenize ``text`` with a fresh Lexer.

    Returns ``(tokens, None)`` on success and ``(None, error)`` on failure.
    """
    tokens, error = Lexer(text).make_tokens()
    return (None, error) if error else (tokens, None)
    

In [2]:
# Load the program to tokenize; the context manager closes the file handle,
# which the bare open(...).read() call left open.
with open("lab.txt", "r") as source_file:
    text = source_file.read()

In [3]:
# Tokenize the loaded source. run() returns (tokens, None) on success and
# (None, error) on failure, so `tokens` prints as None when lexing failed.
tokens, error = run(text)
if error:
    print(error.as_string())
    
print(tokens)


[KEYWORD : def, KEYWORD : int, INDENTIFIER : mult, LPAREN, KEYWORD : int, INDENTIFIER : x, ,, KEYWORD : int, INDENTIFIER : y, RPAREN, LEFT_BRACE, NEWLINE, KEYWORD : return, INDENTIFIER : x, MUL, INDENTIFIER : y, NEWLINE, RIGHT_BRACE, NEWLINE, NEWLINE, NEWLINE, NEWLINE, KEYWORD : var, INDENTIFIER : b, =, TT_INT : 3, MUL, TT_INT : 4, PLUS, TT_INT : 2, NEWLINE, KEYWORD : var, INDENTIFIER : a, =, KEYWORD : False, NEWLINE, KEYWORD : if, LPAREN, TT_INT : 5, >, TT_INT : 2, RPAREN, LEFT_BRACE, KEYWORD : var, INDENTIFIER : b, =, TT_INT : 3, RIGHT_BRACE, KEYWORD : else, LEFT_BRACE, KEYWORD : var, INDENTIFIER : auf, =, TT_INT : 412, RIGHT_BRACE, NEWLINE, NEWLINE, INDENTIFIER : kek_call, LPAREN, RPAREN, NEWLINE, NEWLINE, NEWLINE, KEYWORD : def, INDENTIFIER : main, LPAREN, RPAREN, LEFT_BRACE, NEWLINE, NEWLINE, KEYWORD : var, INDENTIFIER : a, =, KEYWORD : True, NEWLINE, KEYWORD : var, INDENTIFIER : b, =, FLOAT : 10.5, NEWLINE, NEWLINE, KEYWORD : var, INDENTIFIER : res, =, INDENTIFIER : mult, LPAREN,

In [4]:
# 
### NODES 
# 

class NumberNode:
    """Leaf node wrapping a single operand token; prints as that token."""

    def __init__(self, tok):
        self.tok = tok

    def __repr__(self):
        return format(self.tok)
    
class BoolNode:
    """Leaf node for a boolean literal.

    Only the token's ``value`` is kept (in ``self.tok``), not the token itself.
    """

    def __init__(self, tok):
        self.tok = tok.value

    def __repr__(self):
        return format(self.tok)

class BinOpNode:
    """Binary operation: left operand, operator, right operand."""

    def __init__(self, left_node, op_tok, right_node):
        self.left_node, self.op_tok, self.right_node = left_node, op_tok, right_node

    def __repr__(self):
        return '( {}, {},{})'.format(self.left_node, self.op_tok, self.right_node)
    
        
class VarAssignNode:
    """Variable declaration: the name token, the '=' token and the assigned value node."""

    def __init__(self, var_name_tok, op_tok, value_node):
        self.var_name_tok, self.op_tok, self.value_node = var_name_tok, op_tok, value_node

    def __repr__(self):
        return '({}, {}, {})'.format(self.var_name_tok, self.op_tok, self.value_node)
    
        
class VarAccessNode:
    """Reference to a variable by name.

    Note that construction re-tags the wrapped token in place: its ``type``
    is overwritten with ``'Access Var'``.
    """

    def __init__(self, var_name_tok):
        var_name_tok.type = 'Access Var'
        self.var_name_tok = var_name_tok

    def __repr__(self):
        return '({})'.format(self.var_name_tok)
    
    
class IfExprNode:
    """If statement: a condition, the 'then' body and an optional 'else' body."""

    def __init__(self, cond, then, else_):
        self.cond, self.then, self.else_ = cond, then, else_

    def __repr__(self):
        if not self.else_:
            return f'({self.cond},IF-STATEMENT, {self.then})'
        # With an else branch both bodies are printed through an ElseNode.
        branches = ElseNode(self.then, self.else_)
        return f'({self.cond},IF-STATEMENT,{branches})'
    
class ElseNode:
    """Pairs the 'then' body (``then1``) with the 'else' body (``then2``) for printing."""

    def __init__(self, then1, then2):
        self.then1, self.then2 = then1, then2

    def __repr__(self):
        return '( ELSE, {},{})'.format(self.then1, self.then2)
    
class ConditionNode:
    """Condition of an if statement: one operand, or two joined by a comparison token."""

    def __init__(self, cond1, op_tok=None, cond2=None):
        self.cond1, self.op_tok, self.cond2 = cond1, op_tok, cond2

    def __repr__(self):
        if not self.op_tok:
            # No comparison: the condition is just its single operand.
            return format(self.cond1)
        return '({},{},{})'.format(self.cond1, self.op_tok, self.cond2)
        
        
class ParameterNode:
    """One parameter or argument, printed as a (type, value) pair."""

    def __init__(self, type_, value):
        self.type_, self.value = type_, value

    def __repr__(self):
        return '(Parameter,({},{}))'.format(self.type_, self.value)
    
class NameNode:
    """Wraps the name of a function for printing."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return '(NAME,({}))'.format(self.name)
    
class ReturnNode:
    """Wraps the expression of a return statement for printing."""

    def __init__(self, expr):
        self.expr = expr

    def __repr__(self):
        return '(RETURN, {})'.format(self.expr)
    

    
class FunctionNode:
    """Function declaration.

    Attributes:
        name: the function name wrapped in a NameNode.
        expr: the body of the function.
        params: iterable of parameter nodes, or None when none were given.
        return_: the returned expression, or None when there is no return.
    """

    def __init__(self, name, expr=None, params=None, return_=None):
        self.name = NameNode(name)
        self.params = params
        self.return_ = return_
        self.expr = expr

    def __repr__(self):
        parts = [f'( Function, {self.name}']

        # `params` defaults to None; treat that as "no parameters" instead of
        # raising TypeError while iterating.
        for param in self.params or ():
            parts.append(f',{param}')

        parts.append(f',{self.expr}')

        if self.return_:
            parts.append(f',{ReturnNode(self.return_)}')

        parts.append(')')
        return ''.join(parts)
    
class ExprNode:
    """A block of statements (for example the body between braces)."""

    def __init__(self, expr_list):
        self.expr_list = expr_list

    def __repr__(self):
        if not self.expr_list:
            return '(No-Statements)'
        body = ''.join(',{}'.format(statement) for statement in self.expr_list)
        return '( Statements' + body + ')'
    
    

class FunctionCallNode:
    """Call of a function by name with a list of argument nodes.

    Each argument is expected to expose a ``var_name_tok`` token, whose type
    and value are printed as a ParameterNode.
    """

    def __init__(self, name, params):
        self.name = NameNode(name)
        self.params = params

    def __repr__(self):
        parts = [f'( Function_Call, {self.name}']
        for argument in self.params:
            tok = argument.var_name_tok
            parts.append(f',{ParameterNode(tok.type, tok.value)}')
        parts.append(')')
        return ''.join(parts)
    
            
        
    
    
    
    

#
### PARSER
# 


class Parser:
    """Recursive-descent parser that turns the lexer's token list into node objects.

    Statement-level methods (``expr``, ``function_call``, ``function_decl``,
    ``if_expr``) return ``(node, error)``; ``closed_expr`` returns
    ``(body, return_expr, error)``. The arithmetic helpers ``factor``, ``term``
    and ``bin_op`` return a bare node and report failures through
    ``self._pending_error``, which ``expr`` converts back into an error result.

    Attributes:
        tokens: the token list being parsed.
        tok_idx: index of ``current_tok`` inside ``tokens``.
        current_tok: the token currently being examined.
        eof_statements: set to True once the end of input (or an error) stops
            top-level parsing.
    """

    def __init__(self, tokens):
        self.tokens = tokens
        self.tok_idx = -1
        self.eof_statements = False
        # Error detected inside factor(); see the class docstring.
        self._pending_error = None
        # Fallback so that an empty token list behaves like an empty program
        # instead of leaving current_tok undefined.
        self.current_tok = Token(TT_EOF)
        self.advance()

    def advance(self):
        """Step to the next token and return the current token.

        Past the end of the list ``current_tok`` is left unchanged, so the
        parser keeps seeing the last token.
        """
        self.tok_idx += 1
        if self.tok_idx < len(self.tokens):
            self.current_tok = self.tokens[self.tok_idx]

        return self.current_tok

    def _skip_newlines(self):
        """Consume any run of TT_NEWLINE tokens at the current position."""
        while self.current_tok.type == TT_NEWLINE:
            self.advance()

    def parse(self):
        """Parse the whole token list; returns ``(statements, error)``."""
        return self.statements()

    def statements(self):
        """Parse top-level statements until EOF or the first error.

        Returns:
            ``(statements, None)`` on success, or ``(statements, error)`` with
            the statements parsed before the failure.
        """
        statements = []

        while self.current_tok.type != TT_EOF and not self.eof_statements:
            self._skip_newlines()
            statement, error = self.expr()
            if error:
                self.eof_statements = True
                return statements, error

            if statement is None:
                # expr() yields no node only at end of input (for example
                # after trailing newlines); there is nothing to record.
                self.eof_statements = True
                break

            statements.append(statement)

        return statements, None

    def factor(self):
        """Parse one operand: a number, an identifier or a function call.

        Returns the node, or None after storing an error in
        ``self._pending_error`` when no valid operand is found.
        """
        tok = self.current_tok

        if tok.type in (TT_INT, TT_FLOAT, TT_INDENTIFIER):
            self.advance()
            if self.current_tok.type == TT_LPAREN:
                node, err = self.function_call(tok)
                if err:
                    # Previously this error was dropped and parsing stopped
                    # silently; keep it so expr() can report it.
                    self._pending_error = err
                    return None
                return node
            # Identifiers are wrapped in NumberNode as well.
            return NumberNode(tok)

        # A missing operand (e.g. nothing after '+') used to produce a
        # BinOpNode with a None side and no error at all.
        self._pending_error = UnknownError(
            f'{tok} , expected a number, variable or function call')
        return None

    def term(self):
        """Parse operands joined by '*' or '/'."""
        return self.bin_op(self.factor, (TT_MUL, TT_DIV))

    def expr(self):
        """Parse one statement or expression starting at the current token.

        Returns:
            ``(node, None)`` on success, ``(None, error)`` on failure and
            ``(None, None)`` when the current token is EOF.
        """
        # Variable declaration: var <identifier> = <expression>
        if self.current_tok.matches(TT_KEYWORD, 'var'):
            self.advance()

            if self.current_tok.type != TT_INDENTIFIER:
                return None, ExpectedIdentError(' no var declaration')

            var_name = self.current_tok
            self.advance()

            if self.current_tok.type != TT_ASSIGN:
                return None, ExpectedSymbolError(TT_ASSIGN)
            op_tok = self.current_tok
            self.advance()

            value_node, err = self.expr()
            if err:
                return None, err
            if value_node is None:
                # End of input right after '='.
                return None, UnknownError(
                    f'{self.current_tok} , expected an expression after =')

            return VarAssignNode(var_name, op_tok, value_node), None

        # Function declaration
        if self.current_tok.matches(TT_KEYWORD, 'def'):
            self.advance()
            return self.function_decl()

        # If statement
        if self.current_tok.matches(TT_KEYWORD, 'if'):
            return self.if_expr()

        # Arithmetic expression, plain operand or function call. Identifiers
        # are handled here too, which is why the old separate identifier
        # branch further down could never run and has been removed.
        if self.current_tok.type in (TT_INT, TT_FLOAT, TT_INDENTIFIER):
            node = self.bin_op(self.term, (TT_PLUS, TT_MINUS))
            err, self._pending_error = self._pending_error, None
            if err:
                return None, err
            return node, None

        # Boolean literal
        if (self.current_tok.matches(TT_KEYWORD, 'True')
                or self.current_tok.matches(TT_KEYWORD, 'False')):
            tok = self.current_tok
            self.advance()
            return BoolNode(tok), None

        if self.current_tok.type != TT_EOF:
            return None, UnknownError(
                f'{self.current_tok} , error not catched in  synthax analysis')

        # End of input: always hand back a pair so callers can unpack it
        # (the implicit None returned here before crashed them with TypeError).
        self.eof_statements = True
        return None, None

    def function_call(self, name):
        """Parse the argument list of a call; the current token is the '('.

        Only identifiers are accepted as arguments. Commas are skipped when
        present, so a missing comma between two arguments is not rejected.

        Returns ``(FunctionCallNode, None)`` or ``(None, error)``.
        """
        self.advance()  # consume '('
        param_node_list = []

        while self.current_tok.type != TT_RPAREN:
            if self.current_tok.type == TT_COMMA:
                self.advance()
            if self.current_tok.type != TT_INDENTIFIER:
                return None, ExpectedIdentError('specify a var in after , ')

            value = self.current_tok
            self.advance()
            param_node_list.append(VarAccessNode(value))

        if self.current_tok.type != TT_RPAREN:
            return None, ExpectedSymbolError(TT_RPAREN)

        self.advance()
        return FunctionCallNode(name, param_node_list), None

    def _is_data_type(self):
        """Return True when the current token is a keyword naming a data type.

        'True' and 'False' are still accepted for backward compatibility;
        'bool' was added because it is a reserved word that no rule accepted.
        """
        return any(
            self.current_tok.matches(TT_KEYWORD, type_name)
            for type_name in ('int', 'float', 'bool', 'True', 'False')
        )

    def function_decl(self):
        """Parse a function declaration; the 'def' keyword is already consumed.

        Grammar: [<type>] <name> ( [<type> <name> [,]]* ) { <body> }

        Returns ``(FunctionNode, None)`` or ``(None, error)``.
        """
        # Optional return type: it is consumed but not stored in the node.
        if self._is_data_type():
            self.advance()

        if self.current_tok.type != TT_INDENTIFIER:
            return None, ExpectedIdentError('function name')

        func_name = self.current_tok
        self.advance()

        if self.current_tok.type != TT_LPAREN:
            return None, ExpectedSymbolError(TT_LPAREN)

        param_node_list = []
        self.advance()

        while self.current_tok.type != TT_RPAREN:
            if self.current_tok.type == TT_COMMA:
                self.advance()
            if not self._is_data_type():
                return None, ExpectedIdentError('insert vars or close the paranthesis')

            type_ = self.current_tok
            self.advance()

            if self.current_tok.type != TT_INDENTIFIER:
                return None, ExpectedIdentError('var name')
            value = self.current_tok.value
            self.advance()

            param_node_list.append(ParameterNode(type_, value))

        if self.current_tok.type != TT_RPAREN:
            return None, ExpectedSymbolError(TT_RPAREN)

        self.advance()

        expr, return_, err = self.closed_expr()
        if err:
            return None, err

        return FunctionNode(func_name, expr, param_node_list, return_), None

    def closed_expr(self):
        """Parse a brace-delimited block with an optional trailing return.

        Newlines after the closing brace are consumed as well.

        Returns ``(ExprNode, return_expr_or_None, None)`` on success or
        ``(None, None, error)`` on failure.
        """
        if self.current_tok.type != TT_LBRACE:
            return None, None, ExpectedSymbolError(TT_LBRACE)

        self.advance()
        self._skip_newlines()

        expr_list = []
        while (self.current_tok.type != TT_RBRACE
               and not self.current_tok.matches(TT_KEYWORD, 'return')):
            expr_returned, err = self.expr()
            if err:
                return None, None, err
            if expr_returned is None:
                # End of input before the closing brace. Without this check
                # the loop would never terminate, because EOF is not consumed.
                return None, None, ExpectedSymbolError(TT_RBRACE)

            expr_list.append(expr_returned)
            self._skip_newlines()

        return_ = None
        if self.current_tok.matches(TT_KEYWORD, 'return'):
            self.advance()
            return_, err = self.expr()
            if err:
                return None, None, err

            self._skip_newlines()
            # Nothing may follow the return statement inside the block.
            if self.current_tok.type != TT_RBRACE:
                return None, None, IllegalMoveError('write expressions after return')

        self._skip_newlines()

        if self.current_tok.type != TT_RBRACE:
            return None, None, ExpectedSymbolError(TT_RBRACE)

        self.advance()
        self._skip_newlines()

        return ExprNode(expr_list), return_, None

    def if_expr(self):
        """Parse an if statement; the current token is the 'if' keyword.

        Grammar: if ( <operand> [<comparison> <operand>] ) { ... } [else { ... }]

        Returns ``(IfExprNode, None)`` or ``(None, error)``.
        """
        comparison_ops = (TT_LT, TT_GT, TT_EQ, TT_NOT_EQ)
        operand_types = (VarAccessNode, NumberNode, BinOpNode)

        self.advance()  # consume 'if'
        if self.current_tok.type != TT_LPAREN:
            return None, ExpectedSymbolError(TT_LPAREN)
        self.advance()

        condition1, err = self.expr()
        if err:
            return None, err

        if type(condition1) not in operand_types:
            return None, IllegalMoveError(
                'Use stuff expect arith expresions, vars or  numbers on condition 1')

        op_tok = None
        condition2 = None

        if self.current_tok.type != TT_RPAREN:
            if self.current_tok.type not in comparison_ops:
                return None, ExpectedIdentError('Expected Operations Symbols on comparision!')
            op_tok = self.current_tok
            self.advance()

            condition2, err = self.expr()
            if err:
                return None, err

            if type(condition2) == BoolNode:
                # A boolean may only be tested for (in)equality. This check
                # used to sit after the generic type check below, which
                # already rejected every BoolNode, so it could never run.
                if op_tok.type not in (TT_EQ, TT_NOT_EQ):
                    return None, IllegalMoveError('compare boolean!')
            elif type(condition2) not in operand_types:
                return None, IllegalMoveError(
                    'use stuff expect: arith expresions, vars or  numbers on condition 2')

            if self.current_tok.type in comparison_ops:
                return None, IllegalMoveError('use more than 1 comparition token/operation!')

        if self.current_tok.type != TT_RPAREN:
            return None, ExpectedSymbolError(TT_RPAREN)
        self.advance()

        # 'then' block; a return expression inside it is parsed but not kept.
        expr, _, err = self.closed_expr()
        if err:
            return None, err

        else_ = None
        if self.current_tok.matches(TT_KEYWORD, 'else'):
            self.advance()
            else_, _, err = self.closed_expr()
            if err:
                return None, err

        return IfExprNode(ConditionNode(condition1, op_tok, condition2), expr, else_), None

    def bin_op(self, func, ops):
        """Parse a left-associative chain ``func() (op func())*``.

        ``func`` parses one operand and ``ops`` lists the accepted operator
        token types. The chain stops early once an operand has reported an
        error through ``self._pending_error``.
        """
        left = func()

        while self._pending_error is None and self.current_tok.type in ops:
            op_tok = self.current_tok.type
            self.advance()
            right = func()
            left = BinOpNode(left, op_tok, right)

        return left
        

In [5]:
# Build the syntax tree from the tokens produced by the lexing cell.
if tokens is None:
    # run() hands back None instead of a token list when lexing failed;
    # Parser(None) would crash with a TypeError instead of a readable message.
    print('Nothing to parse: lexing failed.')
else:
    parser = Parser(tokens)
    res, err = parser.parse()
    if err:
        print(err.as_string())
    else:
        print(res)

[( Function, (NAME,(INDENTIFIER : mult)),(Parameter,(KEYWORD : int,x)),(Parameter,(KEYWORD : int,y)),(No-Statements),(RETURN, ( INDENTIFIER : x, MUL,INDENTIFIER : y))), (INDENTIFIER : b, =, ( ( TT_INT : 3, MUL,TT_INT : 4), PLUS,TT_INT : 2)), (INDENTIFIER : a, =, False), ((TT_INT : 5,>,TT_INT : 2),IF-STATEMENT,( ELSE, ( Statements,(INDENTIFIER : b, =, TT_INT : 3)),( Statements,(INDENTIFIER : auf, =, TT_INT : 412)))), ( Function_Call, (NAME,(INDENTIFIER : kek_call))), ( Function, (NAME,(INDENTIFIER : main)),( Statements,(INDENTIFIER : a, =, True),(INDENTIFIER : b, =, FLOAT : 10.5),(INDENTIFIER : res, =, ( Function_Call, (NAME,(INDENTIFIER : mult)),(Parameter,(Access Var,a)),(Parameter,(Access Var,b)))),(INDENTIFIER : b, =, False)))]
