<a href="https://colab.research.google.com/github/reidnersousa/IFB_Compiladores_2023/blob/main/AnalisadorLexicoComTabelaSimbolos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [41]:
class Token:
    """A lexical token: a category tag paired with the value it carries.

    Attributes:
        type: Category name of the token (for example ``'INTEGER'``).
        value: Payload of the token: the lexeme text, a parsed number, or
            ``None``.
    """

    def __init__(self, type_, value):
        """Store the token category and its value.

        Args:
            type_: Category name; the trailing underscore avoids shadowing
                the ``type`` builtin.
            value: Payload associated with the token.
        """
        self.type = type_
        self.value = value

    def __str__(self):
        """Return the token formatted as ``Token(<type>, <value>)``."""
        return f'Token({self.type}, {self.value})'

    # Containers format their elements with repr(); without this alias a list
    # or dict of tokens prints as opaque ``<... object at 0x...>`` entries.
    __repr__ = __str__

class Lexer:
    """Hand-written lexer with a reserved-keyword table and a symbol table.

    The lexer walks ``text`` one character at a time and hands out one token
    per call to :meth:`get_next_token`.

    Attributes:
        text: The full input being tokenized.
        pos: Index of the character currently under the cursor.
        current_char: Character at ``pos``, or ``None`` once the input is
            exhausted.
        reserved_keywords: Maps each keyword lexeme to its shared token.
        symbol_table: Maps each identifier seen so far to its shared token.
    """

    # Lexemes made of exactly one character, mapped to their token type.
    _SINGLE_CHAR_TOKENS = {
        '+': 'PLUS',
        '-': 'MINUS',
        '*': 'MULT',
        '/': 'DIV',
        '(': 'LPAREN',
        ')': 'RPAREN',
        '=': 'ASSIGN',
        ';': 'SEMI',
        '{': 'LBRACE',
        '}': 'RBRACE',
        '>': 'GT',
        '<': 'LT',
    }

    def __init__(self, text):
        """Prepare to tokenize ``text``.

        Args:
            text: Source string to tokenize. An empty string is accepted and
                yields only the ``EOF`` token.
        """
        self.text = text
        self.pos = 0
        # Empty input has no first character; start directly at end-of-input
        # instead of raising IndexError.
        self.current_char = self.text[0] if self.text else None
        # NOTE(review): the 'float' and 'string' keywords share their token
        # type with float/string literals ('FLOAT' / 'STRING'); consumers can
        # only tell them apart by the token value.
        self.reserved_keywords = {
            'if': Token('IF', 'if'),
            'else': Token('ELSE', 'else'),
            'while': Token('WHILE', 'while'),
            'int': Token('INT', 'int'),
            'float': Token('FLOAT', 'float'),
            'string': Token('STRING', 'string'),
        }
        self.symbol_table = {}

    def error(self, message='Caractere inválido'):
        """Abort lexing.

        Args:
            message: Text of the raised exception; defaults to the generic
                invalid-character message.

        Raises:
            Exception: Always.
        """
        raise Exception(message)

    def advance(self):
        """Move the cursor one character forward.

        ``current_char`` becomes ``None`` once the cursor passes the last
        character of the input.
        """
        self.pos += 1
        if self.pos > len(self.text) - 1:
            self.current_char = None
        else:
            self.current_char = self.text[self.pos]

    def skip_whitespace(self):
        """Advance past a run of whitespace characters."""
        while self.current_char is not None and self.current_char.isspace():
            self.advance()

    def get_number(self):
        """Consume a run of digits and dots and return a numeric token.

        Returns:
            A ``FLOAT`` token if the lexeme contains a dot, otherwise an
            ``INTEGER`` token; the token value is the parsed number.

        Raises:
            Exception: If the lexeme is not a valid number (for example
                ``'.'`` or ``'1.2.3'``).
        """
        start = self.pos
        while self.current_char is not None and (
            self.current_char.isdigit() or self.current_char == '.'
        ):
            self.advance()
        lexeme = self.text[start:self.pos]
        try:
            if '.' in lexeme:
                return Token('FLOAT', float(lexeme))
            return Token('INTEGER', int(lexeme))
        except ValueError:
            # Report bad numerals through the lexer's own error channel
            # rather than leaking the conversion's ValueError.
            self.error(f'Número malformado: {lexeme!r}')

    def get_string(self):
        """Consume a single-quoted string literal and return its token.

        The cursor must be on the opening quote when this is called.

        Returns:
            A ``STRING`` token whose value is the text between the quotes.

        Raises:
            Exception: If the input ends before the closing quote.
        """
        self.advance()  # step over the opening quote
        start = self.pos
        while self.current_char is not None and self.current_char != "'":
            self.advance()
        if self.current_char is None:
            self.error('String não terminada')
        content = self.text[start:self.pos]
        self.advance()  # step over the closing quote
        return Token('STRING', content)

    def _get_identifier(self):
        """Consume an alphanumeric word and return its token.

        Returns:
            The shared keyword token if the word is reserved; otherwise the
            identifier's token from the symbol table, created and registered
            on first sight.
        """
        start = self.pos
        while self.current_char is not None and (
            self.current_char.isalpha() or self.current_char.isdigit()
        ):
            self.advance()
        name = self.text[start:self.pos]
        if name in self.reserved_keywords:
            return self.reserved_keywords[name]
        if name in self.symbol_table:
            return self.symbol_table[name]
        token = Token('IDENTIFIER', name)
        self.symbol_table[name] = token
        return token

    def get_next_token(self):
        """Return the next token from the input.

        Returns:
            The next token, or an ``EOF`` token (value ``None``) once the
            input is exhausted.

        Raises:
            Exception: If the character under the cursor cannot start any
                token, or if a number or string literal is malformed.
        """
        while self.current_char is not None:
            char = self.current_char

            if char.isspace():
                self.skip_whitespace()
                continue

            if char.isdigit() or char == '.':
                return self.get_number()

            if char == "'":
                return self.get_string()

            if char.isalpha():
                return self._get_identifier()

            token_type = self._SINGLE_CHAR_TOKENS.get(char)
            if token_type is not None:
                self.advance()
                return Token(token_type, char)

            self.error()

        return Token('EOF', None)



teste

In [43]:
# Sample programs tried while developing the lexer. The trailing note on each
# line is the author's original verdict for that input.
# lexer = Lexer('int x = 42; float y = 3.14; while (x > 0) { y = y / 2; x = x - 1; }')  # noted as an error
# lexer = Lexer('int x= 42 float y = 3.14;')  # noted as ok
# lexer = Lexer('int x=1 ; int y=2; z; z = x + y;')
# lexer = Lexer("string nome = 'aberto' ")  # noted as an error
# lexer = Lexer('11>10;')  # noted as an error
lexer = Lexer('int valor = 10 ; int x =10 ; divisao = valor / x')  # noted as ok

# Print every token in order, the terminating EOF token included.
token = lexer.get_next_token()
print(token)
while token.type != 'EOF':
    token = lexer.get_next_token()
    print(token)


Token(INT, int)
Token(IDENTIFIER, valor)
Token(ASSIGN, =)
Token(INTEGER, 10)
Token(SEMI, ;)
Token(INT, int)
Token(IDENTIFIER, x)
Token(ASSIGN, =)
Token(INTEGER, 10)
Token(SEMI, ;)
Token(IDENTIFIER, divisao)
Token(ASSIGN, =)
Token(IDENTIFIER, valor)
Token(DIV, /)
Token(IDENTIFIER, x)
Token(EOF, None)


True