# **Implementação de Compiladores**
## Análise Léxica

In [1]:
import ply.lex as lex

In [3]:
# Token names handed to PLY through the module-level `tokens` sequence
# (always required). Each name below has a matching t_<NAME> rule in this
# notebook.
tokens = (
    'NUMBER',                              # integer literal
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE',    # arithmetic operators
    'LPAREN', 'RPAREN',                    # grouping parentheses
)


In [4]:
# Tokens described by a single pattern: each t_<NAME> string is the regular
# expression for that token. Regex metacharacters (+ * ( )) are escaped.

# Arithmetic operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'

# Grouping
t_LPAREN = r'\('
t_RPAREN = r'\)'

In [3]:
# Integer literal rule with action code. PLY takes the function's docstring
# as the token's regular expression, so the raw string must remain the first
# statement of the body and must not be reworded.
def t_NUMBER(t):
    r'\d+'
    digits = t.value
    t.value = int(digits)  # swap the matched text for its integer value
    return t
 
# Line-tracking rule: matches a run of newline characters and advances the
# lexer's line counter by the number of newlines matched. The function
# returns nothing, so the match does not produce a token.
def t_newline(t):
    r'\n+'
    newline_count = len(t.value)
    t.lexer.lineno = t.lexer.lineno + newline_count
 
# Characters skipped between tokens: the space and the tab
t_ignore = ' \t'
 
# Error rule: report the first character of the unmatched input and skip
# exactly one character so that lexing can carry on after it.
def t_error(t):
    bad_char = t.value[0]
    print("Illegal character '%s'" % bad_char)
    t.lexer.skip(1)
 
# Build the lexer from the `tokens` list and the t_* rules defined above.
# NOTE(review): a notebook kernel's namespace presumably has no __file__,
# which lex.lex() appears to look up in the calling namespace; assigning it
# first looks like a workaround -- confirm against the installed PLY version.
# The assignment must come before the lex.lex() call.
__file__ = "01-compiladores-analise-lexica.ipynb"
lexer = lex.lex()

In [14]:
# Sample input: an arithmetic expression spread over two lines. The leading
# newline and the indentation are part of the string, so they influence the
# line numbers and positions reported for each token.
data = (
    '\n'
    ' 3 + 4 * 10\n'
    '   + -20 *2\n'
    ' '
)

In [15]:
# Give the lexer some input.
# lexer.input() rewinds the scan position but leaves the line counter as it
# was, so re-running this cell would keep adding to the line numbers of the
# previous run. Reset the counter so reported line numbers always start at 1.
lexer.lineno = 1
lexer.input(data)

In [16]:
# Tokenize: iterating over the lexer yields one token at a time and stops
# once the input is exhausted. Each line printed is: type value lineno lexpos.
for tok in lexer:
    print(tok.type, tok.value, tok.lineno, tok.lexpos)

NUMBER 3 2 2
PLUS + 2 4
NUMBER 4 2 6
TIMES * 2 8
NUMBER 10 2 10
PLUS + 3 16
MINUS - 3 18
NUMBER 20 3 19
TIMES * 3 22
NUMBER 2 3 23
