In [5]:
import re
import pandas as pd

# Sample program fed to the lexer. Text from "@" to the end of a line is a
# comment, and every statement line must end with ";".
# Single-expression alternative (note: it lacks the trailing ";" that
# check_eol requires): inputData = "42+(675*31)-20925+2**2"
inputData = """
@TETTETEETE
-1+1;
1+2+4+5;
-1+3+6+-5;
42+(675*31)-20925+2**2;
"""

# Token rules as (name, regex) pairs. They are joined into one alternation of
# named groups, so ORDER MATTERS: at each position the first rule that matches
# wins. Because OPERATION precedes NUMBER, a leading sign is emitted as an
# OPERATION token and never as part of the NUMBER lexeme.
rules = [
    ("OPERATION", r"(\*\*|\+|\-|\*|\/)"),
    ("NUMBER", r"[+-]?[0-9]+"),
    ("OPEN_PARENTHESES", r"\("),
    ("CLOSE_PARENTHESES", r"\)"),
    ("EOL", r";"),
    ("NEW_LINE", r"\n"),
    # Catch-all: any other single character (except a newline) is an error.
    ("ERROR_CODE", r"."),
]

# Maps each operator lexeme to the token name reported for it.
values_operations = {
    "+": "SUM",
    "-": "SUBTRACTION",
    "*": "MULTIPLY",
    "/": "DIVISION",
    "**": "EXPONENTIAL",
}

In [6]:
def remove_comment(input_code):
    """Return ``input_code`` with every ``@`` comment stripped out.

    A comment starts at an ``@`` character and runs to the end of that line.
    The line break itself is kept, so line numbering is unaffected.
    """
    comment_pattern = re.compile(r'@.*')
    return comment_pattern.sub('', input_code)

def check_eol(input_code):
    """Verify that every non-blank line of ``input_code`` ends with ``;``.

    Lines are compared after stripping surrounding whitespace. Blank lines,
    including lines that contain only whitespace (for example what is left of
    an indented comment line once the comment has been removed), are ignored.

    Args:
        input_code: Source text, possibly spanning several lines.

    Returns:
        None. The function only validates.

    Raises:
        Exception: If a non-blank line does not end with ``;``.
    """
    for raw_line in input_code.splitlines():
        statement = raw_line.strip()
        # Strip BEFORE testing for emptiness so whitespace-only lines are
        # treated as blank instead of being reported as a missing ";".
        if not statement:
            continue
        if not statement.endswith(";"):
            raise Exception("Sintaxe error!")

def tokenize(rules, input_code):
    """Split ``input_code`` into tokens according to ``rules``.

    Args:
        rules: Sequence of ``(name, regex)`` pairs. They are combined, in
            order, into one alternation of named groups, so earlier rules take
            precedence. The names ``"ERROR_CODE"``, ``"NEW_LINE"``,
            ``"OPERATION"`` and ``"NUMBER"`` receive special handling.
        input_code: Source text to tokenize.

    Returns:
        A list of dicts with the keys ``"lexeme"`` (matched text), ``"type"``
        (rule name) and ``"token"``. The token is the operation name looked up
        in the module-level ``values_operations`` for ``OPERATION``, the
        lexeme itself for ``NUMBER``, and the rule name otherwise.
        ``NEW_LINE`` matches and whitespace produce no token.

    Raises:
        Exception: If a non-whitespace character is matched by the
            ``ERROR_CODE`` rule. The message contains the offending character
            and the 1-based line number.
    """
    all_rules_regex = '|'.join('(?P<%s>%s)' % x for x in rules)
    accu_line = 1
    result = []
    for m in re.finditer(all_rules_regex, input_code):
        kind = m.lastgroup
        lexeme = m.group()

        if kind == "ERROR_CODE":
            # Whitespace (spaces, tabs, "\r" from CRLF line endings) reaches
            # the catch-all rule but carries no meaning; skip it so that input
            # such as "1+2; @comment" or indented code is not rejected.
            if lexeme.isspace():
                continue
            raise Exception("%s Erro on the line %d" % (lexeme, accu_line))
        if kind == "NEW_LINE":
            # Newlines only advance the line counter used in error messages.
            accu_line += 1
            continue

        if kind == "OPERATION":
            token = values_operations[lexeme]
        elif kind == "NUMBER":
            token = lexeme
        else:
            token = kind

        result.append({"lexeme": lexeme, "type": kind, "token": token})
    return result

In [7]:
def execute_all_process(rules, input_code):
    """Run the whole lexing pipeline and return the tokens as a DataFrame.

    Steps, in order: remove ``@`` comments, check that each statement line is
    terminated, then tokenize the cleaned text with ``rules``. Any exception
    raised by a step propagates to the caller.
    """
    code_without_comments = remove_comment(input_code)
    check_eol(code_without_comments)
    tokens = tokenize(rules, code_without_comments)
    return pd.DataFrame(data=tokens)


In [8]:
# Tokenize the sample program; the resulting DataFrame is the cell's output.
execute_all_process(rules=rules, input_code=inputData)

Unnamed: 0,lexeme,type,token
0,-,OPERATION,SUBTRACTION
1,1,NUMBER,1
2,+,OPERATION,SUM
3,1,NUMBER,1
4,;,EOL,EOL
5,1,NUMBER,1
6,+,OPERATION,SUM
7,2,NUMBER,2
8,+,OPERATION,SUM
9,4,NUMBER,4
