### Retrieving tokens from math expressions

Using regular expressions

In [1]:
from collections import OrderedDict
import re

# Known names for each semantic category. Tokens first matched as a generic
# 'operator' or 'symbol' are looked up in these lists to get a finer label.
categories = dict(
    operator=['+', '-', '*', '/', '**'],
    function=['exp', 'log', 'sin', 'cos', 'tan'],
    constant=['pi', 'e'],
    variable=['x', 'y'],
)

# Lexical patterns keyed by coarse token category. Insertion order is kept by
# the OrderedDict, and it is the order in which the alternatives are tried
# once the patterns are joined with '|'.
individual_regexes = OrderedDict([
    # Numeric literal, built from three fragments:
    #   1. optional sign, taken only when NOT preceded by a letter, '_',
    #      digit, '.' or ')', so a '+'/'-' right after an operand is left
    #      for the 'operator' pattern;
    #   2. digits with an optional decimal point, or a leading-dot fraction;
    #   3. optional exponent.
    # Every fragment is a raw string: without the r prefix, '\d', '\.' and
    # '\-' are invalid escape sequences and trigger a warning on modern Python.
    # NOTE(review): the '+' quantifier on fragment 2 lets '1.2.3' match as a
    # single constant -- confirm whether that is intended.
    ('constant', r'(?:(?<![A-Za-z_\d\.)])[+\-])?'
                 r'(?:\d+\.?\d*|\.\d+)+'
                 r'(?:[eE][+\-]?\d+)?'),
    # Identifier: letters/underscores first, then letters, underscores, digits.
    ('symbol', r'[A-Za-z_]+[A-Za-z_\d]*'),
    # Any run of characters that are not ASCII letters, digits, parentheses,
    # commas or whitespace.
    ('operator', r'[^A-Za-z\d(),\s]+'),
    ('left_parenthesis', r'\('),
    ('right_parenthesis', r'\)'),
    ('separator', r','),
])

# Combined tokenizer pattern: each category becomes a named group and the
# groups are joined into one alternation, in the order individual_regexes
# yields them.
regex = re.compile('|'.join(
    '(?P<{}>{})'.format(name, pattern)
    for name, pattern in individual_regexes.items()
))


def parse(pair):
    """Turn a ``(category, token)`` pair into a one-entry ``{category: token}`` dict.

    Tokens matched as 'operator' or 'symbol' are re-labelled with the first
    key of ``categories`` whose list contains the token. When no list
    contains it, the label is 'undefined_symbol'. Any other category is
    returned as given.
    """
    label, token = pair[0], pair[1]
    if label not in ('operator', 'symbol'):
        return {label: token}
    for name, members in categories.items():
        if token in members:
            return {name: token}
    return {'undefined_symbol': token}


def tokenize(expression):
    """Return the tokens found in ``expression`` as a list of one-entry dicts.

    Each match of the combined ``regex`` is reduced to the first named group
    that actually matched, and that ``(category, text)`` pair is passed to
    ``parse``. Text that matches no pattern is skipped.
    """
    tokens = []
    for match in re.finditer(regex, expression):
        # Pick the first named group whose value is not None.
        hit = next(
            (name, text)
            for name, text in match.groupdict().items()
            if text is not None
        )
        tokens.append(parse(hit))
    return tokens

Global regex used (paste it into [regex101.com](https://regex101.com) and have fun!):

In [2]:
# Show the combined pattern: one named group per category, joined with '|'.
print(regex.pattern)

(?P<constant>(?:(?<![A-Za-z_\d\.)])[+\-])?(?:\d+\.?\d*|\.\d+)+(?:[eE][+\-]?\d+)?)|(?P<symbol>[A-Za-z_]+[A-Za-z_\d]*)|(?P<operator>[^A-Za-z\d(),\s]+)|(?P<left_parenthesis>\()|(?P<right_parenthesis>\))|(?P<separator>,)


Let's extract some tokens from an expression:

In [3]:
# Tokenize a sample expression; each token is a one-entry {category: token} dict.
tokenize('4-3*sin(2*pi*x)')

[{'constant': '4'},
 {'operator': '-'},
 {'constant': '3'},
 {'operator': '*'},
 {'function': 'sin'},
 {'left_parenthesis': '('},
 {'constant': '2'},
 {'operator': '*'},
 {'constant': 'pi'},
 {'operator': '*'},
 {'variable': 'x'},
 {'right_parenthesis': ')'}]

Copyright 2016 **Alberto Lorenzo**
Released under **[MIT](https://opensource.org/licenses/MIT) License**