In [138]:
import re
from sympy import symbols, N

In [139]:
# Informal description of the grammar.
# The start symbol is <program>.
# Whitespace is not part of the grammar.
#
# The text is a raw string so the literal `\.` in <decimal> is not treated as
# a (invalid) string escape. <name> uses `*` and <decimal> uses digits so the
# description agrees with the regular expressions used by the implementation
# (a name may be a single letter; a number is made of digits only).

grammar01 = r"""

<program> = <variable> <constants>? <functions>? <expression>

<name> = <letter>(<alphanumeric_>)*
<variable> = variable <name>
<constants> = constants <name> (, <name>)*
<functions> = functions <name> (, <name>)*

<decimal> = <digit>+ [\.<digit>+]
<expression> = <term> [(+|-) <expression>]
<term> = <power> [* <term>]
<power> = <factor> [^ <power>]
<factor> =  <decimal> | <name> | (<expression>)

"""

In [140]:
# Sample programs. Each one is a header (one `variable` line, then a
# `constants` line and a `functions` line) followed by the expression.

# Sum of a constant, the variable and a function, wrapped in parentheses.
example01 = """

variable x
constants A, B, C
functions f
(A + x + f)

"""

# Product of three factors mixing a constant and two functions.
example02 = """

variable t
constants k
functions x, y
k*(x + y)*(x - y)

"""

# Mixes a numeric literal with the variable and a function.
example03 = """

variable t
constants k
functions x, y
(x + t)*x + 2*t

"""

# Uses the power operator and a variable name containing a digit.
# NOTE(review): `Ay` is not one of the declared names and the expression ends
# with an apostrophe; presumably at least one of these is deliberate (an
# invalid-input sample, or notation not yet supported) - confirm.
example04 = """

variable t2
constants A
functions x, y, z
(x+Ay)*(x^2-A-z+t2)'

"""

In [141]:
class Variables:
  """
  Registry of the symbols (variable, functions and constants) declared by a
  program, keyed by name.

  Attributes:
    variables: dict mapping each declared name to the symbol created for it.
  """
  def __init__(self):
    self.variables = {}

  def add(self, variable_name, constant=False):
    """
    Create a symbol for `variable_name` and register it.

    Args:
      variable_name: name of the symbol to declare.
      constant: whether the name was declared as a constant.

    Raises:
      Exception: if `variable_name` was already declared.
    """
    if variable_name in self.variables:
      # Use `%` so the name is actually interpolated into the message
      # (passing it as a second Exception argument leaves '%s' unformatted).
      raise Exception('Variable %s already exists' % variable_name)
    # NOTE(review): `constant` is accepted but not forwarded; every symbol is
    # created with constant=False. Kept as-is to preserve the symbols that
    # are produced today - confirm whether the flag should be passed through.
    self.variables[variable_name] = symbols(variable_name, constant=False)

  def get(self, variable_name):
    """
    Return the symbol registered under `variable_name`.

    Raises:
      Exception: if the name was never declared.
    """
    x = self.variables.get(variable_name)
    if x is None:
      raise Exception('Variable %s does not exist' % variable_name)
    return x

In [142]:
# A name is a letter followed by any number of letters or digits.
name = r'[a-zA-Z][a-zA-Z0-9]*'
# A comma-separated list of names, captured as a single group.
name_list = r'({name}(?:\s*,\s*{name})*)'.format(name=name)

# Program header followed by the expression. Groups, in order:
#   1. the variable name
#   2. the raw list of constants (None when the section is absent)
#   3. the raw list of functions (None when the section is absent)
#   4. everything after the header (the expression text)
# The pieces are joined by implicit string concatenation instead of a
# triple-quoted string: without re.X, the line breaks of a triple-quoted
# pattern are matched literally, which forced exactly three newlines at fixed
# positions and rejected programs that omit an optional section.
# Sections are separated by any run of whitespace (\s+).
header_regex = (
    r'\s*variable\s+({name})'
    r'(?:\s+constants\s+{name_list})?'
    r'(?:\s+functions\s+{name_list})?'
    r'\s+(.+)'
).format(name=name, name_list=name_list)

def _split_names(raw_list):
  """Split a comma-separated name list into stripped names; None gives []."""
  if raw_list is None:
    return []
  return [item.strip() for item in raw_list.split(',')]


def preprocess(code):
  """
  Extract the expression and the declared symbols from a program's text.

  Args:
    code: full program text (header lines followed by the expression).

  Returns:
    A tuple (expression, variables): the expression text with surrounding
    whitespace removed, and a Variables registry holding the variable, the
    functions and the constants, registered in that order.

  Raises:
    ValueError: if `code` does not match `header_regex`.
    Exception: propagated from Variables.add when a name is declared twice.
  """
  m = re.match(header_regex, code, re.S)
  if m is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError('Invalid program: could not find the "variable" header')
  variable, constants, functions, expression = m.groups()

  variables = Variables()
  variables.add(variable.strip(), constant=False)
  # The constants/functions sections are optional in the header pattern, so
  # their groups may be None; _split_names turns that into an empty list.
  for function_name in _split_names(functions):
    variables.add(function_name, constant=False)
  for constant_name in _split_names(constants):
    variables.add(constant_name, constant=True)

  return expression.strip(), variables



In [143]:
# Run preprocess on example04 to inspect the expression text and the symbol
# registry it extracts (shown by the next two cells).
expression, variables = preprocess(example04)

In [144]:
expression

"(x+Ay)*(x^2-A-z+t2)'"

In [145]:
variables.variables

{'A': A, 't2': t2, 'x': x, 'y': y, 'z': z}

In [146]:
class Node:
    """
    Base class for a node of the expression tree: a name plus an optional
    list of child nodes (None for a leaf).

    Subclasses are expected to override `evaluate`.
    """
    def __init__(self, name, children=None):
        self.children = children
        self.name = name

    def print(self):
        """
        Return the tree in prefix form, e.g. "+(1, 23)".

        A leaf (children is None) prints as just its name; an empty children
        list prints as "name()" instead of raising IndexError.
        """
        if self.children is None:
            return self.name
        rendered = ', '.join(child.print() for child in self.children)
        return self.name + '(' + rendered + ')'

    def evaluate(self, variables):
        """
        Compute the value of this node; must be provided by subclasses.

        Raises:
            NotImplementedError: always, so a missing override fails loudly
                instead of silently producing None.
        """
        raise NotImplementedError(
            '%s does not implement evaluate()' % type(self).__name__)

class AddNode(Node):
  """Addition node named '+': evaluates to children[0] + children[1]."""

  def __init__(self, children=None):
    super().__init__(name='+', children=children)

  def evaluate(self, variables):
    # Left operand is evaluated first, then the right one.
    lhs = self.children[0].evaluate(variables)
    rhs = self.children[1].evaluate(variables)
    return lhs + rhs

class SubNode(Node):
  """Subtraction node named '-': evaluates to children[0] - children[1]."""

  def __init__(self, children=None):
    super().__init__(name='-', children=children)

  def evaluate(self, variables):
    # Left operand is evaluated first, then the right one.
    minuend = self.children[0].evaluate(variables)
    subtrahend = self.children[1].evaluate(variables)
    return minuend - subtrahend

class MultiNode(Node):
  """Multiplication node named '*': evaluates to children[0] * children[1]."""

  def __init__(self, children=None):
    super().__init__(name='*', children=children)

  def evaluate(self, variables):
    # Left operand is evaluated first, then the right one.
    lhs = self.children[0].evaluate(variables)
    rhs = self.children[1].evaluate(variables)
    return lhs * rhs

class PowerNode(Node):
  """Power node named '^': evaluates to children[0] ** children[1]."""

  def __init__(self, children=None):
    super().__init__(name='^', children=children)

  def evaluate(self, variables):
    # Base is evaluated first, then the exponent.
    base = self.children[0].evaluate(variables)
    exponent = self.children[1].evaluate(variables)
    return base ** exponent
  
class LiteralNode(Node):
  """Leaf holding a literal; its name is the literal's text."""

  def __init__(self, name, children=None):
    super().__init__(name=name, children=children)

  def evaluate(self, variables):
    # `variables` is unused: the value comes from passing the stored text
    # to N (imported from sympy).
    literal_text = self.name
    return N(literal_text)
  
class VariableNode(Node):
  """Leaf holding a name that is looked up in `variables` when evaluated."""

  def __init__(self, name, children=None):
    super().__init__(name=name, children=children)

  def evaluate(self, variables):
    # Delegates the lookup (and any failure handling) to variables.get.
    symbol_name = self.name
    return variables.get(symbol_name)

In [147]:
class Token:
    """
    A token: a type tag together with its value.
    """
    def __init__(self, type, value):
        self.type, self.value = type, value

class Lexer:
  """
  Lexical analyzer: walks over the source text and produces Token objects.
  """
  def __init__(self, code):
    # Token types, tried in this order at the current position:
    #   LITERAL            digits with an optional fractional part,
    #                      e.g. "4", "5.3", "22.75"
    #   NAME               a letter followed by zero or more letters,
    #                      digits or underscores
    #   OPEN_PARENTHESIS, CLOSE_PARENTHESIS, PLUS, MINUS, TIMES, POWER
    #                      the single characters ( ) + - * ^
    self.code = code
    self.pos = 0
    # Raw strings avoid invalid-escape warnings for "\." and "\S"; the inner
    # groups are non-capturing so only the named groups can be "lastgroup".
    literal = r'[0-9]+(?:\.[0-9]+)?'
    name = r'[a-zA-Z](?:[_a-zA-Z0-9])*'
    token_patterns = [
      ('LITERAL', literal),
      ('NAME', name),
      ('OPEN_PARENTHESIS', r'\('),
      ('CLOSE_PARENTHESIS', r'\)'),
      ('PLUS', r'\+'),
      ('MINUS', r'\-'),
      ('TIMES', r'\*'),
      ('POWER', r'\^'),
    ]
    regex = '|'.join('(?P<%s>%s)' % pair for pair in token_patterns)
    self.regex = re.compile(regex)
    self.re_ws_skip = re.compile(r'\S')

  def token(self):
    """
    Return the next Token, or None when only whitespace (or nothing) is left.

    Raises:
      Exception: if the text at the current position matches no token type.
    """
    if self.pos >= len(self.code):
      return None

    # Skip whitespace by jumping to the next non-whitespace character.
    m = self.re_ws_skip.search(self.code, self.pos)
    if m is None:
      return None
    self.pos = m.start()

    m = self.regex.match(self.code, self.pos)
    if m is None:
      raise Exception('Lexical error at position %s' % self.pos)

    groupname = m.lastgroup
    self.pos = m.end()
    return Token(groupname, m.group(groupname))

  def tokens(self):
    """
    Return an iterator over the tokens.

    Once the input is exhausted the iterator keeps yielding EOF tokens, so a
    consumer that reads past the end never receives None.
    """
    while True:
      tok = self.token()
      if tok is None:
        yield Token('EOF', 'EOF')
      else:
        yield tok


In [148]:
class Parser:
    """
    Recursive-descent parser that turns an expression string into a node tree.

    Rules handled, one method each:
        Expression = Term [(+|-) Expression]
        Term       = Power [* Term]
        Power      = Factor [^ Power]
        Factor     = LITERAL | NAME | ( Expression )

    Every rule recurses on its right operand, so operator chains nest to the
    right.
    NOTE(review): for '-' this means "a - b - c" is built as a - (b - c),
    which differs from the usual left-to-right reading. The tree shape is
    intentionally left unchanged here because existing code may rely on it -
    confirm whether subtraction should be made left-associative.
    """
    def __init__(self, s):
        self.lex = Lexer(s).tokens()
        self.current = self.next()

    def next(self):
        """Return the next token from the lexer's iterator."""
        return next(self.lex)

    def parse(self):
        """
        Parse the whole input and return the root node.

        Raises:
            Exception: on a syntax error, including tokens left over after a
                complete expression (previously they were silently ignored).
        """
        tree = self.Expression()
        if self.current.type != 'EOF':
            raise Exception(
                'Syntax error: unexpected %s' % self.current.value)
        return tree

    def Expression(self):
        """Parse Term [(+|-) Expression]."""
        left = self.Term()

        if self.current.type == 'PLUS':
            self.current = self.next()
            return AddNode([left, self.Expression()])

        if self.current.type == 'MINUS':
            self.current = self.next()
            return SubNode([left, self.Expression()])

        return left

    def Term(self):
        """Parse Power [* Term]."""
        left = self.Power()
        if self.current.type == 'TIMES':
            self.current = self.next()
            return MultiNode([left, self.Term()])
        return left

    def Power(self):
        """Parse Factor [^ Power]."""
        base = self.Factor()
        if self.current.type == 'POWER':
            self.current = self.next()
            return PowerNode([base, self.Power()])
        return base

    def Factor(self):
        """
        Parse a literal, a name or a parenthesized expression.

        Raises:
            Exception: if the current token cannot start a factor, or if an
                opening parenthesis is not closed. Raising here (instead of
                returning None) keeps None out of the tree, so the callers
                need no None checks.
        """
        tok = self.current

        if tok.type == 'OPEN_PARENTHESIS':
            self.current = self.next()
            inner = self.Expression()
            if self.current.type != 'CLOSE_PARENTHESIS':
                raise Exception("Syntax error: expected ')'")
            self.current = self.next()
            return inner

        if tok.type == 'LITERAL':
            self.current = self.next()
            return LiteralNode(tok.value)

        if tok.type == 'NAME':
            self.current = self.next()
            return VariableNode(tok.value)

        raise Exception('Syntax error')
        

In [149]:
# Basic operations
# Each case checks the prefix form printed for the parse tree and the value
# formatted with two decimals. evaluate() receives an empty dict because
# these expressions contain no names.
tree = Parser('1*23').parse()
assert tree.print() == "*(1, 23)"
assert '%.2f' % tree.evaluate({}) == '23.00'

tree = Parser('25+   4.74').parse()
assert tree.print() == "+(25, 4.74)"
assert '%.2f' % tree.evaluate({}) == '29.74'

tree = Parser('  32 - 15 ').parse()
assert tree.print() == "-(32, 15)"
assert '%.2f' % tree.evaluate({}) == '17.00'


tree = Parser('  2^5 ').parse()
assert tree.print() == "^(2, 5)"
assert '%.2f' % tree.evaluate({}) == '32.00'

In [150]:
# Order of operations
# '*' binds tighter than '+', and parentheses override that.
tree = Parser('1*23+5').parse()
assert tree.print() == "+(*(1, 23), 5)"
assert '%.2f' % tree.evaluate({}) == '28.00'

tree = Parser('1 * (23+5)').parse()
assert tree.print() == "*(1, +(23, 5))"
assert '%.2f' % tree.evaluate({}) == '28.00'

In [151]:
# Complex examples - parsing only, no evaluation
# NOTE(review): the last expected tree nests '-' and '+' to the right, i.e.
# x^2-A-z+t2 is read as x^2 - (A - (z + t2)); the usual left-to-right reading
# would be ((x^2 - A) - z) + t2. Confirm whether this is intended.
tree = Parser('k*(x + y)*(x - y)').parse()
assert tree.print() == "*(k, *(+(x, y), -(x, y)))"

tree = Parser('(x + t)*x + 2*t').parse()
assert tree.print() == "+(*(+(x, t), x), *(2, t))"

tree = Parser('(x+Ay)*(x^2-A-z+t2)').parse()
assert tree.print() == "*(+(x, Ay), -(^(x, 2), -(A, +(z, t2))))"

In [152]:
def parse_code(code):
  """
  Run the whole pipeline on a program written in the language defined above:
  symbol extraction, parsing and evaluation.

  Returns the result of calling expand() on the evaluated expression.
  """
  source_expression, symbol_table = preprocess(code)
  syntax_tree = Parser(source_expression).parse()
  evaluated = syntax_tree.evaluate(symbol_table)
  return evaluated.expand()

In [155]:
print(parse_code(example01))

A + f + x


In [156]:
print(parse_code(example02))

k*x**2 - k*y**2


In [157]:
print(parse_code(example03))

t*x + 2.0*t + x**2


In [154]:
print(parse_code('''
variable t
constants A
functions x
2
'''
))

2.00000000000000
