In [171]:
import re

from sympy import symbols, sympify

In [172]:
# Informal description of the grammar.
# The start symbol is <program>.
# Whitespace is not included in the rules.
# NOTE(review): this text is documentation only. The regexes used by the code
# accept one-letter names ([a-zA-Z][a-zA-Z0-9]*) and digit-only numbers, which
# is slightly different from the <name> and <decimal> rules written here.

# Raw string: the grammar contains "\.", which is an invalid escape sequence
# in a regular string literal (SyntaxWarning on recent Python versions).
grammar01 = r"""

<program> = <variable> <constants>? <functions>? <expression> 

<name> = <letter>(<alphanumeric_>)+
<variable> = variable <name>
<constants> = constants <name> (, <name>)*
<functions> = functions <name> (, <name>)*

<decimal> = <alphanumeric>+ [\.<alphanumeric>+]
<expression> = <term> [(+|-) <expression>] 
<term> = <power> [* <term>]
<power> = <factor> [^ <power>]
<factor> =  <decimal> | <name> | (<expression>)

"""

In [173]:
# Sample programs: a header (variable / constants / functions) followed by
# one expression.

# Sum of a constant, the variable and a function, wrapped in parentheses.
example01 = """

variable x
constants A, B, C
functions f
(A + x + f)

"""

# Product of three factors.
example02 = """

variable t
constants k
functions x, y
k*(x + y)*(x - y)

"""

# Mixes a numeric literal with names.
example03 = """

variable t
constants k
functions x, y
(x + t)*x + 2*t

"""

# NOTE(review): the expression uses `Ay`, which the header does not declare,
# and ends with `'`, a character the Parser's token regex has no rule for.
# Confirm whether both are intentional.
example04 = """

variable t2
constants A
functions x, y, z
(x+Ay)*(x^2-A-z+t2)'

"""

In [187]:
# A name is a letter followed by any number of letters or digits.
_NAME_RE = r'[a-zA-Z][a-zA-Z0-9]*'
# One or more names separated by commas, with optional whitespace around them.
_NAME_LIST_RE = _NAME_RE + r'(?:\s*,\s*' + _NAME_RE + r')*'

# Header of a program. Groups: (1) variable name, (2) raw constants list or
# None, (3) raw functions list or None, (4) everything that follows (the
# expression, including its leading whitespace). Must be matched with re.S so
# that group 4 can span several lines.
#
# The pattern is assembled from adjacent pieces instead of one multi-line
# literal: the line breaks of a multi-line literal become literal newlines in
# the pattern, which made the "optional" sections mandatory in practice.
# The (?=\s) lookaheads keep a name from being split by backtracking.
header_regex = (
    r'\s*variable\s+(' + _NAME_RE + r')(?=\s)'
    r'(?:\s+constants\s+(' + _NAME_LIST_RE + r')(?=\s))?'
    r'(?:\s+functions\s+(' + _NAME_LIST_RE + r'))?'
    r'(.+)'
)

def extract_variables(code):
  """
  Split a program into its expression text and a table of declared symbols.

  The header is matched with header_regex. Every declared name is mapped to a
  sympy symbol: the variable and the functions with constant=False, the
  constants with constant=True. Names are registered in the order variable,
  functions, constants, so on a name collision the later entry wins.

  Args:
    code: full program text (header followed by the expression).

  Returns:
    (expression, variables): the expression with surrounding whitespace
    stripped, and a dict mapping each declared name to its symbol.

  Raises:
    ValueError: if the text does not start with a valid header.
  """
  m = re.match(header_regex, code, re.S)
  if m is None:
    raise ValueError('Invalid program header: expected "variable <name>" '
                     'followed by an expression')
  variable, constants, functions, expression = m.groups()

  def split_names(raw):
    # The constants/functions sections are optional; their group is None
    # when the section is absent.
    if not raw:
      return []
    return [piece.strip() for piece in raw.split(',')]

  variables = {}
  variable_name = variable.strip()
  variables[variable_name] = symbols(variable_name, constant=False)
  for function_name in split_names(functions):
    variables[function_name] = symbols(function_name, constant=False)
  for constant_name in split_names(constants):
    variables[constant_name] = symbols(constant_name, constant=True)

  return expression.strip(), variables



In [188]:
# Split example04 into its expression text and its symbol table.
expression, variables = extract_variables(example04)

In [189]:
# Show the extracted expression; extract_variables only strips surrounding
# whitespace, so the trailing ' is still part of the text.
expression

"(x+Ay)*(x^2-A-z+t2)'"

In [190]:
# Show the symbol table built from the header of example04.
variables

{'A': A, 't2': t2, 'x': x, 'y': y, 'z': z}

In [199]:
class Node:
    """
    A node of the expression tree: either an operation with an arbitrary
    number of children, or a factor (leaf) when children is None.

    Attributes:
        name: the operator symbol or the literal text of the factor.
        children: list of child nodes, or None for a leaf.
    """
    def __init__(self, name, children=None):
        self.children = children
        self.name = name

    def print(self):
        """
        Return the tree in prefix notation, e.g. "+(a, *(b, c))".

        A leaf (children is None) is rendered as its bare name. An empty
        children list is rendered as "name()".
        """
        if self.children is None:
            return self.name
        # join copes with any number of children, including zero, where
        # indexing children[0] would raise IndexError.
        rendered = ', '.join(child.print() for child in self.children)
        return self.name + '(' + rendered + ')'

    def evaluate(self, variables):
        """
        Placeholder overridden by the subclasses; the base implementation
        does nothing and returns None.
        """
        pass

class AddNode (Node):
  """
  Binary sum or difference. The node name selects the operation: '+' adds,
  any other name subtracts the second child from the first.
  """
  def __init__(self, name, children=None):
    super().__init__(name, children)

  def evaluate(self, variables):
    """Evaluate both children with the given symbol table and combine them."""
    lhs = self.children[0].evaluate(variables)
    rhs = self.children[1].evaluate(variables)
    if self.name != '+':
      return lhs - rhs
    return lhs + rhs

class MultiNode (Node):
  """Binary product; the node name is always '*'."""
  def __init__(self, children=None):
    super().__init__('*', children)

  def evaluate(self, variables):
    """Evaluate both children with the given symbol table and multiply them."""
    lhs = self.children[0].evaluate(variables)
    rhs = self.children[1].evaluate(variables)
    return lhs * rhs

class PowerNode (Node):
  """Binary exponentiation; the node name is always '^'."""
  def __init__(self, children=None):
    super().__init__('^', children)

  def evaluate(self, variables):
    """Evaluate both children and raise the first to the power of the second."""
    base = self.children[0].evaluate(variables)
    exponent = self.children[1].evaluate(variables)
    return base ** exponent
  
class LiteralNode (Node):
  """
  Leaf of the expression tree: a numeric literal or a declared name.
  The node name holds the token text.
  """
  def __init__(self, name, children=None):
    super().__init__(name, children)

  def evaluate(self, variables):
    """
    Return the value of the leaf.

    Text that starts with a letter is always treated as a name and looked up
    in variables. Anything else is converted with float(); if that conversion
    fails the text is looked up in variables as well.

    Args:
      variables: dict mapping names to their values.

    Raises:
      Exception: if the name is not present in variables (or maps to None).
    """
    name = self.name
    # float() also accepts words such as "inf", "nan" and "infinity", so an
    # identifier must never reach it: names always start with a letter.
    if not name[:1].isalpha():
      try:
        return float(name)
      except ValueError:
        pass
    x = variables.get(name)
    if x is None:
      raise Exception('Variable %s does not exist' % name)
    return x
   

class Parser:
    """
    Recursive-descent parser for arithmetic expressions.

    Grammar implemented by the methods (each rule recurses on its right
    operand, so chains group to the right: "a - b - c" is parsed as
    "a - (b - c)"):

        Expression = Term   [ ('+' | '-') Expression ]
        Term       = Power  [ '*' Term ]
        Power      = Factor [ '^' Power ]
        Factor     = number | name | '(' Expression ')'

    Every parsing method returns a Node, or None when the input does not
    follow the grammar.
    """

    # Returned by next() once the input is exhausted.
    END = '\0'

    def __init__(self, s):
        # lex iterates over regex matches; each match is optional leading
        # whitespace followed by exactly one token:
        #   [0-9]+(\.[0-9]+)?          numbers, e.g. "4", "5.3", "22.75"
        #   [a-zA-Z]([_a-zA-Z0-9])*    names: a letter followed by zero or
        #                              more letters, digits or underscores
        #   \(|\)|\+|\-|\*|\^          delimiters and operators
        # finditer searches instead of anchoring, so characters that fit no
        # token are skipped silently.
        self.lex = re.finditer(r'\s*([0-9]+(\.[0-9]+)?|[a-zA-Z]([_a-zA-Z0-9])*|\(|\)|\+|\-|\*|\^)', s)
        self.current = self.next()

    def next(self):
        """Return the next token text, or '\\0' when there are no more tokens."""
        try:
            match = next(self.lex)
        except StopIteration:
            return self.END
        return match.group().strip()

    def parse(self):
        """
        Parse the whole input.

        Returns:
            The root Node, or None if the input is not a valid expression or
            tokens are left over after a complete expression.
        """
        tree = self.Expression()
        # Leftover tokens (e.g. "1 2" or "(1))") mean the input as a whole is
        # not one expression; do not silently drop them.
        if tree is None or self.current != self.END:
            return None
        return tree

    def Expression(self):
        """Expression = Term [ ('+' | '-') Expression ]"""
        l = self.Term()
        if l is None:
            return None
        if self.current in ('+', '-'):
            op = self.current
            self.current = self.next()
            r = self.Expression()
            if r is None:
                return None
            return AddNode(op, [l, r])
        return l

    def Term(self):
        """Term = Power [ '*' Term ]"""
        l = self.Power()
        if l is None:
            return None
        if self.current == '*':
            self.current = self.next()
            r = self.Term()
            if r is None:
                return None
            return MultiNode([l, r])
        return l

    def Power(self):
        """Power = Factor [ '^' Power ]"""
        l = self.Factor()
        if l is None:
            return None
        if self.current == '^':
            self.current = self.next()
            r = self.Power()
            if r is None:
                return None
            return PowerNode([l, r])
        return l

    def Factor(self):
        """Factor = number | name | '(' Expression ')'"""
        if self.current == '(':
            self.current = self.next()
            r = self.Expression()
            if r is not None and self.current == ')':
                self.current = self.next()
                return r
            return None
        l = self.current
        # Numbers and names start with a digit or a letter. Anything else
        # here (an operator, ')' or the end sentinel) is not an operand.
        if not l[:1].isalnum():
            return None
        self.current = self.next()
        return LiteralNode(l)

In [200]:
# Basic operations.
# Each case: (source text, expected prefix dump, expected value with 2 decimals).
basic_cases = [
    ('1*23', "*(1, 23)", '23.00'),
    ('25+   4.74', "+(25, 4.74)", '29.74'),
    ('  32 - 15 ', "-(32, 15)", '17.00'),
    ('  2^5 ', "^(2, 5)", '32.00'),
]
for src, expected_tree, expected_value in basic_cases:
    tree = Parser(src).parse()
    assert tree.print() == expected_tree
    assert '%.2f' % tree.evaluate({}) == expected_value

In [197]:
# Order of operations: '*' binds tighter than '+', parentheses override that.
# Each case: (source text, expected prefix dump, expected value with 2 decimals).
precedence_cases = [
    ('1*23+5', "+(*(1, 23), 5)", '28.00'),
    ('1 * (23+5)', "*(1, +(23, 5))", '28.00'),
]
for src, expected_tree, expected_value in precedence_cases:
    tree = Parser(src).parse()
    assert tree.print() == expected_tree
    assert '%.2f' % tree.evaluate({}) == expected_value

In [182]:
# Complex examples - parsing only, no evaluation.
# The expected dumps show that operator chains group to the right, e.g.
# "x^2-A-z+t2" becomes -(^(x, 2), -(A, +(z, t2))).
complex_cases = [
    ('k*(x + y)*(x - y)', "*(k, *(+(x, y), -(x, y)))"),
    ('(x + t)*x + 2*t', "+(*(+(x, t), x), *(2, t))"),
    ('(x+Ay)*(x^2-A-z+t2)', "*(+(x, Ay), -(^(x, 2), -(A, +(z, t2))))"),
]
for src, expected_tree in complex_cases:
    tree = Parser(src).parse()
    assert tree.print() == expected_tree

In [208]:
# Wraps symbol extraction + parsing + evaluation of a program written in the
# language described by grammar01.
def parse_code(code):
  """
  Evaluate a full program and return its expanded, simplified value.

  Args:
    code: program text (header followed by an expression).

  Returns:
    A sympy expression: the evaluated tree after expand() and simplify().

  Raises:
    ValueError: if the expression cannot be parsed (Parser.parse returned None).
  """
  expression, variables = extract_variables(code)
  tree = Parser(expression).parse()
  if tree is None:
    raise ValueError('Could not parse expression: %r' % expression)
  # An expression made only of numeric literals evaluates to a plain Python
  # float, which has no expand()/simplify(); sympify turns it into a sympy
  # object and leaves sympy expressions untouched.
  result = sympify(tree.evaluate(variables))
  return result.expand().simplify()

In [209]:
# Evaluate example01 end to end and print the simplified result.
print(parse_code(example01))

A + f + x


In [210]:
# Evaluate example02 end to end and print the simplified result.
print(parse_code(example02))

k*(x**2 - y**2)


In [211]:
# Evaluate example03 end to end; numeric literals are converted with float(),
# which is why the literal 2 is printed as 2.0.
print(parse_code(example03))

t*x + 2.0*t + x**2
