In [1]:
import re
import sympy

In [2]:
# Informal description of the grammar.
# The start symbol is <program>.
# Whitespace is not part of the grammar.
#
# Raw string: the `\.` in the <decimal> rule is not a valid Python escape
# sequence and triggers an invalid-escape warning in a regular string literal.
# The <name> rule uses `*` (not `+`) so that single-letter names such as `x`
# are covered, matching the `name` regex used by the header parser.

grammar01 = r"""

<program> = <variable> <constants>? <functions>? <expression>

<variable> = variable <name>
<constants> = constants <name> (, <name>)*
<functions> = functions <name> (, <name>)*

<expression> = <term> | <expression> [(+|-) <term>]
<term> = <power> [* <term>]
<power> = <factor> [^ <power>]
<factor> =  <decimal> | <name> | (<expression>)

<name> = <letter>(<alphanumeric_>)*
<decimal> = <digit>+ [\.<digit>+]

"""

In [3]:
# Sample programs: a `variable` line, a `constants` line, a `functions` line,
# followed by the expression to be parsed.
example01 = """

variable x
constants A, B, C
functions f
(A + x + f)

"""

example02 = """

variable t
constants k
functions x, y
k*(x + y)*(x - y)

"""

example03 = """

variable t
constants k
functions x, y
(x + t)*x + 2*t

"""

# NOTE(review): this expression references `Ay`, which is not declared in the
# header, and ends with `'`, a character the Lexer's token regex does not
# include.
example04 = """

variable t2
constants A
functions x, y, z
(x+Ay)*(x^2-A-z+t2)'

"""

In [4]:
class MathematicalObjects:
  """
  Registry that maps every declared function, constant and variable name to a
  sympy symbol.
  """
  def __init__(self):
    # name (str) -> sympy symbol
    self.objects = {}

  def add(self, object_name, constant=False):
    """
    Create a sympy symbol for `object_name` and register it.

    `constant` is forwarded to `sympy.symbols` as the `constant` keyword.
    Raises Exception if a symbol with the same name was already registered.
    """
    if object_name in self.objects:
      # The message must be built with `%`; passing the name as a second
      # Exception argument leaves the placeholder unformatted.
      raise Exception('Object %s already exists' % object_name)
    self.objects[object_name] = sympy.symbols(object_name, constant=constant)

  def get(self, object_name):
    """
    Return the symbol registered under `object_name`.

    Raises Exception if no such name was registered.
    """
    symbol = self.objects.get(object_name)
    if symbol is None:
      raise Exception('Object %s does not exist' % object_name)
    return symbol

In [5]:
# name: a letter optionally followed by more letters, digits or underscores,
# e.g. x, x2, xlinha, x_linha

name = r'[a-zA-Z][a-zA-Z0-9_]*'

# name_list: one or more names separated by commas (captured as one group)

name_list = r'({name}(?:\s*,\s*{name})*)'.format(name=name)

# header_regex: matches a whole program. Capture groups:
#   1: variable name
#   2: comma-separated constants list, or None when the section is absent
#   3: comma-separated functions list, or None when the section is absent
#   4: everything after the header (the expression); needs re.S to span lines
# Sections are separated by any run of whitespace rather than by exactly one
# newline, so the optional sections can be omitted without leaving blank
# lines in their place.
header_regex = (
    r'\s*variable\s+({name})'
    r'(?:\s+constants\s+{name_list})?'
    r'(?:\s+functions\s+{name_list})?'
    r'\s+(.+)'
).format(name=name, name_list=name_list)

def preprocess(code):
  """
  Extract the mathematical objects and the expression from the given code.

  Returns a tuple `(expression, objects)`: the expression text stripped of
  surrounding whitespace, and a MathematicalObjects registry holding the
  variable, the functions and the constants declared in the header.

  Raises ValueError when `code` does not match `header_regex`. Duplicate
  names raise whatever `MathematicalObjects.add` raises.
  """
  m = re.match(header_regex, code, re.S)
  if m is None:
    raise ValueError('Invalid program: expected "variable <name>" followed by an expression')
  variable, constants, functions, expression = m.groups()

  # The constants/functions sections are optional; their groups are None
  # when the section is absent.
  raw_constants = constants.split(',') if constants else []
  raw_functions = functions.split(',') if functions else []

  objects = MathematicalObjects()
  objects.add(variable.strip(), constant=False)

  # Registration order: variable, then functions, then constants.
  for v in raw_functions:
    objects.add(v.strip(), constant=False)
  for c in raw_constants:
    objects.add(c.strip(), constant=True)

  return expression.strip(), objects



In [6]:
# Run only the header extraction on example04; the expression is not parsed here.
expression, objects = preprocess(example04)

In [7]:
# Text left after the header, stripped of surrounding whitespace by preprocess().
expression

"(x+Ay)*(x^2-A-z+t2)'"

In [8]:
# Name -> sympy symbol mapping built from the header declarations.
objects.objects

{'A': A, 't2': t2, 'x': x, 'y': y, 'z': z}

In [9]:
class Node:
  """
  Base class representing an operation with an arbitrary number of children.

  `name` is the label used when printing; `children` is a list of Node
  objects, or None for a leaf.
  """
  def __init__(self, name, children=None):
    self.children = children
    self.name = name

  def print(self):
    """
    Return the tree in prefix form, e.g. "+(1, *(2, 3))".

    A node whose `children` is None prints as just its name; an empty
    children list prints as "name()".
    """
    if self.children is None:
      return self.name
    rendered = ', '.join(child.print() for child in self.children)
    return '%s(%s)' % (self.name, rendered)

  def evaluate(self, objects):
    """
    Placeholder overridden by subclasses; the base implementation returns None.
    """
    return None

class AddNode(Node):
  """Addition node, labelled '+'."""
  def __init__(self, children=None):
    super().__init__(name='+', children=children)

  def evaluate(self, objects):
    """Evaluate the first two children, left then right, and return their sum."""
    lhs = self.children[0].evaluate(objects)
    rhs = self.children[1].evaluate(objects)
    return lhs + rhs

class SubNode(Node):
  """Subtraction node, labelled '-'."""
  def __init__(self, children=None):
    super().__init__(name='-', children=children)

  def evaluate(self, objects):
    """Evaluate the first two children, left then right, and return their difference."""
    lhs = self.children[0].evaluate(objects)
    rhs = self.children[1].evaluate(objects)
    return lhs - rhs

class MultiNode(Node):
  """Multiplication node, labelled '*'."""
  def __init__(self, children=None):
    super().__init__(name='*', children=children)

  def evaluate(self, objects):
    """Evaluate the first two children, left then right, and return their product."""
    lhs = self.children[0].evaluate(objects)
    rhs = self.children[1].evaluate(objects)
    return lhs * rhs

class PowerNode(Node):
  """Exponentiation node, labelled '^'."""
  def __init__(self, children=None):
    super().__init__(name='^', children=children)

  def evaluate(self, objects):
    """Evaluate base (first child) then exponent (second child) and return base ** exponent."""
    base = self.children[0].evaluate(objects)
    exponent = self.children[1].evaluate(objects)
    return base ** exponent
  
class LiteralNode(Node):
  """Leaf node whose name is the text of a numeric literal."""
  def __init__(self, name, children=None):
    super().__init__(name=name, children=children)

  def evaluate(self, objects):
    """Return the literal's text passed through sympy.N; `objects` is not used."""
    value = sympy.N(self.name)
    return value
  
class ObjectNode(Node):
  """Leaf node that refers to a declared object by name."""
  def __init__(self, name, children=None):
    super().__init__(name=name, children=children)

  def evaluate(self, objects):
    """Look this node's name up via `objects.get` and return the result."""
    symbol = objects.get(self.name)
    return symbol

In [10]:
class Token:
  """
  A lexical token: a type tag plus the matched text.
  """
  def __init__(self, type, value):
    self.type, self.value = type, value

class Lexer:
  """
  Lexical analyser: splits a source string into Token objects.
  """
  def __init__(self, code):
    # Token types:
    #   LITERAL  [0-9]+(\.[0-9]+)?      numbers, e.g. "4", "5.3", "22.75"
    #   NAME     [a-zA-Z][_a-zA-Z0-9]*  function or variable names: a letter
    #            followed by zero or more letters, digits or underscores
    #   OPEN_PARENTHESIS, CLOSE_PARENTHESIS, PLUS, MINUS, TIMES, POWER
    #            the delimiters ( ) + - * ^
    self.code = code
    self.pos = 0
    # Raw strings avoid invalid-escape warnings; the inner groups are
    # non-capturing so only the named groups take part in `lastgroup`.
    literal = r'[0-9]+(?:\.[0-9]+)?'
    name = r'[a-zA-Z][_a-zA-Z0-9]*'
    # Named-group regex: the name of the matching group is the token type.
    token_regex = (
        r'(?P<LITERAL>{literal})'
        r'|(?P<NAME>{name})'
        r'|(?P<OPEN_PARENTHESIS>\()'
        r'|(?P<CLOSE_PARENTHESIS>\))'
        r'|(?P<PLUS>\+)'
        r'|(?P<MINUS>\-)'
        r'|(?P<TIMES>\*)'
        r'|(?P<POWER>\^)'
    ).format(literal=literal, name=name)
    self.regex = re.compile(token_regex)
    # Despite its name, this pattern finds the next NON-whitespace character.
    self.skip_whitespace = re.compile(r'\S')

  def token(self):
    """
    Return the next token and advance `pos`.

    Returns None when only whitespace (or nothing) is left. Raises Exception
    when the next non-whitespace character does not start any token.
    """
    if self.pos >= len(self.code):
      return None

    # Skip whitespace
    m = self.skip_whitespace.search(self.code, self.pos)
    if m is None:
      return None
    self.pos = m.start()

    # Match against the token regex
    m = self.regex.match(self.code, self.pos)
    if m:
      groupname = m.lastgroup
      tok = Token(groupname, m.group(groupname))
      self.pos = m.end()
      return tok

    raise Exception('Lexical error at position %s' % self.pos)

  def tokens(self):
    """
    Return an iterator over the tokens, terminated by a single EOF token.
    """
    while True:
      tok = self.token()
      if tok is None:
        # Stop after EOF; falling through here would yield None next.
        yield Token('EOF', 'EOF')
        return
      yield tok


In [20]:
class Parser:
    """
    Recursive-descent parser that turns an expression string into a Node tree.

    Precedence, lowest to highest: + and - (left-associative), * and ^ (both
    right-recursive, so a*b*c parses as *(a, *(b, c))), then literals, names
    and parenthesised sub-expressions.

    Every syntax problem is reported by raising Exception with a message
    starting with 'Syntax error'; the parse methods never return None.
    """
    def __init__(self, s):
        self.lex = Lexer(s).tokens()
        # One-token lookahead.
        self.current = self.next()

    def next(self):
        """Return the next token from the lexer."""
        return next(self.lex)

    def parse(self):
        """
        Parse the whole input and return the root node.

        Raises Exception if the input is not a valid expression or if tokens
        remain after a complete expression (e.g. '1 2' or '1+2)').
        """
        tree = self.Root()
        if self.current.type != 'EOF':
            raise Exception('Syntax error: unexpected token %s' % self.current.value)
        return tree

    def Root(self):
        """Parse a full expression: a term followed by any number of +/- terms."""
        l = self.Term()
        return self.Expression(l)

    def Expression(self, left_side):
        """
        Fold a sequence of (+|-) <term> onto `left_side`, left-associatively.
        """
        while self.current.type in ('PLUS', 'MINUS'):
            node_class = AddNode if self.current.type == 'PLUS' else SubNode
            self.current = self.next()
            right_side = self.Term()
            left_side = node_class([left_side, right_side])
        return left_side

    def Term(self):
        """Parse <power> [* <term>]."""
        l = self.Power()
        if self.current.type == 'TIMES':
            self.current = self.next()
            r = self.Term()
            return MultiNode([l, r])
        return l

    def Power(self):
        """Parse <factor> [^ <power>]."""
        l = self.Factor()
        if self.current.type == 'POWER':
            self.current = self.next()
            r = self.Power()
            return PowerNode([l, r])
        return l

    def Factor(self):
        """
        Parse a literal, a name, or a parenthesised expression.
        """
        if self.current.type == 'OPEN_PARENTHESIS':
            self.current = self.next()
            r = self.Root()
            if self.current.type != 'CLOSE_PARENTHESIS':
                # Raise instead of returning None, which would surface later
                # as an unrelated AttributeError.
                raise Exception("Syntax error: expected ')'")
            self.current = self.next()
            return r

        if self.current.type == 'LITERAL':
            node = LiteralNode(self.current.value)
            self.current = self.next()
            return node
        elif self.current.type == 'NAME':
            node = ObjectNode(self.current.value)
            self.current = self.next()
            return node

        raise Exception('Syntax error')
        

In [21]:
# Basic operations: (source, expected printed tree, expected value as '%.2f')
basic_cases = [
    ('1*23', "*(1, 23)", '23.00'),
    ('25+   4.74', "+(25, 4.74)", '29.74'),
    ('  32 - 15 ', "-(32, 15)", '17.00'),
    ('  2^5 ', "^(2, 5)", '32.00'),
]

for source, expected_tree, expected_value in basic_cases:
    tree = Parser(source).parse()
    assert tree.print() == expected_tree
    assert '%.2f' % tree.evaluate({}) == expected_value

In [22]:
# Order of operations: (source, expected printed tree, expected value as '%.2f')
precedence_cases = [
    ('1*23+5', "+(*(1, 23), 5)", '28.00'),
    ('1 * (23+5)', "*(1, +(23, 5))", '28.00'),
]

for source, expected_tree, expected_value in precedence_cases:
    tree = Parser(source).parse()
    assert tree.print() == expected_tree
    assert '%.2f' % tree.evaluate({}) == expected_value

In [23]:
# Complex examples - parsing only, no evaluation: (source, expected printed tree)
parsing_cases = [
    ('k*(x + y)*(x - y)', "*(k, *(+(x, y), -(x, y)))"),
    ('(x + t)*x + 2*t', "+(*(+(x, t), x), *(2, t))"),
    ('(x+Ay)*(x^2-A-z+t2)', "*(+(x, Ay), +(-(-(^(x, 2), A), z), t2))"),
]

for source, expected_tree in parsing_cases:
    tree = Parser(source).parse()
    assert tree.print() == expected_tree

In [24]:
# Wraps the whole pipeline for a program written in the language defined in
# this notebook: object extraction + parsing + evaluation.
def parse_code(code):
  """
  Run preprocess() on `code`, parse the resulting expression, evaluate the
  tree against the extracted objects and return the result of `.expand()`.
  """
  source, symbols = preprocess(code)
  return Parser(source).parse().evaluate(symbols).expand()

In [25]:
# Re-declares example01 (same text as its earlier definition) and prints the
# result of running the full pipeline on it.
example01 = """

variable x
constants A, B, C
functions f
(A + x + f)

"""
print(parse_code(example01))

A + f + x


In [26]:
# Re-declares example02 (same text as its earlier definition) and prints the
# result of running the full pipeline on it.
example02 = """

variable t
constants k
functions x, y
k*(x + y)*(x - y)

"""
print(parse_code(example02))

k*x**2 - k*y**2


In [27]:
# Re-declares example03 (same text as its earlier definition) and prints the
# result of running the full pipeline on it.
example03 = """

variable t
constants k
functions x, y
(x + t)*x + 2*t

"""
print(parse_code(example03))

t*x + 2.0*t + x**2


In [28]:
# Program whose variable name ends with an underscore (teste_); prints the
# result of running the full pipeline on it.
example = """

variable teste_
constants A
functions f
(A*f + teste_ + f)

"""
print(parse_code(example))

A*f + f + teste_
