In [1]:
import re
import sympy

In [2]:
# Informal description of the grammar.
# The start symbol is <program>.
# Whitespace is not represented in the rules.
# Notation: `?` and `[...]` mark an optional part, `*` means zero or more
# repetitions, `+` means one or more and `|` separates alternatives.
# The text is a raw string so the backslash of the <literal> rule is kept
# verbatim instead of being read as an (invalid) escape sequence.

grammar01 = r"""

<program> = <variable> <constants>? <functions>? <expression>

<variable> = variable <name>
<constants> = constants <name> (, <name>)*
<functions> = functions <name> (, <name>)*

<expression> = <term> | <sum> | <term> <sum>
<sum> = (+ | -) <term> [<sum>]
<term> = <power> [* <term>]
<power> = <base> [^ <power>]
<base> = <factor> [']
<factor> =  <literal> | <name> | (<expression>)

<name> = <letter>(<alphanumeric_>)*
<literal> = <digit>+ [\.<digit>+]

"""

In [16]:
# Sample programs: a `variable` line, a `constants` line, a `functions` line
# and, last, the expression to evaluate. A trailing `'` marks a derivative.
example01 = """

variable x
constants A, B, C
functions f
(A + x + f)'

"""

# Product of a sum and a difference of two functions of t.
example02 = """

variable t
constants k
functions x, y
k*(x + y)*(x - y)

"""

# Mixes functions, the variable itself and a numeric literal.
example03 = """

variable t
constants k
functions x, y
(x + t)*x + 2*t

"""

# Derivative applied to the second factor only.
# The product of A and y must be written `A*y`: the language has no implicit
# multiplication, so `Ay` would be read as a single (undeclared) name.
example04 = """

variable t2
constants A
functions x, y, z
(x+A*y)*(x^2-A-z+t2)'

"""

In [17]:
class MathematicalObjects:
  """
  Maps every declared variable, constant and function name to a sympy object.

  Attributes:
    objects: dict from name to the sympy object registered under that name.
    variable: the sympy symbol of the independent variable, or None until
      set_variable() is called.
  """
  def __init__(self):
    self.objects = {}
    self.variable = None

  def _ensure_new(self, name):
    """Raise if `name` is already registered."""
    if name in self.objects:
      raise Exception('Object %s already exists' % name)

  def add_constant(self, constant_name):
    """
    Register `constant_name` as a sympy symbol created with constant=True.

    Raises:
      Exception: if the name is already registered.
    """
    self._ensure_new(constant_name)
    self.objects[constant_name] = sympy.symbols(constant_name, constant=True)

  def add_function(self, function_name):
    """
    Register `function_name` as an undefined sympy function applied to the
    current variable.

    Raises:
      Exception: if the name is already registered, or if no variable has
        been set yet.
    """
    self._ensure_new(function_name)
    if self.variable is None:
      raise Exception('Variable need to be set before functions')
    self.objects[function_name] = sympy.Function(function_name)(self.variable)

  def set_variable(self, variable_name):
    """
    Create the independent variable and register it under `variable_name`.
    """
    self.variable = sympy.symbols(variable_name, constant=False)
    self.objects[variable_name] = self.variable

  def get(self, object_name):
    """
    Return the sympy object registered under `object_name`.

    Raises:
      Exception: if nothing is registered under that name.
    """
    x = self.objects.get(object_name)
    if x is None:
      raise Exception('Object %s does not exist' % object_name)
    return x

In [144]:
class BaseTokens:
  """
  Regular-expression fragments shared by the header pattern and the lexer.
  """
  letter = r'[a-zA-Z]'
  alphanumeric_ = r'[a-zA-Z0-9_]'
  numeric = r'[0-9]'
  # name: one letter optionally followed by more letters, digits or
  # underscores, e.g. x, x2, xlinha, x_linha
  name = letter + alphanumeric_ + '*'
  # name_list: one or more names separated by commas; the whole list is a
  # single capturing group
  name_list = '(' + name + r'(?:\s*,\s*' + name + ')*)'
  # literal: digits with an optional fractional part, e.g. 4, 5.3, 22.75
  literal = numeric + r'+(?:\.(' + numeric + ')+)?'


In [145]:
# Header pattern: `variable <name>`, then the optional `constants` and
# `functions` lists, then the expression (everything that is left).
# The pieces are separated by arbitrary whitespace, so either optional
# section can really be omitted. Groups, in order: variable name, constants
# list, functions list, expression text.
header_regex = (
    r'\s*variable\s+({name})'
    r'(?:\s+constants\s+{name_list})?'
    r'(?:\s+functions\s+{name_list})?'
    r'\s+(.+)'
).format(name=BaseTokens.name, name_list=BaseTokens.name_list)

def _split_names(raw_list):
  """Split a comma-separated name list; an absent section (None) gives []."""
  if raw_list is None:
    return []
  return [name.strip() for name in raw_list.split(',')]

def preprocess(code):
  """
  Extract the mathematical objects and the expression from the given program.

  Args:
    code: program text (header lines followed by the expression).

  Returns:
    A pair (expression, objects): the stripped expression text and a
    MathematicalObjects holding the declared variable, functions and
    constants.

  Raises:
    Exception: if the text does not start with a valid header followed by an
      expression, or if a name is declared twice.
  """
  m = re.match(header_regex, code, re.S)
  if m is None:
    raise Exception(
        'Invalid program: expected "variable <name>", optional "constants" '
        'and "functions" lists, then an expression')
  variable, constants, functions, expression = m.groups()

  objects = MathematicalObjects()
  # The variable must exist first: functions are built as functions of it.
  objects.set_variable(variable.strip())

  for function_name in _split_names(functions):
    objects.add_function(function_name)
  for constant_name in _split_names(constants):
    objects.add_constant(constant_name)

  return expression.strip(), objects



In [146]:
# Split example04 into its expression text and the table of declared symbols.
expression, objects = preprocess(example04)

In [147]:
expression

"(x+A*y)*(x^2-A-z+t2)'"

In [148]:
objects.objects

{'A': A, 't2': t2, 'x': x(t2), 'y': y(t2), 'z': z(t2)}

In [149]:
print(objects.variable)

t2


In [150]:
class Node:
  """
  Base class for an expression-tree node: an operation (or leaf) identified
  by `name`, with an arbitrary number of children.

  Attributes:
    name: text of the operator, literal or identifier.
    children: list of child nodes, or None for a leaf.
  """
  def __init__(self, name, children=None):
    self.children = children
    self.name = name

  def print(self):
    """
    Return the tree in prefix notation, e.g. "+(a, b)".

    A node without children (None or an empty list) is rendered as its bare
    name.
    """
    if not self.children:
      return self.name
    rendered_children = ', '.join(child.print() for child in self.children)
    return self.name + '(' + rendered_children + ')'

  def evaluate(self, objects):
    """
    Evaluate the node against the symbol table `objects`.

    The base implementation does nothing and returns None; subclasses
    override it.
    """
    pass

class AddNode (Node):
  """
  Addition node: evaluates to the sum of its first two children.
  """
  def __init__(self, children=None):
    super().__init__(name='+', children=children)

  def evaluate(self, objects):
    first_operand = self.children[0].evaluate(objects)
    second_operand = self.children[1].evaluate(objects)
    return first_operand + second_operand

class MultiNode (Node):
  """
  Multiplication node: evaluates to the product of its first two children.
  """
  def __init__(self, children=None):
    super().__init__(name='*', children=children)

  def evaluate(self, objects):
    first_factor = self.children[0].evaluate(objects)
    second_factor = self.children[1].evaluate(objects)
    return first_factor * second_factor

class PowerNode (Node):
  """
  Exponentiation node: first child raised to the power of the second child.
  """
  def __init__(self, children=None):
    super().__init__(name='^', children=children)

  def evaluate(self, objects):
    base_value = self.children[0].evaluate(objects)
    exponent_value = self.children[1].evaluate(objects)
    return base_value ** exponent_value

class DerivativeNode (Node):
  """
  Derivative node: differentiates its single child with respect to
  `objects.variable`.
  """
  def __init__(self, node):
    super().__init__(name="'", children=[node])

  def evaluate(self, objects):
    operand = self.children[0].evaluate(objects)
    return sympy.diff(operand, objects.variable)

class InversorNode (Node):
  """
  Unary minus node: evaluates to the negation of its single child.
  """
  def __init__(self, node):
    super().__init__(name='-', children=[node])

  def evaluate(self, objects):
    operand = self.children[0].evaluate(objects)
    return -operand
   
class LiteralNode (Node):
  """
  Leaf holding a numeric literal; `name` is the literal's text.

  The constructor is inherited unchanged from Node: (name, children=None).
  """
  def evaluate(self, objects):
    # `objects` is not consulted: the value comes from the literal text alone.
    literal_text = self.name
    return sympy.N(literal_text)
  
class ObjectNode (Node):
  """
  Leaf holding an identifier; evaluates to whatever `objects.get` returns for
  that name.

  The constructor is inherited unchanged from Node: (name, children=None).
  """
  def evaluate(self, objects):
    identifier = self.name
    return objects.get(identifier)

In [151]:
class Token:
  """
  A token: a type tag plus the matched value.
  """
  def __init__(self, type, value):
    self.type, self.value = type, value

class Lexer:
  """
  Lexical analyser: splits the expression text into Token objects.
  """
  def __init__(self, code):
    # Recognised tokens:
    # [0-9]+(\.[0-9]+)?        Numbers, e.g. "4", "5.3", "22.75"
    # [a-zA-Z]([_a-zA-Z0-9])*  Names of functions, constants or the variable:
    #         a letter followed by zero or more letters, digits or underscores
    # ( ) + - * ^ '            The delimiters and operators of the language
    self.code = code
    self.pos = 0
    # One named group per token type; the group name becomes Token.type.
    token_regex = r'''(?P<LITERAL>{literal})|(?P<NAME>{name})|(?P<OPEN_PARENTHESIS>\()|(?P<CLOSE_PARENTHESIS>\))|(?P<PLUS>\+)|(?P<MINUS>\-)|(?P<TIMES>\*)|(?P<POWER>\^)|(?P<DERIVATIVE>\')'''.format(literal=BaseTokens.literal, name=BaseTokens.name)
    self.regex = re.compile(token_regex)
    # Finds the next non-whitespace character (raw string: `\S` is a regex
    # class, not a Python string escape).
    self.skip_whitespace = re.compile(r'\S')

  def token(self):
    """
    Return the next token and advance `pos`, or None when only whitespace
    (or nothing) is left.

    Raises:
      Exception: if the text at the current position matches no token.
    """
    if self.pos >= len(self.code):
      return None

    # Skip whitespace
    m = self.skip_whitespace.search(self.code, self.pos)
    if m is None:
      return None
    self.pos = m.start()

    # Match against the token pattern
    m = self.regex.match(self.code, self.pos)
    if m is None:
      raise Exception('Lexical error at position %s' % self.pos)

    groupname = m.lastgroup
    tok = Token(groupname, m.group(groupname))
    self.pos = m.end()
    return tok

  def tokens(self):
    """
    Return an iterator over the tokens.

    The iterator never ends: once the input is exhausted it keeps yielding
    Token('EOF', 'EOF'), so a consumer can always ask for one more token.
    """
    while True:
      tok = self.token()
      if tok is None:
        yield Token('EOF', 'EOF')
      else:
        yield tok


In [152]:
class Parser:
    """
    Recursive-descent parser that turns an expression string into a tree of
    Node objects.

    `current` always holds the next token that has not been consumed yet.
    """
    def __init__(self, s):
        self.lex = Lexer(s).tokens()
        self.current = self.next()

    def next(self):
        """Fetch the next token from the lexer."""
        return next(self.lex)

    def parse(self):
        """
        Parse the whole input and return the root node.

        Raises:
          Exception: 'Syntax error' if the input is not a valid expression or
            if tokens are left over after a complete expression.
        """
        tree = self.Expression()
        # Leftover tokens (e.g. "2 3" or an unmatched ")") are an error,
        # not something to drop silently.
        if self.current.type != 'EOF':
            raise Exception('Syntax error')
        return tree

    def accept(self, c):
        """Consume the current token if its type is `c`; report whether it was."""
        if self.current.type == c:
            self.current = self.next()
            return True
        return False

    def expect(self, f):
        """Run the parsing function `f`; a None result is a syntax error."""
        sentence = f()
        if sentence is None:
            raise Exception('Syntax error')
        return sentence

    def _at_sign(self):
        """True when the current token is a `+` or a `-`."""
        return self.current.type in ('PLUS', 'MINUS')

    def Expression(self):
        """<expression> = <term> | <sum> | <term> <sum>"""
        if self._at_sign():
            return self.Sum()
        left_side = self.expect(self.Term)
        if self._at_sign():
            right_side = self.Sum()
            return AddNode([left_side, right_side])
        return left_side

    def Sum(self):
        """<sum> = (+ | -) <term> [<sum>]"""
        if self.accept('PLUS'):
            invert = False
        elif self.accept('MINUS'):
            invert = True
        else:
            raise Exception('Syntax error')

        left_side = self.expect(self.Term)
        right_side = None
        if self._at_sign():
            right_side = self.Sum()

        return self.sum(left_side, right_side, invert)

    def sum(self, left_side, right_side, invert_left_side):
        """
        Build the node for a signed term followed by an optional rest.

        A leading minus wraps `left_side` in an InversorNode, so subtraction
        is represented as the addition of a negated term.
        """
        left_side_with_signal = left_side
        if invert_left_side:
            left_side_with_signal = InversorNode(left_side)
        if right_side is None:
            return left_side_with_signal
        return AddNode([left_side_with_signal, right_side])

    def Term(self):
        """<term> = <power> [* <term>]"""
        l = self.Power()
        if self.accept('TIMES'):
            r = self.expect(self.Term)
            return MultiNode([l, r])
        return l

    def Power(self):
        """<power> = <base> [^ <power>]"""
        l = self.Base()
        if self.accept('POWER'):
            r = self.expect(self.Power)
            return PowerNode([l, r])
        return l

    def Base(self):
        """<base> = <factor> [']"""
        child = self.Factor()
        if self.accept('DERIVATIVE'):
            return DerivativeNode(child)
        return child

    def Factor(self):
        """
        <factor> = <literal> | <name> | (<expression>)

        Raises:
          Exception: 'Syntax error' on a missing closing parenthesis or on
            any token that cannot start a factor.
        """
        if self.accept('OPEN_PARENTHESIS'):
            r = self.expect(self.Expression)
            # Fail right here: returning None would let a following `'` wrap
            # a missing operand in a DerivativeNode.
            if not self.accept('CLOSE_PARENTHESIS'):
                raise Exception('Syntax error')
            return r

        # Read the value before accept() moves on to the next token.
        current_value = self.current.value
        if self.accept('LITERAL'):
            return LiteralNode(current_value)
        if self.accept('NAME'):
            return ObjectNode(current_value)

        raise Exception('Syntax error')
        

In [153]:
# Trivial cases
# An empty dict is enough as `objects` here: literal nodes never look
# anything up in it.
tree = Parser('2').parse()
assert tree.print() == "2"
assert '%.2f' % tree.evaluate({}) == '2.00'

# A leading plus adds no node to the tree.
tree = Parser('+2').parse()
assert tree.print() == "2"
assert '%.2f' % tree.evaluate({}) == '2.00'

# A leading minus wraps the operand in a negation node; whitespace is ignored.
tree = Parser('  - 2 ').parse()
assert tree.print() == "-(2)"
assert '%.2f' % tree.evaluate({}) == '-2.00'

# Parsing only: evaluating a name would need it to be declared in `objects`.
tree = Parser('  - x ').parse()
assert tree.print() == "-(x)"

In [154]:
# Basic operations
tree = Parser('1*23').parse()
assert tree.print() == "*(1, 23)"
assert '%.2f' % tree.evaluate({}) == '23.00'

tree = Parser('25+   4.74').parse()
assert tree.print() == "+(25, 4.74)"
assert '%.2f' % tree.evaluate({}) == '29.74'

# Subtraction is represented as the addition of a negated term.
tree = Parser('  32 - 15 ').parse()
assert tree.print() == "+(32, -(15))"
assert '%.2f' % tree.evaluate({}) == '17.00'


tree = Parser('  2^5 ').parse()
assert tree.print() == "^(2, 5)"
assert '%.2f' % tree.evaluate({}) == '32.00'

In [155]:
# Order of operations
# Multiplication binds tighter than addition...
tree = Parser('1*23+5').parse()
assert tree.print() == "+(*(1, 23), 5)"
assert '%.2f' % tree.evaluate({}) == '28.00'

# ...unless parentheses group the sum first.
tree = Parser('1 * (23+5)').parse()
assert tree.print() == "*(1, +(23, 5))"
assert '%.2f' % tree.evaluate({}) == '28.00'

In [156]:
# Complex examples - parsing only, no evaluation
# Chained products nest to the right: k * ((x + y) * (x - y)).
tree = Parser('k*(x + y)*(x - y)').parse()
assert tree.print() == "*(k, *(+(x, y), +(x, -(y))))"

tree = Parser('(x + t)*x + 2*t').parse()
assert tree.print() == "+(*(+(x, t), x), *(2, t))"

# `Ay` is read as one name; a run of +/- terms nests to the right.
tree = Parser('(x+Ay)*(x^2-A-z+t2)').parse()
assert tree.print() == "*(+(x, Ay), +(^(x, 2), +(-(A), +(-(z), t2))))"

In [157]:
# Examples with derivatives
# The prime applies only to the factor right before it, here (x - y).
tree = Parser('k*(x + y)*(x - y)\'').parse()
assert tree.print() == "*(k, *(+(x, y), '(+(x, -(y)))))"

# A literal can be differentiated too.
tree = Parser('5\'').parse()
assert tree.print() == "'(5)"

In [158]:
# Failure examples
def assertRaises(f, expected_exception):
  """
  Assert that calling `f` raises an exception whose message is exactly
  `expected_exception`.

  Args:
    f: callable invoked with no arguments.
    expected_exception: expected text of str(exception).

  Raises:
    AssertionError: if `f` raises nothing, or raises with a different message.
  """
  try:
    f()
  except Exception as e:
    assert str(e) == expected_exception, (
        'expected error %r, got %r' % (expected_exception, str(e)))
    return
  # Reaching this point means nothing was raised, which is always a failure
  # (even when the expected text happens to be 'None').
  raise AssertionError(
      'expected error %r, but nothing was raised' % expected_exception)

# Constructing a Parser only reads the first token; the error is expected
# from parse() itself, which is why the bound method is passed uncalled.
syntaxError = 'Syntax error'
assertRaises(Parser('x + ').parse, syntaxError)  # missing right operand
assertRaises(Parser('+').parse, syntaxError)     # sign with no term
assertRaises(Parser('5*').parse, syntaxError)    # missing right factor
assertRaises(Parser("'").parse, syntaxError)     # derivative with no operand


In [159]:
# Wraps object extraction + parsing + evaluation of a program written in the
# language defined above
def parse_code(code):
  """
  Run a whole program and return the expanded result of its expression.
  """
  source_expression, symbol_table = preprocess(code)
  tree = Parser(source_expression).parse()
  return tree.evaluate(symbol_table).expand()

In [160]:
from sympy import Symbol
from sympy.printing.latex import LatexPrinter
from sympy.core.function import UndefinedFunction

# Overrides LatexPrinter with a lighter notation for functions of a single
# variable: the variable is never written out, neither in function
# applications nor in derivatives.
class SimpleLatexPrinter(LatexPrinter):
    def _print_Derivative(self, expr):
        """
        Print a derivative as the function name followed by one prime per
        order of differentiation, e.g. x' or x''.
        """
        function = expr.args[0]
        # The second argument is indexed as (variable, order); only the
        # order is needed.
        derivative_order = expr.args[1][1]
        return "{}{}".format(
            self._print(Symbol(function.func.__name__)),
            '\'' * derivative_order)

    def _print_Function(self, expr, exp=None):
        """
        Print a function application as the bare function name, with the
        exponent appended when one is passed in.
        """
        name = expr.func.__name__
        # An exponent whose text is exactly '1.0' is left out.
        if exp is not None and exp != '1.0':
            return r"%s^{%s}" % (name, exp)
        return name

# Hand the simplified printer to sympy.init_printing as its LaTeX printer.
sympy.init_printing(latex_printer=SimpleLatexPrinter().doprint)

In [176]:
from IPython.display import display, Markdown

# Wraps the rendering of a result
def print_result(result):
  """
  Display `result` as inline LaTeX, using SimpleLatexPrinter.
  """
  latex_source = SimpleLatexPrinter().doprint(result)
  markdown_text = '${latex}$'.format(latex=latex_source)
  display(Markdown(markdown_text))

In [177]:
# Derivative of a sum of a constant, the variable and a function.
example01 = """

variable x
constants A, B, C
functions f
(A + x + f)'

"""
result = parse_code(example01)
print_result(result)

$f' + 1$

In [178]:
# No derivative here: the product is only expanded.
example02 = """

variable t
constants k
functions x, y
k*(x + y)*(x - y)

"""
result = parse_code(example02)
print_result(result)

$k x^{2} - k y^{2}$

In [179]:
# Mixes functions, the variable itself and a numeric literal.
example03 = """

variable t
constants k
functions x, y
(x + t)*x + 2*t

"""
result = parse_code(example03)
print_result(result)

$t x + 2.0 t + x^{2}$

In [180]:
# The derivative applies to the second factor only; the product of A and y
# is written explicitly as `A*y`.
example04 = """

variable t2
constants A
functions x, y, z
(x+A*y)*(x^2-A-z+t2)'

"""
result = parse_code(example04)
print_result(result)

$2.0 A x y x' - A y z' + A y - x z' + x + 2.0 x^{2.0} x'$