<a href="https://colab.research.google.com/github/rogerioag/rea-comp04-compiladores/blob/main/jupyter-notebooks/02-comp-analise-sintatica-cmmparser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Análise Sintática

In [None]:
!pip install ply
!pip install anytree
!pip install graphviz
!pip install llvmlite
!jupyter nbextension install https://rawgit.com/jfbercher/small_nbextensions/master/highlighter.zip  --user
!jupyter nbextension enable highlighter/highlighter

Collecting ply
[?25l  Downloading https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl (49kB)
[K     |██████▋                         | 10kB 13.6MB/s eta 0:00:01[K     |█████████████▏                  | 20kB 19.1MB/s eta 0:00:01[K     |███████████████████▉            | 30kB 11.3MB/s eta 0:00:01[K     |██████████████████████████▍     | 40kB 9.0MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 2.9MB/s 
[?25hInstalling collected packages: ply
Successfully installed ply-3.11
Collecting anytree
[?25l  Downloading https://files.pythonhosted.org/packages/a8/65/be23d8c3ecd68d40541d49812cd94ed0f3ee37eb88669ca15df0e43daed1/anytree-2.8.0-py2.py3-none-any.whl (41kB)
[K     |████████████████████████████████| 51kB 3.1MB/s 
Installing collected packages: anytree
Successfully installed anytree-2.8.0
Downloading: https://rawgit.com/jfbercher/small_nbextensions/master/highlighter.zip -> /tmp/tm

In [None]:
# Clone the course repository, then copy the contents of its cmmcompiler
# directory and of cmmcompiler/tests into the current working directory.
# Presumably this is what provides the `lexer` and `tree` modules imported
# below and the sample program prog-002.cm - confirm against the repository.
! git clone https://github.com/rogerioag/rea-comp04-compiladores.git
! cp -R rea-comp04-compiladores/cmmcompiler/* .
! cp -R rea-comp04-compiladores/cmmcompiler/tests/* .


Cloning into 'rea-comp04-compiladores'...
remote: Enumerating objects: 251, done.[K
remote: Counting objects: 100% (251/251), done.[K
remote: Compressing objects: 100% (211/211), done.[K
remote: Total 251 (delta 122), reused 127 (delta 33), pack-reused 0[K
Receiving objects: 100% (251/251), 556.20 KiB | 6.70 MiB/s, done.
Resolving deltas: 100% (122/122), done.


In [None]:
from sys import argv, exit

import logging

# Route records of level DEBUG and above to log.txt (opened in "w" mode, so
# the file is rewritten on every run). Each record is formatted as
# "<file name>:<line number>:<message>".
logging.basicConfig(
    level=logging.DEBUG,
    filename="log.txt",
    filemode="w",
    format="%(filename)10s:%(lineno)4d:%(message)s",
)

# Root logger; it is handed to yacc.yacc() as `debuglog` when the parser is built.
log = logging.getLogger()

In [None]:
import ply.yacc as yacc
from lexer import tokens
import re as regex

from lexer import TOKENS_SYMBOLS
from tree import TreeNode

In [None]:
# from .declarations import *

def p_declaration_list(parser):
    """declaration-list : declaration-list declaration
                        | declaration
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds the DECLARATION_LIST node stored in parser[0].
    decl_list = TreeNode(id='DECLARATION_LIST')
    parser[0] = decl_list

    head = parser[1]

    # Single declaration: insert it directly.
    if len(parser) == 2:
        decl_list.insert_node(head)
        return

    # Recursive alternative: re-insert the nodes() of the list built so far and
    # then the new declaration (presumably keeping the list flat rather than
    # nested - confirm against TreeNode).
    decl_list.insert_nodes(head.nodes())
    decl_list.insert_node(parser[2])

def p_declaration(parser):
    """declaration : var-declaration
                   | fun-declaration
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Wraps whichever declaration was matched in a DECLARATION node.
    declaration = TreeNode(id='DECLARATION')
    parser[0] = declaration
    declaration.insert_node(parser[1])

def p_var_declaration(parser):
    """var-declaration : type-specifier id SEMICOLON
                       | type-specifier id LBRACKETS number RBRACKETS SEMICOLON
    """
    # The docstring above is the grammar production PLY reads.
    #
    # Fix: the array alternative used to end at RBRACKETS, so a declaration
    # such as `int a[10];` could not be parsed (the ';' was left over). The
    # production now requires the terminating SEMICOLON, as the scalar form
    # does, and the resulting node ends with a SEMICOLON leaf in both cases.
    #
    # Inserted, in order:
    #   scalar: type-specifier, id, SEMICOLON
    #   array : type-specifier, id, LBRACKETS, number, RBRACKETS, SEMICOLON
    node = TreeNode(id='VAR_DECLARATION')
    parser[0] = node

    node.insert_node(parser[1])  # type-specifier
    node.insert_node(parser[2])  # id

    # The array alternative has seven slots (including parser[0]), the scalar four.
    if len(parser) > 4:
        node.insert_node(TreeNode(id='LBRACKETS', raw=TOKENS_SYMBOLS.get('LBRACKETS')))
        node.insert_node(parser[4])  # number
        node.insert_node(TreeNode(id='RBRACKETS', raw=TOKENS_SYMBOLS.get('RBRACKETS')))

    node.insert_node(TreeNode(id='SEMICOLON', raw=TOKENS_SYMBOLS.get('SEMICOLON')))

def p_fun_declaration(parser):
    """fun-declaration : type-specifier id LPAREN params RPAREN compound-stmt"""
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds a FUN_DECLARATION node and inserts, in source order:
    # type-specifier, id, LPAREN, params, RPAREN, compound-stmt.
    function = TreeNode(id='FUN_DECLARATION')
    parser[0] = function

    function.insert_node(parser[1])  # type-specifier
    function.insert_node(parser[2])  # id
    function.insert_node(TreeNode(id='LPAREN', raw=TOKENS_SYMBOLS.get('LPAREN')))
    function.insert_node(parser[4])  # params
    function.insert_node(TreeNode(id='RPAREN', raw=TOKENS_SYMBOLS.get('RPAREN')))
    function.insert_node(parser[6])  # compound-stmt

def p_local_declarations(parser):
    """local-declarations : local-declarations var-declaration
                          | empty
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds a LOCAL_DECLARATIONS node. The first symbol (the previous
    # LOCAL_DECLARATIONS node or the EMPTY node) is inserted as-is, followed by
    # the var-declaration when the recursive alternative matched.
    local_decls = TreeNode(id='LOCAL_DECLARATIONS')
    parser[0] = local_decls

    local_decls.insert_node(parser[1])

    has_declaration = len(parser) > 2
    if has_declaration:
        local_decls.insert_node(parser[2])


In [None]:
# from .expressions import *

def p_expressions(parser):
    """expression : var ATTRIBUTION expression
                  | simple-expression
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds an EXPRESSION node: either a lone simple-expression, or
    # var, ATTRIBUTION, expression for an assignment.
    expression = TreeNode(id='EXPRESSION')
    parser[0] = expression

    expression.insert_node(parser[1])

    if len(parser) <= 2:
        return

    expression.insert_node(TreeNode(id='ATTRIBUTION', raw=TOKENS_SYMBOLS.get('ATTRIBUTION')))
    expression.insert_node(parser[3])  # right-hand side expression

def p_simple(parser):
    """simple-expression : additive-expression relop additive-expression
                         | additive-expression
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds a SIMPLE_EXPRESSION node from one additive-expression, optionally
    # followed by a relop node and a second additive-expression.
    simple = TreeNode(id='SIMPLE_EXPRESSION')
    parser[0] = simple

    simple.insert_node(parser[1])

    if len(parser) > 2:
        for operand in parser[2:4]:  # relop, additive-expression
            simple.insert_node(operand)

def p_additive(parser):
    """additive-expression : additive-expression addop term
                           | term
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds an ADDITIVE_EXPRESSION node from its first symbol, optionally
    # followed by the addop node and the right-hand term.
    additive = TreeNode(id='ADDITIVE_EXPRESSION')
    parser[0] = additive

    additive.insert_node(parser[1])

    if len(parser) > 2:
        for operand in parser[2:4]:  # addop, term
            additive.insert_node(operand)



In [None]:
# from .operations import *

def get_token_by_raw(raw):
    """Return the first key of TOKENS_SYMBOLS whose value equals ``raw``.

    Keys are visited in the mapping's iteration order. ``None`` is returned
    when no value matches.
    """
    matching_tokens = (
        token for token, value in TOKENS_SYMBOLS.items() if value == raw
    )
    return next(matching_tokens, None)


def p_relational(parser):
    """relop : LESS_EQUAL
             | LESS
             | GREATER
             | GREATER_EQUAL
             | EQUALS
             | DIFFERENT
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # The node id is looked up from the matched text via get_token_by_raw, and
    # the text itself is kept as `raw`.
    operator_text = parser[1]
    parser[0] = TreeNode(id=get_token_by_raw(operator_text), raw=operator_text)

def p_addition(parser):
    """addop : PLUS
             | MINUS
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # The node id is looked up from the matched text via get_token_by_raw, and
    # the text itself is kept as `raw`.
    operator_text = parser[1]
    parser[0] = TreeNode(id=get_token_by_raw(operator_text), raw=operator_text)

def p_multiplication(parser):
    """mulop : TIMES
             | DIVIDE
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # The node id is looked up from the matched text via get_token_by_raw, and
    # the text itself is kept as `raw`.
    operator_text = parser[1]
    parser[0] = TreeNode(id=get_token_by_raw(operator_text), raw=operator_text)


In [None]:
# from .params import *
def p_params(parser):
    """params : param-list
              | void
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Wraps the matched param-list (or the VOID node) in a PARAMS node.
    params = TreeNode(id='PARAMS')
    parser[0] = params
    params.insert_node(parser[1])

def p_param_list(parser):
    """param-list : param-list COMMA param
                  | param
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds the PARAM_LIST node stored in parser[0].
    param_list = TreeNode(id='PARAM_LIST')
    parser[0] = param_list

    head = parser[1]

    # Single parameter: insert it directly.
    if len(parser) == 2:
        param_list.insert_node(head)
        return

    # Recursive alternative: re-insert the nodes() of the list built so far,
    # then a COMMA leaf and the new param (presumably keeping the list flat -
    # confirm against TreeNode).
    param_list.insert_nodes(head.nodes())
    param_list.insert_node(TreeNode(id='COMMA', raw=TOKENS_SYMBOLS.get('COMMA')))
    param_list.insert_node(parser[3])

def p_param(parser):
    """param : type-specifier id
             | type-specifier id LBRACKETS RBRACKETS
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds a PARAM node from type-specifier and id; the array form also gets
    # LBRACKETS and RBRACKETS leaves.
    param = TreeNode(id='PARAM')
    parser[0] = param

    for part in parser[1:3]:  # type-specifier, id
        param.insert_node(part)

    is_array = len(parser) > 3
    if is_array:
        param.insert_node(TreeNode(id='LBRACKETS', raw=TOKENS_SYMBOLS.get('LBRACKETS')))
        param.insert_node(TreeNode(id='RBRACKETS', raw=TOKENS_SYMBOLS.get('RBRACKETS')))


In [None]:
# from .types import *

def p_type_specifier(parser):
    """type-specifier : int
                      | void
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Wraps the INT or VOID node in a TYPE_SPECIFIER node.
    type_specifier = TreeNode(id='TYPE_SPECIFIER')
    parser[0] = type_specifier
    type_specifier.insert_node(parser[1])

def p_void(parser):
    """void : VOID """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Leaf node for the VOID keyword; `raw` holds the matched token value.
    keyword = parser[1]
    parser[0] = TreeNode(id='VOID', raw=keyword)

def p_int(parser):
    """int : INT """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Leaf node for the INT keyword; `raw` holds the matched token value.
    keyword = parser[1]
    parser[0] = TreeNode(id='INT', raw=keyword)

In [None]:
# from .statements import *

def p_statement_list(parser):
    """statement-list : statement-list statement
                      | empty
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds a STATEMENT_LIST node. For the `empty` alternative nothing is
    # inserted (the EMPTY node is not kept). For the recursive alternative the
    # nodes() of the list built so far are re-inserted, then the new statement.
    statements = TreeNode(id='STATEMENT_LIST')
    parser[0] = statements

    if len(parser) <= 2:
        return

    statements.insert_nodes(parser[1].nodes())
    statements.insert_node(parser[2])

def p_statement(parser):
    """statement : expression-stmt
                 | compound-stmt
                 | selection-stmt
                 | iteration-stmt
                 | return-stmt
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Wraps whichever statement kind was matched in a STATEMENT node.
    statement = TreeNode(id='STATEMENT')
    parser[0] = statement
    statement.insert_node(parser[1])

def p_expression(parser):
    """expression-stmt : expression SEMICOLON
                       | SEMICOLON
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds an EXPRESSION_STMT node.
    expression_stmt = TreeNode(id='EXPRESSION_STMT')
    parser[0] = expression_stmt

    first = parser[1]

    # A lone ';' arrives as the token's value; wrap it in a SEMICOLON leaf.
    # Otherwise `first` is the expression and is inserted unchanged.
    if TOKENS_SYMBOLS.get('SEMICOLON') == first:
        first = TreeNode(id='SEMICOLON', raw=first)
    expression_stmt.insert_node(first)

    # `expression SEMICOLON`: add the leaf for the trailing ';'.
    if len(parser) > 2:
        expression_stmt.insert_node(
            TreeNode(id='SEMICOLON', raw=TOKENS_SYMBOLS.get('SEMICOLON'))
        )

def p_compound(parser):
    """compound-stmt : LBRACES local-declarations statement-list RBRACES"""
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds a COMPOUND_STMT node and inserts, in source order:
    # LBRACES, local-declarations, statement-list, RBRACES.
    compound = TreeNode(id='COMPOUND_STMT')
    parser[0] = compound

    compound.insert_node(TreeNode(id='LBRACES', raw=TOKENS_SYMBOLS.get('LBRACES')))
    compound.insert_node(parser[2])  # local-declarations
    compound.insert_node(parser[3])  # statement-list
    compound.insert_node(TreeNode(id='RBRACES', raw=TOKENS_SYMBOLS.get('RBRACES')))

def p_selection(parser):
    """selection-stmt : IF LPAREN expression RPAREN statement
                      | IF LPAREN expression RPAREN statement ELSE statement
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds a SELECTION_STMT node: IF, LPAREN, expression, RPAREN, statement,
    # plus ELSE and the alternative statement when an else branch was matched.
    selection = TreeNode(id='SELECTION_STMT')
    parser[0] = selection

    selection.insert_node(TreeNode(id='IF', raw=TOKENS_SYMBOLS.get('IF')))
    selection.insert_node(TreeNode(id='LPAREN', raw=TOKENS_SYMBOLS.get('LPAREN')))
    selection.insert_node(parser[3])  # condition
    selection.insert_node(TreeNode(id='RPAREN', raw=TOKENS_SYMBOLS.get('RPAREN')))
    selection.insert_node(parser[5])  # statement of the if branch

    has_else = len(parser) > 6
    if has_else:
        selection.insert_node(TreeNode(id='ELSE', raw=TOKENS_SYMBOLS.get('ELSE')))
        selection.insert_node(parser[7])  # statement of the else branch

def p_iteration(parser):
    """iteration-stmt : WHILE LPAREN expression RPAREN statement"""
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds an ITERATION_STMT node and inserts, in source order:
    # WHILE, LPAREN, expression, RPAREN, statement.
    loop = TreeNode(id='ITERATION_STMT')
    parser[0] = loop

    loop.insert_node(TreeNode(id='WHILE', raw=TOKENS_SYMBOLS.get('WHILE')))
    loop.insert_node(TreeNode(id='LPAREN', raw=TOKENS_SYMBOLS.get('LPAREN')))
    loop.insert_node(parser[3])  # condition
    loop.insert_node(TreeNode(id='RPAREN', raw=TOKENS_SYMBOLS.get('RPAREN')))
    loop.insert_node(parser[5])  # loop body

def p_return(parser):
    """return-stmt : RETURN SEMICOLON
                   | RETURN expression SEMICOLON
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    #
    # Builds a RETURN_STMT node:
    #   `return;`       -> RETURN, SEMICOLON
    #   `return expr;`  -> RETURN, expression, SEMICOLON
    #
    # Fix: for `return expr;` the trailing SEMICOLON leaf used to be created
    # and assigned to a local but never inserted, so it was missing from the
    # tree. It is now inserted, as p_expression does for `expression SEMICOLON`.
    parser[0] = TreeNode(id='RETURN_STMT')

    [node, _, leaf] = parser[:3]

    # For a bare `return;` the second symbol is the ';' token value: wrap it in
    # a SEMICOLON leaf. Otherwise `leaf` is the expression node.
    if TOKENS_SYMBOLS.get('SEMICOLON') == leaf:
        leaf = TreeNode(id='SEMICOLON', raw=leaf)

    node.insert_node(TreeNode(id='RETURN', raw=TOKENS_SYMBOLS.get('RETURN')))
    node.insert_node(leaf)

    if len(parser) > 3:
        node.insert_node(TreeNode(id='SEMICOLON', raw=TOKENS_SYMBOLS.get('SEMICOLON')))


In [None]:

def p_program(parser):
    """program : declaration-list"""
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # `program` is the start symbol passed to yacc.yacc(); its PROGRAM node
    # holds the declaration list.
    program = TreeNode(id='PROGRAM')
    parser[0] = program
    program.insert_node(parser[1])

def p_var(parser):
    """var : id
           | id LBRACKETS expression RBRACKETS
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds a VAR node from the id node; an indexed access additionally gets
    # LBRACKETS, the index expression and RBRACKETS.
    var = TreeNode(id='VAR')
    parser[0] = var

    var.insert_node(parser[1])

    if len(parser) <= 2:
        return

    var.insert_node(TreeNode(id='LBRACKETS', raw=TOKENS_SYMBOLS.get('LBRACKETS')))
    var.insert_node(parser[3])  # index expression
    var.insert_node(TreeNode(id='RBRACKETS', raw=TOKENS_SYMBOLS.get('RBRACKETS')))

def p_term(parser):
    """term : term mulop factor
            | factor
    """
    # The docstring above is the grammar production PLY reads.
    #
    # Fix: the first alternative was `mulop factor`, which has no left operand,
    # so `a * b` could not be parsed while `* b` was accepted. It is now the
    # left-recursive `term mulop factor`, mirroring
    # `additive-expression : additive-expression addop term`.
    #
    # Builds a TERM node: the first symbol (nested TERM or FACTOR), optionally
    # followed by the mulop node and the right-hand factor.
    node = TreeNode(id='TERM')
    parser[0] = node

    node.insert_node(parser[1])

    if len(parser) > 2:
        node.insert_node(parser[2])  # mulop
        node.insert_node(parser[3])  # factor

def p_factor(parser):
    """factor : LPAREN expression RPAREN
              | var
              | call
              | number
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds a FACTOR node: either LPAREN, expression, RPAREN for a
    # parenthesised expression, or the single var/call/number node.
    factor = TreeNode(id='FACTOR')
    parser[0] = factor

    first = parser[1]
    is_parenthesised = TOKENS_SYMBOLS.get('LPAREN') == first

    if is_parenthesised:
        inner_expression = parser[2]
        factor.insert_node(TreeNode(id='LPAREN', raw=TOKENS_SYMBOLS.get('LPAREN')))
        factor.insert_node(inner_expression)
        factor.insert_node(TreeNode(id='RPAREN', raw=TOKENS_SYMBOLS.get('RPAREN')))
    else:
        factor.insert_node(first)

def p_call(parser):
    """call : id LPAREN args RPAREN"""
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    #
    # Builds a CALL node and inserts, in source order: id, LPAREN, args, RPAREN.
    #
    # Fixes:
    # * The production has five slots (call, id, LPAREN, args, RPAREN), and
    #   unpacking all of them into four names raised
    #   "ValueError: too many values to unpack (expected 4)" on every call
    #   expression. Only the slots that are needed are read now.
    # * `id` is a non-terminal whose value is already the ID node built by
    #   p_id, so it is inserted directly (as p_var does) instead of being
    #   wrapped in a second ID node as if it were raw text.
    node = TreeNode(id='CALL')
    parser[0] = node

    id_node = parser[1]
    args = parser[3]

    node.insert_node(id_node)
    node.insert_node(TreeNode(id='LPAREN', raw=TOKENS_SYMBOLS.get('LPAREN')))
    node.insert_node(args)
    node.insert_node(TreeNode(id='RPAREN', raw=TOKENS_SYMBOLS.get('RPAREN')))

def p_id(parser):
    """id : ID"""
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds an ID node holding one leaf whose `raw` is the matched token value.
    identifier = TreeNode(id='ID')
    parser[0] = identifier

    name = parser[1]
    identifier.insert_node(TreeNode(raw=name))

def p_number(parser):
    """number : NUMBER"""
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Builds a NUMBER node holding one leaf whose `raw` is the matched token value.
    number_node = TreeNode(id='NUMBER')
    parser[0] = number_node

    value = parser[1]
    number_node.insert_node(TreeNode(raw=value))

def p_args(parser):
    """args : arg-list
            | empty
    """
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Wraps the ARG_LIST node (or the EMPTY node for a call without arguments)
    # in an ARGS node.
    args = TreeNode(id='ARGS')
    parser[0] = args
    args.insert_node(parser[1])

def p_arg_list(parser):
    """arg-list : arg-list COMMA expression
                | expression
    """
    # The docstring above is the grammar production PLY reads.
    #
    # Fix: call arguments were separated by SEMICOLON, so `f(a, b)` could not
    # be parsed and only `f(a; b)` was accepted. The separator is now COMMA,
    # matching `param-list : param-list COMMA param`.
    #
    # Builds an ARG_LIST node: the first symbol (nested ARG_LIST or expression),
    # optionally followed by a COMMA leaf and the next expression.
    node = TreeNode(id='ARG_LIST')
    parser[0] = node

    node.insert_node(parser[1])

    if len(parser) > 2:
        node.insert_node(TreeNode(id='COMMA', raw=TOKENS_SYMBOLS.get('COMMA')))
        node.insert_node(parser[3])  # expression

def p_empty(parser):
    """empty :"""
    # The docstring above is the grammar production PLY reads; keep it verbatim.
    # Epsilon production: the result is a bare EMPTY node.
    empty_node = TreeNode(id='EMPTY')
    parser[0] = empty_node

def p_error(parser):
    """Syntax-error hook called by PLY.

    ``parser`` is the offending token, or ``None`` when the error is detected
    at the end of the input. The token is printed as before; for ``None`` an
    explicit end-of-input message is printed instead of a bare "None".
    """
    if parser is None:
        print("Syntax error: unexpected end of input")
        return

    print(parser)

In [None]:
# Main program.
def main(source_path='prog-002.cm'):
    """Parse a C-minus source file with the module-level ``parser``.

    Args:
        source_path: path of the file to parse. Defaults to 'prog-002.cm',
            the file this function used to hard-code.

    Raises:
        IOError: if the text after the last '.' in ``source_path`` is not 'cm'.

    Changes from the previous version: ``sys.argv[1]`` is no longer
    overwritten (that raised IndexError when argv had a single entry), and the
    file is closed after reading instead of being left open.
    """
    extension = source_path.rsplit('.', 1)[-1]
    if extension != 'cm':
        raise IOError("Not a .cm file!")

    with open(source_path) as source:
        source_file = source.read()

    # `parser` is the global created by yacc.yacc() in the build cell.
    parser.parse(source_file)

    print("Parsing...", source_path)

In [None]:
# Build the parser.
# A notebook module has no __file__; one is assigned here, presumably because
# yacc looks it up on the calling module - confirm against the PLY version in use.
__file__ = "02-comp-analise-sintatica-cmmparser.ipynb"

# NOTE(review): with optimize=True PLY may reuse an existing 'cmm_parser_tab'
# module without checking that it matches the grammar above - confirm that no
# stale table module is on the import path.
parser = yacc.yacc(
    method="LALR",
    optimize=True,
    start='program',
    debug=True,
    debuglog=log,
    write_tables=False,
    tabmodule='cmm_parser_tab',
)
# Alternative using PLY's defaults: parser = yacc.yacc(start='program')

if __name__ == "__main__":
    main()

    

LexToken(RBRACES,'}',68,188)


Generating LALR tables


ValueError: ignored

In [None]:
# Run main.py (copied from the cloned repository) on prog-002.cm in a separate
# Python process. Presumably -p selects the parsing stage - confirm in main.py.
! python main.py -p prog-002.cm

LexToken(RBRACES,'}',2,188)
Traceback (most recent call last):
  File "main.py", line 17, in <module>
    program = parser.parse(as_str)
  File "/usr/local/lib/python3.7/dist-packages/ply/yacc.py", line 333, in parse
    return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
  File "/usr/local/lib/python3.7/dist-packages/ply/yacc.py", line 1120, in parseopt_notrack
    p.callable(pslice)
  File "/content/parser/grammar/__init__.py", line 82, in p_call
    [node, id_raw, _, args] = parser
ValueError: too many values to unpack (expected 4)
