In [1]:
import pandas as pd
from tree_sitter import Language, Parser

import os
# NOTE(review): hard-coded absolute path. The project imports that follow are
# presumably resolved relative to this working directory — confirm before
# running the notebook on another machine.
os.chdir('/home/dev/function_parser/')

from function_parser.language_data import LANGUAGE_METADATA
from process import DataProcessor

ModuleNotFoundError: No module named 'parsers'

In [0]:
! rm -rf mkdir /home/dev/.vendor 
! mkdir /home/dev/.vendor
! rm -rf mkdir /home/dev/.build
! mkdir /home/dev/.build

! git clone https://github.com/tree-sitter/tree-sitter-python /home/dev/.vendor/tree-sitter-python

In [0]:
# Compile the checked-out grammar(s) into a single shared library.
Language.build_library(
    # First argument: where the compiled library is written.
    '/home/dev/.build/py-tree-sitter-languages.so',
    # Second argument: grammar directories to include in that library.
    [
        # Not enabled here:
        #   'vendor/tree-sitter-go',
        #   'vendor/tree-sitter-javascript',
        '/home/dev/.vendor/tree-sitter-python',
    ],
)

In [0]:
# Name of the grammar to load from the compiled shared library.
language_name = 'python'

# Load that grammar from the library path used by the build cell and attach it
# to a new parser; `parser` is reused by later cells.
language = Language('/home/dev/.build/py-tree-sitter-languages.so', language_name)
parser = Parser()
parser.set_language(language)

In [34]:
# Sample function used as parser input by the rest of the notebook.
# The text (including the leading newline) is passed on verbatim.
code = """
def foo():
    if bar:
        baz()
    if bar2:
        baz2()
    i = 0
    k = 1
    k = i + k
"""
# Keep a UTF-8 encoded copy; byte offsets into it are used when printing the tree.
blob = code.encode("utf8")
# Peek at the first 15 bytes as a quick sanity check of the encoding.
blob[:15]


b'\ndef foo():\n   '

In [35]:
# Parse the encoded sample with the parser configured above; `tree` is walked in later cells.
tree = parser.parse(blob)

In [36]:
def print_ast(cursor, blob, depth = 0):
    """Print the subtree under ``cursor`` as an indented outline, one node per line.

    Identifier nodes are shown as the source text they span in ``blob``; every
    other node is shown as its node type. Each nesting level adds one space of
    indentation, and the node the cursor starts on is already indented by one
    space. The cursor is moved back to its starting node before returning.

    Args:
        cursor: tree cursor positioned on the root of the subtree to print
            (for example the result of ``tree.walk()``).
        blob: the parsed bytes; sliced with each identifier's byte offsets.
        depth: nesting level of the caller; leave at 0 for the top-level call.
    """
    level = depth + 1
    node = cursor.node

    if node.type == 'identifier':
        label = blob[node.start_byte:node.end_byte].decode("utf8")
    else:
        label = node.type
    print(' ' * level + label)

    # Leaf node: nothing below it to visit.
    if not cursor.goto_first_child():
        return

    # Print the first child, then every following sibling, then climb back up.
    has_more = True
    while has_more:
        print_ast(cursor, blob, level)
        has_more = cursor.goto_next_sibling()
    cursor.goto_parent()



In [37]:
cursor = tree.walk()
print_ast(cursor, blob)


module
  function_definition
   def
   foo
   parameters
    (
    )
   :
   block
    if_statement
     if
     bar
     :
     block
      expression_statement
       call
        baz
        argument_list
         (
         )
    if_statement
     if
     bar2
     :
     block
      expression_statement
       call
        baz2
        argument_list
         (
         )
    expression_statement
     assignment
      expression_list
       i
      =
      expression_list
       integer
    expression_statement
     assignment
      expression_list
       k
      =
      expression_list
       integer
    expression_statement
     assignment
      expression_list
       k
      =
      expression_list
       binary_operator
        i
        +
        k


In [39]:
def ast2graph(cursor, blob, depth = 0):
    """Walk the subtree under ``cursor`` and print it as an indented outline.

    Despite its name, this function does not build a graph yet: it only
    prints one line per node. Identifier nodes are printed as the source text
    they span in ``blob``; all other nodes are printed as their node type.
    Indentation is one space per nesting level, starting at one space for the
    node the cursor is on. The cursor is returned to its starting node.

    Args:
        cursor: tree cursor positioned on the root of the subtree to walk.
        blob: the parsed bytes; sliced with each identifier's byte offsets.
        depth: nesting level of the caller; leave at 0 for the top-level call.
    """
    depth += 1
    node = cursor.node
    if node.type == 'identifier':
        text = blob[node.start_byte:node.end_byte].decode("utf8")
    else:
        text = node.type
    print(' ' * depth + text)

    if cursor.goto_first_child():
        while True:
            # Recurse into this function itself; the previous call targeted
            # `print_tree`, a name that is not defined in the notebook.
            ast2graph(cursor, blob, depth)
            if not cursor.goto_next_sibling():
                break
        # Restore the cursor so the caller's sibling iteration stays valid.
        cursor.goto_parent()

In [40]:
from function_parser.parsers.ast_graph_generator import AstGraphGenerator
from ast import parse

In [41]:
# Run the project's graph generator over the stdlib AST of the sample code.
visitor = AstGraphGenerator()
visitor.visit(parse(code))

# Flatten the {(origin, destination): edge types} mapping into
# (edge type, origin, destination) triples.
edge_list = [
    (edge_type, origin, destination)
    for (origin, destination), edge_types in visitor.graph.items()
    for edge_type in edge_types
]

# Labels in sorted order of the node_label items, with surrounding whitespace removed.
graph_node_labels = [
    label.strip()
    for _, label in sorted(visitor.node_label.items())
]
graph_node_labels

['FunctionDef',
 'def',
 'foo',
 '(',
 ')',
 ':',
 'If',
 'if',
 'Name',
 'bar',
 ':',
 'Expr',
 'Call',
 'Name',
 'baz',
 '(',
 ')',
 'If',
 'if',
 'Name',
 'bar2',
 ':',
 'Expr',
 'Call',
 'Name',
 'baz2',
 '(',
 ')',
 'Assign',
 'Name',
 'i',
 '=',
 'Num',
 '0',
 'Assign',
 'Name',
 'k',
 '=',
 'Num',
 '1',
 'Assign',
 'Name',
 'k',
 '=',
 'BinOp',
 'Name',
 'i',
 '+',
 'Name',
 'k']

In [42]:
# Show the full edge list built above: one (type, origin, destination) tuple per edge.
edge_list 

[('child', 0, 1),
 ('child', 0, 2),
 ('NextToken', 1, 2),
 ('child', 0, 3),
 ('NextToken', 2, 3),
 ('child', 0, 4),
 ('NextToken', 3, 4),
 ('child', 0, 5),
 ('NextToken', 4, 5),
 ('child', 0, 6),
 ('child', 6, 7),
 ('NextToken', 5, 7),
 ('child', 6, 8),
 ('child', 8, 9),
 ('NextToken', 7, 9),
 ('child', 6, 10),
 ('NextToken', 9, 10),
 ('child', 6, 11),
 ('child', 11, 12),
 ('child', 12, 13),
 ('child', 13, 14),
 ('NextToken', 10, 14),
 ('child', 12, 15),
 ('NextToken', 14, 15),
 ('child', 12, 16),
 ('NextToken', 15, 16),
 ('child', 0, 17),
 ('child', 17, 18),
 ('NextToken', 16, 18),
 ('child', 17, 19),
 ('child', 19, 20),
 ('NextToken', 18, 20),
 ('child', 17, 21),
 ('NextToken', 20, 21),
 ('child', 17, 22),
 ('child', 22, 23),
 ('child', 23, 24),
 ('child', 24, 25),
 ('NextToken', 21, 25),
 ('child', 23, 26),
 ('NextToken', 25, 26),
 ('child', 23, 27),
 ('NextToken', 26, 27),
 ('child', 0, 28),
 ('child', 28, 29),
 ('child', 29, 30),
 ('NextToken', 27, 30),
 ('child', 28, 31),
 ('Next