In [2]:
import ast
from pathlib import Path

In [3]:
# Load the source of the module under inspection. Python source files are
# UTF-8 by default, so decode explicitly instead of relying on the platform's
# locale encoding (which is not UTF-8 on many Windows setups).
filename = Path("main.py")
content = filename.read_text(encoding="utf-8")

In [4]:
# Parse the module source into an abstract syntax tree.
tree = ast.parse(content)

In [11]:
# Pretty-print the tree; the `indent` argument of ast.dump needs Python 3.9+.
print(ast.dump(tree, indent=2))

Module(
  body=[
    ImportFrom(
      module='utils',
      names=[
        alias(name='greeting')],
      level=0),
    FunctionDef(
      name='main',
      args=arguments(
        posonlyargs=[],
        args=[],
        kwonlyargs=[],
        kw_defaults=[],
        defaults=[]),
      body=[
        Expr(
          value=Call(
            func=Name(id='greeting', ctx=Load()),
            args=[
              Constant(value='Hello World')],
            keywords=[]))],
      decorator_list=[])],
  type_ignores=[])


## Extracting function definitions

In [14]:
def extract_function_names(parsed_ast):
    """Return the names of all functions defined anywhere in an AST.

    Both ``def`` and ``async def`` definitions are reported. The whole tree is
    walked, so methods and nested functions are included as well.

    Args:
        parsed_ast: Root AST node to search, e.g. the result of ``ast.parse``.

    Returns:
        list[str]: Function names in the order ``ast.walk`` yields their
        nodes. Duplicate names are kept.
    """
    # ``async def`` is its own node type; checking only FunctionDef would
    # silently skip every coroutine function.
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
    return [
        node.name
        for node in ast.walk(parsed_ast)
        if isinstance(node, function_nodes)
    ]

In [13]:
# Try the helper on the tree parsed from main.py.
extract_function_names(tree)

['main']

In [15]:
# List the functions defined in every Python file next to the notebook.
# - sorted(): glob order depends on the filesystem, so sort for stable output.
# - loop-local names: do not overwrite the notebook-level `content` / `tree`
#   that the earlier cells created for main.py.
# - explicit UTF-8: Python source is UTF-8 by default; the locale encoding may differ.
for py_file in sorted(Path(".").glob("*.py")):
    module_ast = ast.parse(py_file.read_text(encoding="utf-8"))
    print(f"{py_file}: {extract_function_names(module_ast)}")

main.py: ['main']
utils.py: ['greeting']


## Extracting called functions

In [31]:
from collections import defaultdict

def extract_function_calls(parsed_ast):
    """Map each function defined in an AST to the plain-name functions it calls.

    Every ``def`` / ``async def`` found anywhere in the tree gets an entry,
    even when it makes no calls (its list is then empty).

    Only calls whose callee is a bare name are recorded (``foo()``); calls
    through an attribute (``obj.method()``) are ignored. Because the body of
    each function is walked recursively, a call made inside a nested function
    is attributed both to the nested function and to every enclosing function.
    Functions that share a name share one list.

    Args:
        parsed_ast: Root AST node to search, e.g. the result of ``ast.parse``.

    Returns:
        defaultdict[str, list[str]]: function name -> called names, in the
        order ``ast.walk`` yields the call nodes (duplicates kept).
    """
    # function_definition_name -> [names of functions called within it]
    func_calls = defaultdict(list)
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)

    for node in ast.walk(parsed_ast):
        if not isinstance(node, function_nodes):
            continue

        # Indexing the defaultdict registers the function immediately, so a
        # function without any recorded call still shows up in the result.
        calls = func_calls[node.name]

        for sub_node in ast.walk(node):
            if isinstance(sub_node, ast.Call) and isinstance(sub_node.func, ast.Name):
                calls.append(sub_node.func.id)

    return func_calls

In [32]:
# Show, for every Python file next to the notebook, which functions each
# definition calls.
# - sorted(): glob order depends on the filesystem, so sort for stable output.
# - loop-local names: do not overwrite the notebook-level `content` / `tree`.
# - explicit UTF-8: Python source is UTF-8 by default; the locale encoding may differ.
for py_file in sorted(Path(".").glob("*.py")):
    module_ast = ast.parse(py_file.read_text(encoding="utf-8"))
    print(f"{py_file}: {extract_function_calls(module_ast)}")

main.py: defaultdict(<class 'list'>, {'main': ['greeting']})
utils.py: defaultdict(<class 'list'>, {'greeting': ['print']})


# Crawl: collect each function's source together with the source of the functions it calls

In [19]:
# Record which file defines each function name: {function name: Path of file}.
# Files are visited in sorted order so that, when the same name is defined in
# several files, the winner (the last file in sorted order) is deterministic.
# Loop-local names keep the notebook-level `content` / `tree` untouched, and
# the source is decoded as UTF-8 (Python's default source encoding).
function_definitions = {}
for py_file in sorted(Path(".").glob("*.py")):
    module_ast = ast.parse(py_file.read_text(encoding="utf-8"))

    for func_definition in extract_function_names(module_ast):
        function_definitions[func_definition] = py_file

In [20]:
# Inspect the function name -> defining file mapping.
function_definitions

{'main': WindowsPath('main.py'), 'greeting': WindowsPath('utils.py')}

In [22]:
def get_function_source(file_path, function_name):
    """Return the source text of a function defined in a Python file.

    The file is searched for the first ``def`` / ``async def`` node (in
    ``ast.walk`` order, so methods and nested functions are candidates too)
    whose name equals ``function_name``. The returned text consists of whole
    lines: it starts at the first decorator line, or at the ``def`` line when
    there are no decorators, and ends at the last line of the function body.

    Args:
        file_path: Path of the Python source file (anything ``open`` accepts).
        function_name: Name of the function to look up.

    Returns:
        str | None: The function's source lines joined together, or ``None``
        if no function with that name exists in the file.

    Raises:
        OSError: If the file cannot be opened.
        SyntaxError: If the file is not valid Python.
    """
    # Read the file once; the same lines feed both the parser and the slice.
    # Python source is UTF-8 by default, so do not depend on the locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    parsed_ast = ast.parse(''.join(lines))

    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
    for node in ast.walk(parsed_ast):
        if isinstance(node, function_nodes) and node.name == function_name:
            # node.lineno points at the `def` line; decorators sit above it,
            # so start at the earliest decorator when there is one.
            first_line = min(
                [node.lineno] + [decorator.lineno for decorator in node.decorator_list]
            )
            # AST line numbers are 1-based and end_lineno is inclusive.
            return ''.join(lines[first_line - 1:node.end_lineno])

    return None  # If the function is not found

In [25]:
# Source text of every known function, keyed by function name.
function_contents = {
    name: get_function_source(path, name)
    for name, path in function_definitions.items()
}

In [26]:
# print() shows the plain dict repr, so newlines inside each source string appear as \n escapes.
print(function_contents)

{'main': 'def main():\n    greeting("Hello World")', 'greeting': 'def greeting(name):\n    print(name)\n'}


In [37]:
# Merge the per-file call maps into one {function name: [called names]} dict.
# Files are visited in sorted order so that, when the same function name is
# defined in several files, the surviving entry (from the last file in sorted
# order) is deterministic. Loop-local names keep the notebook-level
# `content` / `tree` untouched, and the source is decoded as UTF-8 (Python's
# default source encoding).
function_calls = {}
for py_file in sorted(Path(".").glob("*.py")):
    module_ast = ast.parse(py_file.read_text(encoding="utf-8"))
    function_calls.update(extract_function_calls(module_ast))

In [38]:
# Inspect the function name -> called names mapping.
function_calls

{'main': ['greeting'], 'greeting': ['print']}

In [43]:
# For each function, collect its own source followed by the source of every
# called function that has an entry in function_contents; called names without
# an entry are skipped.
with_code = {}

for func_name, call_list in function_calls.items():
    own_source = function_contents[func_name]
    callee_sources = [
        function_contents[callee]
        for callee in call_list
        if callee in function_contents
    ]
    with_code[func_name] = [own_source, *callee_sources]

In [44]:
# Inspect the result: function name -> [its own source, then sources of the functions it calls].
with_code

{'main': ['def main():\n    greeting("Hello World")',
  'def greeting(name):\n    print(name)\n'],
 'greeting': ['def greeting(name):\n    print(name)\n']}