In [40]:
import os
import json
from tree_sitter_languages import get_parser

In [41]:
def _decoded(node):
    """Return the source text covered by *node*, decoded as UTF-8."""
    return node.text.decode('utf-8')


def _record_method_declaration(node, method_data):
    """Append a summary of one ``method_declaration`` node to ``method_data['methods']``."""
    summary = {}

    name_node = node.child_by_field_name('name')
    if name_node:
        summary['name'] = _decoded(name_node)

    return_type_node = node.child_by_field_name('type')
    if return_type_node:
        summary['return_type'] = _decoded(return_type_node)

    parameters_node = node.child_by_field_name('parameters')
    if parameters_node:
        summary['parameters'] = []
        for param in parameters_node.named_children:
            # Only 'formal_parameter' children are recorded; any other
            # kind of named child is skipped.
            if param.type != 'formal_parameter':
                continue
            param_type = param.child_by_field_name('type')
            param_name = param.child_by_field_name('name')
            if param_type and param_name:
                summary['parameters'].append({
                    'type': _decoded(param_type),
                    'name': _decoded(param_name),
                })

    # Byte offsets into the buffer that was handed to the parser.
    summary['start_position'] = node.start_byte
    summary['end_position'] = node.end_byte

    body_node = node.child_by_field_name('body')
    if body_node:
        summary['body_start'] = body_node.start_byte
        summary['body_end'] = body_node.end_byte

    method_data.setdefault('methods', []).append(summary)


def _record_method_invocation(node, method_data):
    """Register one ``method_invocation`` node under ``method_data['method_calls']``."""
    name_node = node.child_by_field_name('name')
    receiver_node = node.child_by_field_name('object')
    if not name_node:
        return

    called = _decoded(name_node)
    calls_by_receiver = method_data.setdefault('method_calls', {})
    # The key is the raw text of the receiver expression, or 'unknown'
    # when the invocation has no 'object' field.
    receiver = _decoded(receiver_node) if receiver_node else 'unknown'
    known_calls = calls_by_receiver.setdefault(receiver, [])
    if called not in known_calls:
        known_calls.append(called)


def _record_local_variables(node, method_data):
    """Map each variable declared by a ``local_variable_declaration`` to its type text."""
    # The first named child is skipped; remaining children that expose
    # no 'name' field are simply ignored by the loop below.
    declarators = node.named_children[1:]
    type_node = node.child_by_field_name('type')
    if not type_node:
        return

    declared_type = _decoded(type_node)
    for declarator in declarators:
        variable_node = declarator.child_by_field_name('name')
        if variable_node:
            variables = method_data.setdefault('local_variables', {})
            # A later declaration of the same name overwrites the earlier one.
            variables[_decoded(variable_node)] = declared_type


def extract_ast_info(node, method_data):
    """Walk the syntax tree rooted at *node* and accumulate a summary in *method_data*.

    The tree is visited in pre-order (a node first, then its children from
    left to right). Keys are created in *method_data* only when the matching
    node type is first encountered:

    * ``'methods'`` -- list with one dict per ``method_declaration`` (name,
      return type, parameters, byte offsets of the declaration and its body).
    * ``'code_constructs'`` -- distinct node types ending in ``'_statement'``,
      in order of first appearance.
    * ``'method_calls'`` -- dict mapping receiver text (or ``'unknown'``) to
      the distinct method names invoked on it.
    * ``'local_variables'`` -- dict mapping variable name to declared type text.

    Args:
        node: Root of the (sub)tree to inspect. It must provide ``type``,
            ``text``, ``children``, ``named_children``, ``start_byte``,
            ``end_byte`` and ``child_by_field_name``.
        method_data: Dict that is updated in place; nothing is returned.
    """
    # An explicit stack is used instead of recursion so that very deeply
    # nested trees cannot exhaust Python's recursion limit.
    pending = [node]
    while pending:
        current = pending.pop()
        kind = current.type

        if kind == 'method_declaration':
            _record_method_declaration(current, method_data)

        if kind.endswith('_statement'):
            constructs = method_data.setdefault('code_constructs', [])
            if kind not in constructs:
                constructs.append(kind)

        if kind == 'method_invocation':
            _record_method_invocation(current, method_data)

        if kind == 'local_variable_declaration':
            _record_local_variables(current, method_data)

        # Push children reversed so the leftmost child is popped next,
        # which keeps the visit order identical to a recursive pre-order walk.
        pending.extend(reversed(current.children))

def process_java_file(file_path, output_dir):
    """Parse one Java source file and write its AST summary to a JSON file.

    The summary produced by ``extract_ast_info`` is written to
    ``<output_dir>/<base name of file_path without extension>_ast.json``.
    ``output_dir`` is created if it does not exist.

    NOTE(review): the output name is derived from the base name only, so two
    sources with the same file name in different directories write to the
    same JSON file and the later one wins -- confirm this is acceptable.

    Args:
        file_path: Path of the ``.java`` file to read.
        output_dir: Directory that receives the JSON file.

    Raises:
        UnicodeDecodeError: If the file is not valid UTF-8.
        OSError: If the source cannot be read or the output cannot be written.
    """
    # Decode explicitly as UTF-8 rather than with the locale's default
    # encoding: the text is re-encoded as UTF-8 for the parser below, so a
    # different locale encoding would corrupt non-ASCII source and shift the
    # byte offsets recorded in the summary.
    with open(file_path, 'r', encoding='utf-8') as file:
        java_code = file.read()
    # `parser` is a module-level global assigned outside this function.
    tree = parser.parse(java_code.encode())

    ast_info = {}
    extract_ast_info(tree.root_node, ast_info)

    file_name = os.path.splitext(os.path.basename(file_path))[0]
    output_file = os.path.join(output_dir, f"{file_name}_ast.json")
    os.makedirs(output_dir, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(ast_info, f, indent=4)

def main(repo_path, output_dir):
    """Run ``process_java_file`` on every ``.java`` file found under *repo_path*.

    Args:
        repo_path: Root directory that is walked recursively.
        output_dir: Directory passed through to ``process_java_file`` for
            each source file found.
    """
    for current_dir, _subdirs, filenames in os.walk(repo_path):
        java_sources = (entry for entry in filenames if entry.endswith('.java'))
        for entry in java_sources:
            source_path = os.path.join(current_dir, entry)
            process_java_file(source_path, output_dir)

# Script entry point: build the Java parser, derive the dataset and output
# locations for one project, and process every Java file under the dataset.
if __name__ == "__main__":
    # Module-level parser; process_java_file reads this global by name.
    parser = get_parser('java') 
    # Both paths below are built relative to this base.
    base_path = "../../../"
    # NOTE(review): project_name is not defined anywhere in the visible code --
    # presumably it is set in an earlier notebook cell; confirm before running.
    repo_path = f"{base_path}dataset/{project_name}"  
    output_dir = f"{base_path}output/{project_name}/AST"  
    main(repo_path, output_dir)

Processing ../../../dataset/NextCloud/app/src/androidTest/disabledTests/AuthenticatorActivityTest.java
Processing ../../../dataset/NextCloud/app/src/androidTest/disabledTests/uiautomator/InitialTest.java
Processing ../../../dataset/NextCloud/app/src/androidTest/java/com/nextcloud/client/UploadListActivityActivityIT.java
Processing ../../../dataset/NextCloud/app/src/androidTest/java/com/nextcloud/client/CommunityActivityIT.java
Processing ../../../dataset/NextCloud/app/src/androidTest/java/com/nextcloud/client/AuthenticatorActivityIT.java
Processing ../../../dataset/NextCloud/app/src/androidTest/java/com/nextcloud/client/FirstRunActivityIT.java
Processing ../../../dataset/NextCloud/app/src/androidTest/java/com/nextcloud/client/EndToEndAction.java
Processing ../../../dataset/NextCloud/app/src/androidTest/java/com/nextcloud/client/SyncedFoldersActivityIT.java
Processing ../../../dataset/NextCloud/app/src/androidTest/java/com/nextcloud/client/account/UserAccountManagerImplTest.java
Process