In [14]:
import javalang
import os
import graphviz

In [15]:
def read_code_file(file_path):
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except UnicodeDecodeError as e:
        print(f"Error reading file {file_path}: {e}")
        return None

In [16]:
def create_ast(code):
    try:
        tree = javalang.parse.parse(code)
        return tree
    except javalang.parser.JavaSyntaxError as e:
        print(f"Syntax error in code: {e}")
        return None

In [17]:
def visualize_ast(java_code):
    tree = javalang.parse.parse(java_code)
    dot = graphviz.Digraph(comment="Java AST")

    # Keep track of node ids
    node_id = 0
    node_ids = {}

    def add_node(node, parent_id=None):
        nonlocal node_id
        current_id = node_id
        node_id += 1

        # Create node label based on node type and attributes
        node_type = type(node).__name__
        label = f"{node_type}"

        # Add additional info for certain node types
        if hasattr(node, "name"):
            label += f"\nname: {node.name}"
        elif hasattr(node, "value"):
            label += f"\nvalue: {node.value}"

        # Add node to graph
        dot.node(str(current_id), label)

        # Connect to parent if exists
        if parent_id is not None:
            dot.edge(str(parent_id), str(current_id))

        # Process children
        for name, value in node.__dict__.items():
            if name.startswith("_"):
                continue

            if isinstance(value, list):
                for item in value:
                    if hasattr(item, "__dict__"):
                        add_node(item, current_id)
            elif hasattr(value, "__dict__"):
                add_node(value, current_id)

    # Start with the CompilationUnit
    add_node(tree)
    return dot

In [18]:
dataset_path = "../datasets/conplag_preprocessed"

for file in os.listdir(dataset_path):
    print(f"Processing {file}...")
    code = read_code_file(os.path.join(dataset_path, file))
    if code:
        dot = visualize_ast(code)
        dot.render("java_ast", format="png", cleanup=True)
        print("AST visualization saved as 'java_ast.png'")
        ast = create_ast(code)
        if ast:
            print(f"AST for {file} created successfully.")
            print(ast)
        else:
            print(f"Failed to create AST for {file}.")
    else:
        print(f"Failed to read code from {file}.")
    break

Processing 0017d438.java...


ExecutableNotFound: failed to execute WindowsPath('dot'), make sure the Graphviz executables are on your systems' PATH