# Java Project AST Analysis using Python

이 노트북에서는 Python을 이용하여 Java 프로젝트의 소스 코드를 분석합니다. 분석 과정에서 간단한 AST(Abstract Syntax Tree) 형태의 트리를 생성하여 각 Java 파일의 클래스와 메서드 개수를 파악합니다. 이 환경에서는 Java 파싱 라이브러리(`javalang` 등)를 사용할 수 없으므로, 정규식 기반의 간단한 파서와 자료 구조를 직접 구현합니다.

분석 결과로 각 파일별 클래스 수, 메서드 수, 총 라인 수를 표 형태로 제공하며, 전체 프로젝트 통계를 요약합니다.


In [None]:
import os
import re
import pandas as pd

class ASTNode:
    """A node in the simplified syntax tree built for a source file.

    Attributes:
        name: Identifier of the node (e.g. a file, class, or method name).
        type: Kind of node as supplied by the caller
            (e.g. 'file', 'class', 'method').
        parent: The node this one is attached to, or None for a root.
        children: Child nodes in insertion order.
    """

    def __init__(self, name, node_type, parent=None):
        self.name = name
        self.type = node_type
        self.parent = parent
        self.children = []

    def add_child(self, child):
        """Append *child* to this node's children.

        A child created without an explicit parent is adopted by this node
        so the tree can be walked upwards as well as downwards.  A parent
        that was set explicitly is left untouched.
        """
        if child.parent is None:
            child.parent = self
        self.children.append(child)

    def __repr__(self):
        # Deliberately omits parent/children objects to avoid recursive output.
        return (
            f"ASTNode(name={self.name!r}, type={self.type!r}, "
            f"children={len(self.children)})"
        )


# One alternation scanned left to right, so whichever construct starts first
# wins: text block, string literal, char literal, block comment, line comment.
# Matching the literals is what keeps "//" or "/*" inside them from being
# treated as comment openers.
_JAVA_COMMENT_OR_LITERAL = re.compile(
    r'"""(?:\\.|[^\\])*?"""'      # text block (may span lines)
    r'|"(?:\\.|[^"\\\n])*"'       # string literal
    r"|'(?:\\.|[^'\\\n])*'"       # character literal
    r'|/\*.*?\*/'                 # block comment
    r'|//[^\n]*',                 # line comment (up to, not including, newline)
    flags=re.DOTALL,
)


def remove_comments(code: str) -> str:
    """Remove Java single-line and multi-line comments from *code*.

    String, character, and text-block literals are copied through unchanged,
    so comment markers that appear inside them (for example the ``//`` in
    ``"http://host"``) are preserved.  Comments are replaced by the empty
    string; the newline that ends a ``//`` comment is kept, while newlines
    inside a ``/* ... */`` comment are removed together with the comment.

    Args:
        code: Java source text.

    Returns:
        The source text with comments removed.
    """
    def _drop_comment(match):
        token = match.group(0)
        # Only the two comment alternatives start with '/'; literals start
        # with a quote character and are returned verbatim.
        return '' if token.startswith('/') else token

    return _JAVA_COMMENT_OR_LITERAL.sub(_drop_comment, code)


# Single-line string and character literals.  They are blanked before a line
# is scanned so that braces or keywords inside them are not read as structure.
_JAVA_INLINE_LITERAL = re.compile(
    r'"(?:\\.|[^"\\])*"'
    r"|'(?:\\.|[^'\\])*'"
)

# `class` / `interface` followed by the declared name.  The lookbehind rejects
# hits in the middle of an identifier and in class literals such as `Foo.class`.
_JAVA_TYPE_DECL = re.compile(r"(?<![\w.$])(class|interface)\s+(\w+)")

# Optional modifier, one or more type-like tokens, a name, a parameter list,
# and an opening brace on the same line.
_JAVA_METHOD_DECL = re.compile(
    r"(public|protected|private|static|final|synchronized|abstract|native|strictfp)?\s*"
    r"([\w\<\>\[\]]+\s+)+"  # return type and generics
    r"(\w+)\s*\(([^)]*)\)\s*\{"
)

# Shapes the method regex also matches but which are not declarations:
# `else if (...) {`, `new Foo() {`, and similar.
_NOT_METHOD_NAMES = frozenset(
    {'if', 'for', 'while', 'switch', 'catch', 'try', 'synchronized'}
)
_NOT_RETURN_TYPES = frozenset({'new', 'return', 'else', 'throw'})


def parse_java_file(filepath: str) -> dict:
    """Parse a Java file and count its class and method declarations.

    The file is scanned line by line with regular expressions; this is a
    heuristic, not a real Java parser.  A simplified tree is built by
    detecting class/interface and method declarations and tracking brace
    depth to know when each declaration's body ends.  Methods are only
    looked for while the innermost open declaration is a class, and only
    when the opening brace is on the same line as the signature.

    Args:
        filepath: Path of the Java source file.  It is read as UTF-8 and
            undecodable bytes are ignored.

    Returns:
        A dict with the keys:
            'file': the given *filepath*,
            'classes': number of class/interface declarations found,
            'methods': number of method (and constructor) declarations found,
            'lines': number of lines in the file as read, comments included,
            'ast': the root ``ASTNode`` (type 'file') of the simplified tree.
    """
    with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
        code = f.read()

    # Remove comments to avoid false positives
    code_no_comments = remove_comments(code)

    root_node = ASTNode(os.path.basename(filepath), 'file')
    current_node = root_node

    # One entry per open declaration:
    #   [node to return to on exit, brace depth before the declaration,
    #    whether the declaration's own '{' has been seen yet]
    scopes = []
    depth = 0

    class_count = 0
    method_count = 0

    for line in code_no_comments.split('\n'):
        stripped = _JAVA_INLINE_LITERAL.sub(
            lambda m: m.group(0)[0] * 2, line.strip()
        )
        if not stripped:
            continue

        declared = None
        m_class = _JAVA_TYPE_DECL.search(stripped)
        if m_class:
            class_count += 1
            declared = ASTNode(m_class.group(2), 'class', parent=current_node)
        elif current_node.type == 'class':
            m_method = _JAVA_METHOD_DECL.search(stripped)
            if (
                m_method
                and m_method.group(3) not in _NOT_METHOD_NAMES
                and m_method.group(2).strip() not in _NOT_RETURN_TYPES
            ):
                method_count += 1
                declared = ASTNode(m_method.group(3), 'method', parent=current_node)

        if declared is not None:
            current_node.add_child(declared)
            scopes.append([current_node, depth, False])
            current_node = declared

        # Braces on the declaration line itself are counted too; otherwise
        # the new scope would look closed on the very next line.
        open_braces = stripped.count('{')
        close_braces = stripped.count('}')
        if scopes and open_braces:
            scopes[-1][2] = True
        depth += open_braces - close_braces

        # A scope ends once its body has been opened and the depth is back at
        # (or below) the level it started from.  The loop handles several
        # scopes closing on one line, e.g. "}}".
        while scopes and scopes[-1][2] and depth <= scopes[-1][1]:
            current_node = scopes.pop()[0]

    return {
        'file': filepath,
        'classes': class_count,
        'methods': method_count,
        # Counted on the raw text: comment stripping collapses block comments
        # and would under-report the file's length.
        'lines': len(code.splitlines()),
        'ast': root_node
    }


def analyze_java_project(root_dir: str) -> pd.DataFrame:
    """Analyze every ``.java`` file under *root_dir*.

    The directory tree is walked in sorted order so the row order of the
    result does not depend on how the filesystem enumerates entries.  The
    extension check is case-insensitive.

    Args:
        root_dir: Directory to search recursively.

    Returns:
        A DataFrame with one row per Java file and the columns 'file',
        'classes', 'methods', 'lines' and 'ast' (the per-file result of
        ``parse_java_file``).  When no Java files are found the frame is
        empty but still carries those columns.
    """
    results = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Sorting in place makes os.walk descend into subdirectories in a
        # deterministic order.
        dirnames.sort()
        for filename in sorted(filenames):
            if filename.lower().endswith('.java'):
                filepath = os.path.join(dirpath, filename)
                results.append(parse_java_file(filepath))

    # Naming the columns keeps the schema stable for an empty result, so
    # callers can select columns without a KeyError.
    return pd.DataFrame(
        results, columns=['file', 'classes', 'methods', 'lines', 'ast']
    )


In [None]:

# Root directory of the Java project to scan; edit this to point elsewhere.
project_path = '.'  # defaults to the current working directory

# Run the per-file analysis over the whole tree.
analysis_df = analyze_java_project(project_path)

# Report: per-file table first, then project-wide totals.
import pandas as pd
if analysis_df.empty:
    print("No Java files found in the specified directory.")
else:
    from IPython.display import display
    display(analysis_df[['file', 'classes', 'methods', 'lines']])

    # Project-wide totals, one per numeric column.
    total_files = len(analysis_df)
    total_classes, total_methods, total_lines = (
        analysis_df[column].sum() for column in ('classes', 'methods', 'lines')
    )

    summary = (
        f"Java files analyzed: {total_files}",
        f"Total classes: {total_classes}",
        f"Total methods: {total_methods}",
        f"Total lines: {total_lines}",
    )
    for summary_line in summary:
        print(summary_line)
