In [2]:
"""
py_ast_to_graph_json.py

Parse a Python source file with the std-lib ``ast`` module and emit a
*graph-style* JSON with

    {
      "nodes": [[idx, node_type, value], …],
      "edges": [[src, dst, edge_type], …]
    }

Edge-type legend
----------------
0  parent   → child              (AST structure)
1  child    → parent             (reverse)

2  node     → CONTENT_NODE       (variable / literal link)
3  CONTENT_NODE → node           (reverse)

4  leaf-token(i)   → leaf-token(i+1)   (sequential order)
5  leaf-token(i+1) → leaf-token(i)     (reverse)
"""

import ast
import json
import sys
from collections import defaultdict
from typing import Any, Dict, List, Tuple
from typing import Optional

# ── graph primitives ───────────────────────────────────────────────────────
Node = List[Any]           # [index, node_type, value]
Edge = Tuple[int, int, int]  # (src, dst, edge_type)


class GraphBuilder:
    """Walk a Python ``ast.AST`` and build nodes / edges."""

    # edge-type constants
    AST_FWD, VAR_FWD, ORD_FWD = 0, 2, 4

    def __init__(self) -> None:
        self.nodes: List[Node] = []
        self.edges: List[Edge] = []
        self._content_nodes: Dict[str, int] = {}      # value  → node-index
        self._leaf_order: List[int] = []              # indices of leaf tokens

    # ─────────────────────────── node helpers ────────────────────────────
    def _add_node(self, node_type: str, value: Any = None) -> int:
        idx = len(self.nodes)
        self.nodes.append([idx, node_type, str(value) if value is not None else "None"])
        return idx

    def _add_bi_edge(self, a: int, b: int, etype: int) -> None:
        self.edges.append((a, b, etype))
        self.edges.append((b, a, etype + 1))

    # ─────────────────────────── content edges ───────────────────────────
    def _link_content(self, src_idx: int, literal: Any) -> None:
        """
        Add a VAR_LINK edge pair (2/3) from *src_idx* to the canonical
        CONTENT_NODE representing *literal*.
        """
        if literal is None or isinstance(literal, (ast.AST, list)):
            return

        key = str(literal)
        if key not in self._content_nodes:
            cnt_idx = self._add_node("CONTENT_NODE", literal)
            self._content_nodes[key] = cnt_idx
        else:
            cnt_idx = self._content_nodes[key]

        self._add_bi_edge(src_idx, cnt_idx, self.VAR_FWD)

    # ───────────────────────────── walker ────────────────────────────────
    #def walk(self, obj: Any, parent_idx: int | None = None) -> int:
    
    def walk(self, obj: Any, parent_idx: Optional[int] = None) -> int:
        """
        Recursively convert *obj* (AST node / list / literal) into graph form.
        Returns the representative node-index for *obj*.
        """
        # 1) Genuine AST node ------------------------------------------------
        if isinstance(obj, ast.AST):
            idx = self._add_node(obj.__class__.__name__)
            if parent_idx is not None:
                self._add_bi_edge(parent_idx, idx, self.AST_FWD)

            # fields
            for field, value in ast.iter_fields(obj):
                if isinstance(value, list):
                    fld_idx = self._add_node(field)           # a field wrapper
                    self._add_bi_edge(idx, fld_idx, self.AST_FWD)
                    for elt in value:
                        self.walk(elt, fld_idx)
                    if not value:      # empty list still counts as a leaf
                        self._leaf_order.append(fld_idx)

                elif isinstance(value, ast.AST):
                    self.walk(value, idx)

                else:  # primitive value
                    lit_idx = self._add_node(field, value)
                    self._add_bi_edge(idx, lit_idx, self.AST_FWD)
                    self._link_content(lit_idx, value)
                    self._leaf_order.append(lit_idx)

            return idx

        # 2) List that slipped through --------------------------------------
        if isinstance(obj, list):
            wrapper_idx = self._add_node("list")
            if parent_idx is not None:
                self._add_bi_edge(parent_idx, wrapper_idx, self.AST_FWD)
            for elt in obj:
                self.walk(elt, wrapper_idx)
            if not obj:
                self._leaf_order.append(wrapper_idx)
            return wrapper_idx

        # 3) Bare literal ----------------------------------------------------
        lit_idx = self._add_node("Literal", obj)
        if parent_idx is not None:
            self._add_bi_edge(parent_idx, lit_idx, self.AST_FWD)
        self._link_content(lit_idx, obj)
        self._leaf_order.append(lit_idx)
        return lit_idx

    # ─────────────────────────── order edges ─────────────────────────────
    def add_order_links(self) -> None:
        """Add PREV_TOKEN edges (4/5) along the discovered leaf order."""
        for left, right in zip(self._leaf_order, self._leaf_order[1:]):
            self._add_bi_edge(left, right, self.ORD_FWD)

    # ─────────────────────────── final output ────────────────────────────
    def to_json(self) -> dict:
        return {"nodes": self.nodes, "edges": self.edges}


# ── CLI driver ─────────────────────────────────────────────────────────────
def main() -> None:
    if len(sys.argv) != 3:
        print("Usage: python py_ast_to_graph_json.py <input.py> <output.json>")
        sys.exit(1)

    src_path, out_path = sys.argv[1], sys.argv[2]

    with open(src_path, "r", encoding="utf-8") as fh:
        source = fh.read()

    tree = ast.parse(source, filename=src_path)

    gb = GraphBuilder()
    gb.walk(tree)
    gb.add_order_links()

    with open(out_path, "w", encoding="utf-8") as fh:
        json.dump(gb.to_json(), fh, indent=2, ensure_ascii=False)

    print(
        f"Wrote {len(gb.nodes)} nodes, {len(gb.edges)} edges "
        f"({out_path})"
    )


if __name__ == "__main__":
    main()

FileNotFoundError: [Errno 2] No such file or directory: '-f'

In [13]:
import ast
import json
from typing import Any, Dict, List, Tuple, Optional

# ── graph primitives ───────────────────────────────────────────────────────
Node = List[Any]           # [index, node_type, value]
Edge = Tuple[int, int, int]  # (src, dst, edge_type)


class GraphBuilder:
    """Walk a Python ``ast.AST`` and build nodes / edges."""

    # edge-type constants
    AST_FWD, VAR_FWD, ORD_FWD = 0, 2, 4

    def __init__(self) -> None:
        self.nodes: List[Node] = []
        self.edges: List[Edge] = []
        self._content_nodes: Dict[str, int] = {}      # value  → node-index
        self._leaf_order: List[int] = []              # indices of leaf tokens

    # ─────────────────────────── node helpers ────────────────────────────
    def _add_node(self, node_type: str, value: Any = None) -> int:
        idx = len(self.nodes)
        self.nodes.append([idx, node_type, str(value) if value is not None else "None"])
        return idx

    def _add_bi_edge(self, a: int, b: int, etype: int) -> None:
        self.edges.append((a, b, etype))
        self.edges.append((b, a, etype + 1))

    # ─────────────────────────── content edges ───────────────────────────
    def _link_content(self, src_idx: int, literal: Any) -> None:
        if literal is None or isinstance(literal, (ast.AST, list)):
            return

        key = str(literal)
        if key not in self._content_nodes:
            cnt_idx = self._add_node("CONTENT_NODE", literal)
            self._content_nodes[key] = cnt_idx
        else:
            cnt_idx = self._content_nodes[key]

        self._add_bi_edge(src_idx, cnt_idx, self.VAR_FWD)

    # ───────────────────────────── walker ────────────────────────────────
    def walk(self, obj: Any, parent_idx: Optional[int] = None) -> int:
        if isinstance(obj, ast.AST):
            idx = self._add_node(obj.__class__.__name__)
            if parent_idx is not None:
                self._add_bi_edge(parent_idx, idx, self.AST_FWD)

            for field, value in ast.iter_fields(obj):
                if isinstance(value, list):
                    fld_idx = self._add_node(field)
                    self._add_bi_edge(idx, fld_idx, self.AST_FWD)
                    for elt in value:
                        self.walk(elt, fld_idx)
                    if not value:
                        self._leaf_order.append(fld_idx)

                elif isinstance(value, ast.AST):
                    self.walk(value, idx)

                else:
                    lit_idx = self._add_node(field, value)
                    self._add_bi_edge(idx, lit_idx, self.AST_FWD)
                    self._link_content(lit_idx, value)
                    self._leaf_order.append(lit_idx)

            return idx

        if isinstance(obj, list):
            wrapper_idx = self._add_node("list")
            if parent_idx is not None:
                self._add_bi_edge(parent_idx, wrapper_idx, self.AST_FWD)
            for elt in obj:
                self.walk(elt, wrapper_idx)
            if not obj:
                self._leaf_order.append(wrapper_idx)
            return wrapper_idx

        lit_idx = self._add_node("Literal", obj)
        if parent_idx is not None:
            self._add_bi_edge(parent_idx, lit_idx, self.AST_FWD)
        self._link_content(lit_idx, obj)
        self._leaf_order.append(lit_idx)
        return lit_idx

    # ─────────────────────────── order edges ─────────────────────────────
    def add_order_links(self) -> None:
        for left, right in zip(self._leaf_order, self._leaf_order[1:]):
            self._add_bi_edge(left, right, self.ORD_FWD)

    # ─────────────────────────── final output ────────────────────────────
    def to_json(self) -> dict:
        return {"nodes": self.nodes, "edges": self.edges}


# ── برای اجرای مستقیم در نوت‌بوک ──────────────────────────────────────────
# def run_ast_to_graph(input_path: str, output_path: str) -> None:
#     with open(input_path, "r", encoding="utf-8") as fh:
#         source = fh.read()

#     tree = ast.parse(source, filename=input_path)

#     gb = GraphBuilder()
#     gb.walk(tree)
#     gb.add_order_links()

#     with open(output_path, "w", encoding="utf-8") as fh:
#         json.dump(gb.to_json(), fh, indent=2, ensure_ascii=False)

#     print(f"Wrote {len(gb.nodes)} nodes, {len(gb.edges)} edges to {output_path}")
import os

def run_ast_to_graph(input_path: str, output_path: str = None) -> None:
    # اگر مسیر خروجی داده نشده، خروجی رو با نام مشابه ورودی و پسوند .json بساز
    if output_path is None:
        base = os.path.basename(input_path)                     # youtube-dl_1_utils_buggy.py
        name_without_ext = os.path.splitext(base)[0]           # youtube-dl_1_utils_buggy
        output_dir = os.path.dirname(input_path)               # پوشه ورودی
        output_path = os.path.join(output_dir, f"{name_without_ext}.json")

    with open(input_path, "r", encoding="utf-8") as fh:
        source = fh.read()

    tree = ast.parse(source, filename=input_path)

    gb = GraphBuilder()
    gb.walk(tree)
    gb.add_order_links()

    with open(output_path, "w", encoding="utf-8") as fh:
        json.dump(gb.to_json(), fh, indent=2, ensure_ascii=False)

    print(f"Wrote {len(gb.nodes)} nodes, {len(gb.edges)} edges to {output_path}")


In [31]:
run_ast_to_graph("C:/Users/Leila/Desktop/BugsInPy/temp/extracted/buggy/youtube-dl_11_utils_buggy.py")


Wrote 40604 nodes, 118548 edges to C:/Users/Leila/Desktop/BugsInPy/temp/extracted/buggy\youtube-dl_11_utils_buggy.json
