In [None]:
import networkx as nx
import graphviz as gv

# Small sample digraph: 2 <-> 3 form a cycle, and node 5 has no outgoing edge.
edges = [(1,2),(2,3),(3,2),(4,5),(1,4)]

# Load the same edge list into a networkx directed graph.
gr = nx.DiGraph()
gr.add_edges_from(edges)

# NOTE(review): find_dead_ends is neither defined nor imported in any cell
# visible in this notebook. Presumably it comes from a star-import in a later
# cell or from leftover kernel state; on a fresh kernel this line would raise
# NameError. TODO confirm where it lives and import it explicitly.
print("dead ends:", find_dead_ends(gr))

# Mirror the edge list into a graphviz Digraph; node ids are the stringified ints.
G = gv.Digraph()
for u, v in edges:
    G.edge(str(u), str(v))
# Bare last expression so the notebook displays the graph.
G

In [None]:
from dependency_graph import *

# Build the dependency graph for a sibling checkout.
# NOTE(review): build_dependency_graph is presumably supplied by the
# `from dependency_graph import *` above; a function with the same name is
# also defined later in this notebook, so which one runs depends on cell
# execution order. TODO confirm and import the name explicitly.
# This also rebinds `G`, which an earlier cell used for its sample graph.
G = build_dependency_graph(
  "../ocean/backend",
  show_3rdparty=False,
  summarize_external=True,
  exclude=["main", "index"],
)
G

In [None]:
from jupyter_utils.preamble import *

In [None]:
from pathlib import Path

def find_python_files(project_root):
    """Recursively collect every ``*.py`` path found beneath ``project_root``.

    Args:
        project_root: Directory to search (anything ``Path`` accepts).

    Returns:
        A list of ``Path`` objects, each prefixed with ``project_root`` as given,
        in the order produced by ``Path.rglob``.
    """
    root_dir = Path(project_root)
    return [py_path for py_path in root_dir.rglob("*.py")]

In [None]:
import ast

def analyze_imports(file_path):
    """Parse a Python source file and return its module-level import nodes.

    Only statements directly in the module body are considered; imports nested
    inside functions, classes or ``if`` blocks are not returned.

    Args:
        file_path: Path of the file to read (decoded as UTF-8).

    Returns:
        A list of ``ast.Import`` / ``ast.ImportFrom`` nodes in source order.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        text = handle.read()

    module_ast = ast.parse(text, filename=file_path)

    import_kinds = (ast.Import, ast.ImportFrom)
    top_level_imports = []
    for statement in module_ast.body:
        if isinstance(statement, import_kinds):
            top_level_imports.append(statement)
    return top_level_imports

# Demo: list the top-level imports of one project file.
nodes = analyze_imports(Path("api/serialize.py"))
for node in nodes:
    imported_names = [alias.name for alias in node.names]
    if isinstance(node, ast.ImportFrom):
        # `from X import a, b` -> print X followed by the imported names.
        print(node.module, imported_names)
    elif isinstance(node, ast.Import):
        # `import a, b` -> print just the module names.
        print(imported_names)
# NOTE: imports guarded by `if TYPE_CHECKING:` sit inside an ast.If node
# (e.g. tree.body[7].test.id == "TYPE_CHECKING"), so analyze_imports, which
# only looks at the module body, does not return them.

In [None]:
def extract_module_names(import_nodes):
    """Extract the imported module names from a sequence of AST nodes.

    * ``import a, b.c``      contributes ``"a"`` and ``"b.c"``.
    * ``from a.b import c``  contributes ``"a.b"``.
    * ``from . import c``    contributes nothing: ``ast.ImportFrom.module`` is
      ``None`` for it, and a ``None`` entry would break callers that split
      names on ``"."``.

    Relative imports such as ``from .a import b`` still contribute ``"a"``;
    the leading dots (``node.level``) are not resolved here because the
    importing package is unknown at this point.

    Args:
        import_nodes: Iterable of AST nodes; anything that is not an
            ``ast.Import`` / ``ast.ImportFrom`` is ignored.

    Returns:
        A list of module-name strings in source order (never contains ``None``).
    """
    module_names = []
    for node in import_nodes:
        if isinstance(node, ast.Import):
            module_names.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # `from . import x` has no module name; skip it rather than emit None.
            if node.module is not None:
                module_names.append(node.module)
    return module_names

# Display the module names for the `nodes` list built in the analyze_imports demo cell.
extract_module_names(nodes)

In [None]:
import sys
from collections import Counter
# import networkx as nx
import graphviz as gv

def module_type_node_attrs(module_type: str):
    """Return the Graphviz node styling for a module category.

    Args:
        module_type: ``"stdlib"`` / ``"builtin"`` (light blue),
            ``"3rdparty"`` (black with white text); any other value is
            styled as a project module (light grey).

    Returns:
        A fresh dict of Graphviz node attributes; every variant is filled.
    """
    if module_type in ("stdlib", "builtin"):
        attrs = {"fillcolor": "lightblue"}
    elif module_type == "3rdparty":
        attrs = {"fillcolor": "black", "fontcolor": "white"}
    else:
        # Everything else is treated as a project module.
        attrs = {"fillcolor": "#e0e0e0"}
    attrs["style"] = "filled"
    return attrs

def build_dependency_graph(
    project_root,
    *,
    show_3rdparty: bool = True,
    show_stdlib_builtin: bool = False,
    summarize_external: bool = True,
    use_clusters: bool = True,
    exclude: list[str] = [],
):
    """Build a Graphviz digraph of the import dependencies found under ``project_root``.

    Every ``*.py`` file below ``project_root`` (except those under a top-level
    ``drafts`` directory) becomes a module named after its path *relative to*
    ``project_root``. Each module and everything it imports at module level is
    classified as ``stdlib``, ``builtin``, ``project`` or ``3rdparty`` by its
    first dotted component, drawn as a node, and linked to its parent packages
    with black "parent" edges. Imports between drawn nodes become red "import"
    edges.

    Args:
        project_root: Directory to scan.
        show_3rdparty: Draw modules that are neither stdlib/builtin nor part
            of the project.
        show_stdlib_builtin: Draw standard-library and builtin modules.
        summarize_external: Collapse every non-project module to its root
            package (``os.path`` is drawn as ``os``); import edges then point
            at that root node.
        use_clusters: Group nodes sharing a root package into a Graphviz
            cluster subgraph.
        exclude: Full dotted module names to leave out. The default list is
            only read, never mutated.

    Returns:
        A ``graphviz.Digraph`` created with ``strict=True``.
    """
    G = gv.Digraph(strict=True)

    # Derive module names from paths relative to project_root; otherwise a root
    # such as "../pkg" leaks "..", "pkg" into every module name and nothing is
    # recognised as a project module.
    paths = []
    module_paths = []
    for py_file in find_python_files(project_root):
        rel = py_file.relative_to(project_root)
        if rel.parts[0] == "drafts":
            continue
        paths.append(py_file)
        module_paths.append((*rel.parts[:-1], rel.stem))
    modules = [".".join(mp) for mp in module_paths]

    # How many project modules live under each top-level name.
    project_root_modules = Counter(mp[0] for mp in module_paths)
    nodes = set()
    subgraphs = {}
    import_edges = set()
    # Dotted external name -> root node it was collapsed into (summarize_external).
    summarized = {}

    for py_file, module, module_path in zip(paths, modules, module_paths):
        # Drop empty/None names (e.g. from `from . import x`) so split() is safe.
        import_module_names = [
            name for name in extract_module_names(analyze_imports(py_file)) if name
        ]

        candidates = [(module, module_path)]
        candidates.extend((name, tuple(name.split("."))) for name in import_module_names)

        for m, mp in candidates:
            if m in nodes:
                continue

            # Determine type (project / built-in / 3rd party)
            root = mp[0]
            if root in sys.stdlib_module_names:
                module_type = "stdlib"
            elif root in sys.builtin_module_names:
                module_type = "builtin"
            elif root in project_root_modules:
                module_type = "project"
            else:
                module_type = "3rdparty"

            # Might skip module. The membership test keeps "builtin" under the
            # control of show_stdlib_builtin (a bare `A and B or C` would not).
            if not show_stdlib_builtin and module_type in ("stdlib", "builtin"):
                continue
            if not show_3rdparty and module_type == "3rdparty":
                continue
            if m in exclude:
                continue

            # Merge external module paths to root module name
            if module_type != "project" and summarize_external:
                summarized[m] = root
                m = root
                mp = mp[:1]

            # Pick the (sub)graph that will hold the node(s)
            H = G
            if use_clusters:
                clustered = (
                    module_type == "project"
                    and (project_root_modules[root] > 1 or len(mp) > 1)
                ) or (module_type != "project" and not summarize_external)
                if clustered:
                    if root not in subgraphs:
                        subgraphs[root] = gv.Digraph(name=f"cluster[{root}]")
                    H = subgraphs[root]

            # Add node(s) and parent-child edges, walking from the root package
            # down to the module itself.
            prev = None
            for depth in range(1, len(mp) + 1):
                subpath = ".".join(mp[:depth])
                name = mp[depth - 1]
                if subpath not in nodes:
                    H.node(subpath, type=module_type, label=name, shape="rect", **module_type_node_attrs(module_type))
                    nodes.add(subpath)
                    if prev:
                        H.edge(prev, subpath, type="parent", color="black", penwidth="1", arrowtail="ediamond", dir="back")
                prev = subpath

        if module in nodes:
            for import_module in import_module_names:
                # A summarized import is drawn as its root node; target that.
                target = summarized.get(import_module, import_module)
                if target in nodes:
                    import_edges.add((module, target))

    for H in subgraphs.values():
        G.subgraph(H)
    # Sorted so the emitted DOT source is reproducible between runs.
    # TODO: when use_clusters is set and the target is a cluster root, an
    # `lhead` attribute could point the edge at the cluster itself.
    for u, v in sorted(import_edges):
        G.edge(u, v, type="import", color="red", penwidth="0.5")

    return G

# Build and display the graph for the current working directory, hiding
# third-party modules and leaving out modules whose dotted name is exactly
# "main" or "index".
G = build_dependency_graph(".", show_3rdparty=False, summarize_external=True, exclude=["main", "index"])
G

In [None]:
from pathlib import Path

# NOTE(review): this cell re-defines find_python_files, which an earlier cell
# of this notebook already defines with the same body.
def find_python_files(project_root):
    """Recursively collect every ``*.py`` path found beneath ``project_root``.

    Args:
        project_root: Directory to search (anything ``Path`` accepts).

    Returns:
        A list of ``Path`` objects, each prefixed with ``project_root`` as given,
        in the order produced by ``Path.rglob``.
    """
    root_dir = Path(project_root)
    return [py_path for py_path in root_dir.rglob("*.py")]

In [None]:
import ast

# NOTE(review): this cell re-defines analyze_imports, which an earlier cell of
# this notebook already defines with the same body.
def analyze_imports(file_path):
    """Parse a Python source file and return its module-level import nodes.

    Only statements directly in the module body are considered; imports nested
    inside functions, classes or ``if`` blocks are not returned.

    Args:
        file_path: Path of the file to read (decoded as UTF-8).

    Returns:
        A list of ``ast.Import`` / ``ast.ImportFrom`` nodes in source order.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        text = handle.read()

    module_ast = ast.parse(text, filename=file_path)

    import_kinds = (ast.Import, ast.ImportFrom)
    top_level_imports = []
    for statement in module_ast.body:
        if isinstance(statement, import_kinds):
            top_level_imports.append(statement)
    return top_level_imports

# Demo: list the top-level imports of one project file.
nodes = analyze_imports(Path("api/serialize.py"))
for node in nodes:
    imported_names = [alias.name for alias in node.names]
    if isinstance(node, ast.ImportFrom):
        # `from X import a, b` -> print X followed by the imported names.
        print(node.module, imported_names)
    elif isinstance(node, ast.Import):
        # `import a, b` -> print just the module names.
        print(imported_names)
# NOTE: imports guarded by `if TYPE_CHECKING:` sit inside an ast.If node
# (e.g. tree.body[7].test.id == "TYPE_CHECKING"), so analyze_imports, which
# only looks at the module body, does not return them.

In [None]:
# NOTE(review): this cell re-defines extract_module_names, which an earlier
# cell of this notebook also defines.
def extract_module_names(import_nodes):
    """Extract the imported module names from a sequence of AST nodes.

    * ``import a, b.c``      contributes ``"a"`` and ``"b.c"``.
    * ``from a.b import c``  contributes ``"a.b"``.
    * ``from . import c``    contributes nothing: ``ast.ImportFrom.module`` is
      ``None`` for it, and a ``None`` entry would break callers that split
      names on ``"."``.

    Relative imports such as ``from .a import b`` still contribute ``"a"``;
    the leading dots (``node.level``) are not resolved here because the
    importing package is unknown at this point.

    Args:
        import_nodes: Iterable of AST nodes; anything that is not an
            ``ast.Import`` / ``ast.ImportFrom`` is ignored.

    Returns:
        A list of module-name strings in source order (never contains ``None``).
    """
    module_names = []
    for node in import_nodes:
        if isinstance(node, ast.Import):
            module_names.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # `from . import x` has no module name; skip it rather than emit None.
            if node.module is not None:
                module_names.append(node.module)
    return module_names

# Display the module names for the `nodes` list built in the analyze_imports demo cell.
extract_module_names(nodes)