In [None]:
import os
import ast
import pandas as pd
import matplotlib.pyplot as plt
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment;
# PROJECT_ROOT is read from the environment further down in this cell.
load_dotenv()

def get_python_files(root_path):
    """
    Collect every ``.py`` file found under root_path.

    Returns a dict whose keys are dotted module IDs (path relative to root_path,
    path separators turned into dots, trailing ``.py`` dropped) and whose values
    are the corresponding file paths as produced by joining the walked directory
    with the file name.
    """
    modules = {}
    for current_dir, _subdirs, names in os.walk(root_path):
        # Alphabetical order within each directory keeps insertion order stable.
        for name in sorted(names):
            if not name.endswith(".py"):
                continue
            full_path = os.path.join(current_dir, name)
            dotted = os.path.relpath(full_path, root_path).replace(os.sep, ".")
            # Everything before the last dot is the module ID (drops the "py" suffix).
            module_id = dotted.rsplit(".", 1)[0]
            modules[module_id] = full_path
    return modules

def module_exists(module_parts, root_path):
    """
    Tell whether the module named by module_parts (e.g. ["folder", "file"]) is present
    under root_path.

    A module counts as present when either ``folder/file.py`` is a regular file or
    ``folder/file/__init__.py`` is a regular file (i.e. it is a package).
    """
    candidates = (
        os.path.join(root_path, *module_parts) + ".py",           # plain module file
        os.path.join(root_path, *module_parts, "__init__.py"),    # package directory
    )
    return any(os.path.isfile(candidate) for candidate in candidates)

def extract_imports(file_path, root_path, package_prefix="boltz"):
    """
    Parse file_path with AST and extract the dependencies that are part of the given package_prefix.

    Only names that are exactly package_prefix or start with ``package_prefix + "."`` are
    considered part of the package; a look-alike such as "boltzmann" is NOT treated as
    "boltz". (An empty package_prefix disables the filter and leaves names unchanged.)

    For "import" statements the module is assumed to be fully qualified and is recorded
    only when module_exists() finds it under root_path.

    For "from X import Y" statements (absolute imports only; relative imports are skipped)
    we try to separate two cases:
      - When Y is actually a module file inside package X (i.e. "from folder import file"),
        the dependency is recorded as "folder.file".
      - When Y is just an attribute of the module X (i.e. "from file import variable"),
        the dependency is recorded as "file".
    If neither X.Y nor X can be found under root_path, the full dotted name "X.Y" is
    recorded as a fallback.

    In all cases the package_prefix (e.g. "boltz") is stripped first because root_path is
    already that package's root.

    Parameters:
        file_path: path of the Python source file to parse (read as UTF-8).
        root_path: directory that corresponds to the package root.
        package_prefix: top-level package name whose imports should be recorded.

    Returns:
        A set of dotted module names relative to root_path.

    Raises:
        SyntaxError: if the file cannot be parsed by ast.parse.
        OSError: if the file cannot be opened.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        tree = ast.parse(f.read(), filename=file_path)

    def strip_prefix(dotted_name):
        """Return dotted_name relative to the package, or None if it lies outside it."""
        if not package_prefix:
            # No prefix configured: nothing to filter or strip.
            return dotted_name
        if dotted_name == package_prefix:
            # Bare "import <prefix>" / "from <prefix> import ..." -> package root itself.
            return ""
        if dotted_name.startswith(package_prefix + "."):
            return dotted_name[len(package_prefix) + 1:]
        # Shares leading characters only (e.g. "boltzmann" vs "boltz"): unrelated package.
        return None

    imports = set()

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                mod_name = strip_prefix(alias.name)
                if mod_name is None:
                    continue
                # Record the module only if it exists relative to root_path.
                # (A bare "import <prefix>" yields the empty name and goes through
                # the same existence check.)
                if module_exists(mod_name.split("."), root_path):
                    imports.add(mod_name)
        elif isinstance(node, ast.ImportFrom):
            # Only handle absolute imports (level 0) that name a module.
            if node.level != 0 or not node.module:
                continue
            mod_base = strip_prefix(node.module)
            if mod_base is None:
                continue
            base_parts = mod_base.split(".") if mod_base else []

            for alias in node.names:
                # "folder.file", or just "file" when mod_base is empty.
                dotted = ".".join(filter(None, [mod_base, alias.name]))
                if module_exists(base_parts + [alias.name], root_path):
                    # CASE A: "from folder import file" -- the imported name is a module.
                    imports.add(dotted)
                elif mod_base and module_exists(base_parts, root_path):
                    # CASE B: "from file import variable" -- the name is an attribute.
                    imports.add(mod_base)
                else:
                    # Fallback: neither could be located; keep the full dotted name.
                    imports.add(dotted)
    return imports

# Set the project root to the package root (here, the "boltz" folder).
# PROJECT_ROOT is read from the environment; fail early with a clear message
# instead of letting os.walk() choke on None or silently walk a missing directory.
project_root = os.getenv("PROJECT_ROOT")
if not project_root or not os.path.isdir(project_root):
    raise RuntimeError(
        f"PROJECT_ROOT must be set to an existing directory, got {project_root!r}"
    )
file_tree = get_python_files(project_root)

# Map each file ID to the set of in-package modules it imports.
# (Only dependencies within the package_prefix are recorded.)
file_imports = {
    file_id: extract_imports(path, project_root, package_prefix="boltz")
    for file_id, path in file_tree.items()
}

# Remove files with no relevant imports
file_imports = {k: v for k, v in file_imports.items() if v}

# Row order (files) and column order (imported modules), both alphabetical.
sorted_file_ids = sorted(file_imports)
unique_libraries = sorted(set(lib for libs in file_imports.values() for lib in libs))

# Boolean matrix: each row is a file and each column is an imported library.
data = [
    [lib in file_imports[file_id] for lib in unique_libraries]
    for file_id in sorted_file_ids
]

# Count the number of imports per file and per library for display purposes.
file_import_counts = {file_id: len(imports) for file_id, imports in file_imports.items()}
library_import_counts = {
    lib: sum(lib in imports for imports in file_imports.values())
    for lib in unique_libraries
}

# Labels have the form "name (count)"; later cells parse the count back out of them.
file_labels = [f"{file_id} ({file_import_counts[file_id]})" for file_id in sorted_file_ids]
library_labels = [f"{lib} ({library_import_counts[lib]})" for lib in unique_libraries]

# Build a DataFrame and display the matrix using matplotlib.
df = pd.DataFrame(data, index=file_labels, columns=library_labels)

# Half an inch per row/column so tick labels stay legible.
plt.figure(figsize=(len(unique_libraries) * 0.5, len(file_imports) * 0.5))
plt.imshow(df, aspect='auto', cmap='Blues')
plt.xticks(range(len(unique_libraries)), library_labels, rotation=90)
plt.yticks(range(len(file_imports)), file_labels)
plt.colorbar(label="Imported")
plt.title("Python File Import Matrix")
plt.show()


In [None]:
def _label_count(label):
    """
    Return the integer N from a label of the form "name (N)".

    Splits from the right so that a name which itself contains " (" does not
    cause the wrong fragment to be parsed as the count.
    """
    return int(label.rsplit(" (", 1)[1][:-1])


# Sort rows and columns by their import count, largest first.
df_sorted = df.loc[
    sorted(df.index, key=_label_count, reverse=True),
    sorted(df.columns, key=_label_count, reverse=True),
]

# Replot with sorted order
plt.figure(figsize=(len(df_sorted.columns) * 0.5, len(df_sorted.index) * 0.5))
plt.imshow(df_sorted, aspect='auto', cmap='Blues')
plt.xticks(range(len(df_sorted.columns)), df_sorted.columns, rotation=90)
plt.yticks(range(len(df_sorted.index)), df_sorted.index)
plt.title("Python File Import Matrix (Sorted)")
plt.show()


In [None]:
import networkx as nx
from pyvis.network import Network

# Create a directed graph
G = nx.DiGraph()

# Build the graph: add an edge from each file to every module it imports.
for source, targets in file_imports.items():
    for target in targets:
        G.add_edge(source, target)

# Nodes that are too heavy and ruin the layout. remove_nodes_from() silently
# skips names that are not in the graph, so this cell also runs on projects
# that do not contain these modules (remove_node() would raise instead).
HEAVY_NODES = ["data.types", "data.const"]
G.remove_nodes_from(HEAVY_NODES)

# Create a Pyvis Network (interactive)
net = Network(height="800px", width="100%", directed=True, notebook=True, cdn_resources="in_line")

# Add nodes with colors based on degree
degrees = {node: G.degree(node) for node in G.nodes()}
# default=0 keeps min()/max() from raising when the graph ends up empty.
min_degree = min(degrees.values(), default=0)
max_degree = max(degrees.values(), default=0)

for node in G.nodes():
    # Higher degree -> lower red/blue channel -> more saturated green.
    # The "+ 1" in the denominator avoids division by zero when all degrees are equal.
    color_intensity = 255 - int((degrees[node] - min_degree) / (max_degree - min_degree + 1) * 200)
    net.add_node(node, label=node.replace('.', '.\n'), color=f"rgb({color_intensity},255,{color_intensity})")

# Add edges
for edge in G.edges():
    net.add_edge(edge[0], edge[1])

# Enable interactive physics
net.toggle_physics(True)

# Save and display
net.show("graph.html")


graph.html
