# Imports

In [10]:
import json
from collections import defaultdict, deque
import pandas as pd

# Loading File and Converting into Graph

In [11]:
# Load your pydeps-style JSON (flat structure)
with open("dependencies.json") as f:
    data = json.load(f)

# Initialize graphs
graph = defaultdict(set)         # module -> set of imports
reverse_graph = defaultdict(set) # module -> set of modules that import it
all_modules = set(data.keys())

# Build dependency and reverse dependency graphs
for mod, info in data.items():
    imports = info.get("imports", [])
    for dep in imports:
        graph[mod].add(dep)
        reverse_graph[dep].add(mod)

# Calculating Fan-in and Fan-out

In [12]:
# Fan-in and fan-out
fan_in = {mod: len(reverse_graph[mod]) for mod in all_modules}
fan_out = {mod: len(graph[mod]) for mod in all_modules}


# Create DataFrame from fan-in and fan-out
df_fan = pd.DataFrame([
    {"module": mod, "fan_in": fan_in.get(mod, 0), "fan_out": fan_out.get(mod, 0)}
    for mod in sorted(set(fan_in) | set(fan_out))
])

# Optional: Sort by total coupling (fan_in + fan_out)
df_fan["total_coupling"] = df_fan["fan_in"] + df_fan["fan_out"]
df_fan = df_fan.sort_values(by="total_coupling", ascending=False)

# Display the table
print(df_fan.to_string(index=False))

                               module  fan_in  fan_out  total_coupling
                             __main__       0       45              45
                                attrs      30        4              34
       apscheduler._schedulers.async_       4       26              30
                      apscheduler.abc      21        4              25
              apscheduler._structures      15       10              25
                   apscheduler._utils      21        3              24
                                 attr      19        3              22
                          apscheduler      10       11              21
                  apscheduler._events      14        6              20
        apscheduler.eventbrokers.base       6       11              17
                      attr.validators      15        0              15
    apscheduler.datastores.sqlalchemy       1       13              14
         apscheduler._schedulers.sync       3       11              14
      

# Highly Coupled Modules

In [7]:
# Highly coupled modules (fan-in + fan-out exceeds threshold)
coupling_threshold = 20
highly_coupled = [mod for mod in all_modules if fan_in[mod] + fan_out[mod] > coupling_threshold]
print("Highly Coupled Modules:", highly_coupled)

Highly Coupled Modules: ['apscheduler._utils', 'apscheduler', 'apscheduler._schedulers.async_', 'apscheduler.abc', 'apscheduler._structures', 'attr', '__main__', 'attrs']


# Unused and Disconnected Modules

In [9]:
# Unused modules (no in or out edges)
unused = [mod for mod in all_modules if fan_in[mod] == 0 and fan_out[mod] == 0]

# Disconnected modules (either fan-in or fan-out is 0)
disconnected = [mod for mod in all_modules if fan_in[mod] == 0 or fan_out[mod] == 0]

print("Unused Modules:", unused)
print("Disconnected Modules:", disconnected)

Unused Modules: []
Disconnected Modules: ['apscheduler.triggers', 'typing_extensions', 'sniffio', 'apscheduler._exceptions', 'apscheduler._converters', 'anyio.streams', 'apscheduler.eventbrokers', 'apscheduler.serializers', 'apscheduler.datastores', 'anyio', 'apscheduler.executors', 'apscheduler._schedulers', 'attr.validators', '__main__', 'apscheduler._enums']


# Cyclic Dependencies

In [14]:
def detect_cycles(graph):
    visited = set()
    path = []
    cycles = []

    def dfs(node, stack):
        visited.add(node)
        stack.append(node)

        for neighbor in graph.get(node, []):
            if neighbor not in visited:
                dfs(neighbor, stack)
            elif neighbor in stack:
                # Found a cycle; slice path from neighbor to current
                cycle_start_index = stack.index(neighbor)
                cycle = stack[cycle_start_index:].copy()
                cycles.append(cycle)

        stack.pop()

    for node in graph:
        if node not in visited:
            dfs(node, [])

    return cycles



cycle_paths = detect_cycles(graph)

print(f"\nTotal Cycles Detected: {len(cycle_paths)}\n")
for idx, cycle in enumerate(cycle_paths, 1):
    print(f"Cycle {idx}: {' -> '.join(cycle)} -> {cycle[0]}")



Total Cycles Detected: 34

Cycle 1: anyio.abc -> anyio.from_thread -> anyio.abc
Cycle 2: attr -> attr
Cycle 3: attr -> attr.setters -> attr
Cycle 4: apscheduler.abc -> apscheduler._structures -> apscheduler._validators -> apscheduler._utils -> apscheduler.abc
Cycle 5: apscheduler._structures -> apscheduler._validators -> apscheduler._utils -> apscheduler._structures
Cycle 6: apscheduler.abc -> apscheduler._structures -> apscheduler._validators -> apscheduler -> apscheduler._context -> apscheduler._schedulers.async_ -> apscheduler.executors.async_ -> apscheduler.abc
Cycle 7: apscheduler._structures -> apscheduler._validators -> apscheduler -> apscheduler._context -> apscheduler._schedulers.async_ -> apscheduler.executors.async_ -> apscheduler._structures
Cycle 8: apscheduler.datastores.base -> apscheduler.serializers.pickle -> apscheduler.abc -> apscheduler._structures -> apscheduler._validators -> apscheduler -> apscheduler._context -> apscheduler._schedulers.async_ -> apscheduler.dat

# Depth of Dependencies

In [20]:
# Depth of dependencies using BFS
def compute_depth(start):
    visited = set()
    queue = deque([(start, 0)])
    max_depth = 0
    while queue:
        node, depth = queue.popleft()
        if node not in visited:
            visited.add(node)
            max_depth = max(max_depth, depth)
            for neighbor in graph.get(node, []):
                queue.append((neighbor, depth + 1))
    return max_depth

dependency_depths = {mod: compute_depth(mod) for mod in all_modules}
dependency_depths = sorted(dependency_depths.items(), key=lambda x: x[1], reverse=True)
print("Dependency Depths:")
df_depths = pd.DataFrame(dependency_depths, columns=["module", "depth"])
print(df_depths.to_string(index=False))

Dependency Depths:
                               module  depth
            apscheduler.triggers.cron      8
       apscheduler.triggers.combining      8
            apscheduler.triggers.date      8
        apscheduler.triggers.interval      8
apscheduler.triggers.calendarinterval      8
       apscheduler.datastores.mongodb      7
         apscheduler.executors.async_      7
    apscheduler.datastores.sqlalchemy      7
     apscheduler.executors.subprocess      7
                   apscheduler._utils      7
                  apscheduler._events      7
                      apscheduler.abc      7
     apscheduler.triggers.cron.fields      7
         apscheduler.executors.thread      7
          apscheduler.datastores.base      6
apscheduler.triggers.cron.expressions      6
       apscheduler.eventbrokers.redis      6
     apscheduler.eventbrokers.psycopg      6
              apscheduler._structures      6
              apscheduler._decorators      6
       apscheduler.eventbrokers.loca

# Core Modules and Their Impact

In [25]:
# Core modules = high fan-in
core_modules = sorted(fan_in.items(), key=lambda x: x[1], reverse=True)[:3]

# Determine impacted modules if core module changes (traverse reverse graph)
def impacted_by(mod):
    impacted = set()
    queue = deque([mod])
    while queue:
        current = queue.popleft()
        for dependent in reverse_graph.get(current, []):
            if dependent not in impacted:
                impacted.add(dependent)
                queue.append(dependent)
    return impacted

core_module_impact = {mod: impacted_by(mod) for mod, _ in core_modules}

print("Top Core Modules (by fan-in):", core_modules)
print("\nModules affected if core modules are modified:")
for core, impacted in core_module_impact.items():
    print(f"  {core} affects {len(impacted)} module(s): {impacted}")

Top Core Modules (by fan-in): [('attrs', 30), ('apscheduler._utils', 21), ('apscheduler.abc', 21)]

Modules affected if core modules are modified:
  attrs affects 37 module(s): {'apscheduler.datastores.mongodb', 'apscheduler.serializers.cbor', 'apscheduler.datastores.base', 'apscheduler.executors.async_', 'apscheduler.triggers.cron.expressions', 'apscheduler.executors.qt', 'apscheduler.datastores.sqlalchemy', 'apscheduler.datastores.memory', 'apscheduler.eventbrokers.redis', 'apscheduler.executors.subprocess', 'apscheduler.eventbrokers.psycopg', 'apscheduler.eventbrokers.base', 'apscheduler._utils', 'apscheduler.serializers.json', 'apscheduler.serializers.pickle', 'apscheduler._validators', 'apscheduler._events', 'apscheduler.abc', 'apscheduler._schedulers.async_', 'apscheduler', 'apscheduler._schedulers.sync', 'apscheduler.triggers.cron.fields', 'apscheduler.triggers.cron', 'apscheduler._structures', 'apscheduler._decorators', 'apscheduler._context', 'apscheduler.eventbrokers.local', 