# Repository release order

This report analyzes inter-repository dependencies to suggest an optimal release order for a release train, helping coordinate multi-repository releases.

In [None]:
# Report parameters consumed by the analysis cell below.
# An empty groupId_filter is falsy, so no groupId filtering is applied.
groupId_filter: str = ""  # Filter by groupId prefix (e.g., "org.openrewrite")
# Any value other than "network" or "hierarchy" falls through to the matrix view.
visualization_type: str = "network"  # Options: network, hierarchy, matrix

In [17]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import networkx as nx
from code_data_science import data_table as dt
import code_data_science.palette as palette
from collections import defaultdict

def _message_figure(message, font_size):
    """Return an otherwise empty figure that shows only `message` as an annotation."""
    placeholder = go.Figure()
    placeholder.add_annotation(
        x=0.5, y=0.5, text=message, showarrow=False, font=dict(size=font_size)
    )
    return placeholder


def _release_order_with_cycles(graph):
    """Topologically order `graph`, tolerating circular dependencies.

    Returns a tuple ``(order, has_cycles, cycles)``:
      * ``order``      - list of nodes in release order,
      * ``has_cycles`` - True when the graph could not be sorted as-is,
      * ``cycles``     - the simple cycles found (empty list when acyclic).

    When cycles exist, the closing edge (last node -> first node) of every
    simple cycle is dropped from a copy of the graph before sorting; the
    graph passed in is never modified.
    """
    try:
        return list(nx.topological_sort(graph)), False, []
    except nx.NetworkXUnfeasible:
        found_cycles = list(nx.simple_cycles(graph))
        acyclic = graph.copy()
        for cycle in found_cycles:
            if len(cycle) > 1:
                closing_edge = (cycle[-1], cycle[0])
                # Different simple cycles can share the same closing edge, so it
                # may already be gone; removing it twice would raise NetworkXError.
                if acyclic.has_edge(*closing_edge):
                    acyclic.remove_edge(*closing_edge)
        return list(nx.topological_sort(acyclic)), True, found_cycles


df = dt.read_csv("../samples/dependencies_in_use.csv")

if len(df) == 0:
    fig = _message_figure("No dependency data found", 20)
else:
    # Create repository identifier
    df["repository"] = df["repositoryOrigin"] + "/" + df["repositoryPath"]

    # Apply groupId filter if specified.
    # na=False keeps rows with a missing groupId out of the result instead of
    # producing a NaN mask, which boolean indexing rejects with a ValueError.
    if groupId_filter:
        df = df[df["groupId"].str.startswith(groupId_filter, na=False)]

    # Map each "groupId:artifactId" to the first repository it is seen in.
    # NOTE(review): a missing projectName loaded as NaN is truthy, so this guard
    # only skips falsy values such as "" - confirm whether NaN rows should be skipped.
    artifact_to_repo = {}
    for _, row in df.iterrows():
        artifact_key = f"{row['groupId']}:{row['artifactId']}"
        if row["projectName"] and artifact_key not in artifact_to_repo:
            artifact_to_repo[artifact_key] = row["repository"]

    # Build dependency graph between repositories (treating all dependencies equally)
    repo_dependencies = defaultdict(set)
    dependency_counts = defaultdict(int)

    for _, dep in df.iterrows():
        consumer_repo = dep["repository"]
        dependency_key = f"{dep['groupId']}:{dep['artifactId']}"

        # Look up the repository recorded for this artifact in the mapping above
        producer_repo = artifact_to_repo.get(dependency_key)

        # Only cross-repository relationships become edges
        if producer_repo and producer_repo != consumer_repo:
            repo_dependencies[consumer_repo].add(producer_repo)
            dependency_counts[(producer_repo, consumer_repo)] += 1

    # Create directed graph: edges point producer -> consumer, weighted by the
    # number of dependency rows linking the two repositories
    G = nx.DiGraph()
    for consumer, producers in repo_dependencies.items():
        for producer in producers:
            weight = dependency_counts[(producer, consumer)]
            G.add_edge(producer, consumer, weight=weight)

    if len(G.nodes()) == 0:
        fig = _message_figure("No inter-repository dependencies found", 16)
    else:
        # Calculate release order using topological sort (cycle-tolerant)
        release_order, has_cycles, cycles = _release_order_with_cycles(G)

        # Store release order for display below the graph
        release_order_list = release_order

        if visualization_type == "network":
            # Create network visualization (fixed seed keeps the layout reproducible)
            pos = nx.spring_layout(G, k=2, iterations=50, seed=42)

            # One line trace per edge; width grows with the weight, capped at 10
            edge_traces = []
            for src, dst, attrs in G.edges(data=True):
                x0, y0 = pos[src]
                x1, y1 = pos[dst]
                weight = attrs['weight']

                edge_traces.append(
                    go.Scatter(
                        x=[x0, x1, None],
                        y=[y0, y1, None],
                        mode='lines',
                        line=dict(
                            width=min(weight * 0.5, 10),
                            color='rgba(125,125,125,0.5)'
                        ),
                        hoverinfo='text',
                        text=f"{src} → {dst}<br>Dependencies: {weight}",
                        showlegend=False
                    )
                )

            # Position of every repository in the release order, computed once
            # instead of calling list.index() for each node
            release_position = {repo: i for i, repo in enumerate(release_order)}

            # Create node trace
            node_x = []
            node_y = []
            node_text = []
            node_color = []

            for node in G.nodes():
                x, y = pos[node]
                node_x.append(x)
                node_y.append(y)

                # The "tier" shown here is the node's index in the release order
                tier = release_position.get(node, -1)

                # Predecessors are producers (what this repo depends on);
                # successors are consumers (what requires this repo)
                in_deps = list(G.predecessors(node))
                out_deps = list(G.successors(node))

                hover_text = f"<b>{node}</b><br>"
                hover_text += f"Release tier: {tier + 1}<br>"
                hover_text += f"Depends on: {len(in_deps)} repos<br>"
                hover_text += f"Required by: {len(out_deps)} repos"

                node_text.append(hover_text)
                node_color.append(tier)

            node_trace = go.Scatter(
                x=node_x,
                y=node_y,
                mode='markers+text',
                # Label each marker with the last path segment of the repository
                text=[node.split('/')[-1] for node in G.nodes()],
                textposition="top center",
                hoverinfo='text',
                hovertext=node_text,
                marker=dict(
                    size=25,
                    color=node_color,
                    colorscale='Viridis',
                    showscale=True,
                    colorbar=dict(
                        title="Release<br>Tier",
                        thickness=15,
                        len=0.7
                    ),
                    line=dict(width=2, color='white')
                ),
                showlegend=False
            )

            fig = go.Figure(data=edge_traces + [node_trace])

            title = "Repository Dependency Network & Release Order"
            if has_cycles:
                title += " (⚠️ Circular dependencies detected)"

            fig.update_layout(
                title=dict(
                    text=title,
                    x=0.5,
                    xanchor='center'
                ),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20, l=5, r=5, t=40),
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                plot_bgcolor='white'
            )

        elif visualization_type == "hierarchy":
            # Create hierarchical visualization showing release tiers
            tiers = defaultdict(list)
            tiers_assignment = {}

            for repo in release_order:
                # Tier = 1 + deepest tier among producers already placed; producers
                # not placed yet (possible only with cycles) are ignored
                known_pred_tiers = [
                    tiers_assignment[p] for p in G.predecessors(repo) if p in tiers_assignment
                ]
                tier_level = max(known_pred_tiers) + 1 if known_pred_tiers else 0

                tiers_assignment[repo] = tier_level
                tiers[tier_level].append(repo)

            # Create Sankey diagram
            labels = []
            sources = []
            targets = []
            values = []

            node_index = {}
            idx = 0

            # Add nodes for each tier, in ascending tier order
            for tier, repos in sorted(tiers.items()):
                for repo in repos:
                    labels.append(f"Tier {tier + 1}: {repo.split('/')[-1]}")
                    node_index[repo] = idx
                    idx += 1

            # Add one link per graph edge, sized by the edge weight
            for src, dst, attrs in G.edges(data=True):
                if src in node_index and dst in node_index:
                    sources.append(node_index[src])
                    targets.append(node_index[dst])
                    values.append(attrs['weight'])

            fig = go.Figure(data=[go.Sankey(
                node=dict(
                    pad=15,
                    thickness=20,
                    line=dict(color="black", width=0.5),
                    label=labels,
                    # NOTE(review): if the palette has fewer entries than there are
                    # labels, this slice is shorter than the node list - confirm
                    color=palette.qualitative()[:len(labels)]
                ),
                link=dict(
                    source=sources,
                    target=targets,
                    value=values
                )
            )])

            fig.update_layout(
                title="Repository Release Tiers",
                font_size=10
            )

        else:  # matrix visualization (also the fallback for unrecognised types)
            # Create dependency matrix: rows are consumers, columns are producers
            repos = sorted(G.nodes())
            matrix = [
                [
                    G[source][target]['weight'] if G.has_edge(source, target) else 0
                    for source in repos
                ]
                for target in repos
            ]

            short_names = [r.split('/')[-1] for r in repos]

            # Create heatmap; zero cells are left without a text label
            fig = go.Figure(data=go.Heatmap(
                z=matrix,
                x=short_names,
                y=short_names,
                colorscale='Blues',
                text=[[str(val) if val > 0 else '' for val in row] for row in matrix],
                texttemplate='%{text}',
                textfont={"size": 10},
                hovertemplate='%{y} depends on %{x}<br>Dependencies: %{z}<extra></extra>'
            ))

            fig.update_layout(
                title="Repository Dependency Matrix",
                xaxis_title="Dependency (Producer)",
                yaxis_title="Dependent (Consumer)",
                xaxis={'side': 'bottom'},
                yaxis={'autorange': 'reversed'}
            )


Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.



In [18]:
# Print the computed release order as a numbered, copy-friendly list.
# Nothing is printed when the analysis cell produced no (or an empty) order.
release_order_ready = 'release_order_list' in locals() and len(release_order_list) > 0
if release_order_ready:
    print(
        "SUGGESTED RELEASE ORDER",
        "=" * 50,
        "Copy the list below for your release planning:\n",
        sep="\n",
    )

    for position, repo_name in enumerate(release_order_list, start=1):
        print(f"{position}. {repo_name}")

    if has_cycles:
        print(
            "\n⚠️ WARNING: Circular dependencies detected!",
            "The following repositories have circular dependencies:",
            sep="\n",
        )
        # Only the first five cycles are listed; the rest are summarised
        max_listed = 5
        for loop_nodes in cycles[:max_listed]:
            closed_path = ' → '.join(loop_nodes + [loop_nodes[0]])
            print(f"  • {closed_path}")
        hidden_count = len(cycles) - max_listed
        if hidden_count > 0:
            print(f"  • ... and {hidden_count} more cycles")

SUGGESTED RELEASE ORDER
Copy the list below for your release planning:

1. github.com/openrewrite/rewrite
2. github.com/openrewrite/rewrite-all
3. github.com/openrewrite/rewrite-github-actions
4. github.com/openrewrite/rewrite-analysis
5. github.com/openrewrite/rewrite-apache
6. github.com/openrewrite/rewrite-build-gradle-plugin
7. github.com/openrewrite/rewrite-csharp
8. github.com/openrewrite/rewrite-docker
9. github.com/openrewrite/rewrite-gitlab
10. github.com/openrewrite/rewrite-gradle-tooling-model
11. github.com/openrewrite/rewrite-okhttp
12. github.com/openrewrite/rewrite-openapi
13. github.com/openrewrite/rewrite-python
14. github.com/openrewrite/rewrite-quarkus
15. github.com/openrewrite/rewrite-cucumber-jvm
16. github.com/openrewrite/rewrite-dropwizard
17. github.com/openrewrite/rewrite-jackson
18. github.com/openrewrite/rewrite-logging-frameworks
19. github.com/openrewrite/rewrite-micrometer
20. github.com/openrewrite/rewrite-struts
21. github.com/openrewrite/rewrite-featur

In [19]:
# Second rendering of the release order: one comma-separated line, plus a
# per-tier breakdown when the hierarchy view has populated tiers_assignment.
has_release_order = 'release_order_list' in locals() and len(release_order_list) > 0
if has_release_order:
    divider = "=" * 50
    print(
        "\nCOMMA-SEPARATED FORMAT:",
        divider,
        "Copy this for scripts or CI/CD pipelines:\n",
        sep="\n",
    )
    print(",".join(release_order_list))

    # Also provide in tiers for parallel releases
    if 'tiers_assignment' in locals():
        print(
            "\n\nRELEASE TIERS (can be released in parallel within each tier):",
            divider,
            sep="\n",
        )

        # Group repositories by tier, keeping their order of appearance
        tier_groups = defaultdict(list)
        for repo_name, tier_index in tiers_assignment.items():
            tier_groups[tier_index].append(repo_name)

        for tier_index in sorted(tier_groups):
            members = tier_groups[tier_index]
            print(f"\nTier {tier_index + 1}:")
            for member in members:
                print(f"  - {member}")
            parallel_list = ','.join(members)
            print(f"  (Can be released in parallel: {parallel_list})")


COMMA-SEPARATED FORMAT:
Copy this for scripts or CI/CD pipelines:

github.com/openrewrite/rewrite,github.com/openrewrite/rewrite-all,github.com/openrewrite/rewrite-github-actions,github.com/openrewrite/rewrite-analysis,github.com/openrewrite/rewrite-apache,github.com/openrewrite/rewrite-build-gradle-plugin,github.com/openrewrite/rewrite-csharp,github.com/openrewrite/rewrite-docker,github.com/openrewrite/rewrite-gitlab,github.com/openrewrite/rewrite-gradle-tooling-model,github.com/openrewrite/rewrite-okhttp,github.com/openrewrite/rewrite-openapi,github.com/openrewrite/rewrite-python,github.com/openrewrite/rewrite-quarkus,github.com/openrewrite/rewrite-cucumber-jvm,github.com/openrewrite/rewrite-dropwizard,github.com/openrewrite/rewrite-jackson,github.com/openrewrite/rewrite-logging-frameworks,github.com/openrewrite/rewrite-micrometer,github.com/openrewrite/rewrite-struts,github.com/openrewrite/rewrite-feature-flags,github.com/openrewrite/rewrite-generative-ai,github.com/openrewrite/rew

In [20]:
# Output the visualization.
# plotly selects the renderer through the `renderer` keyword; the previous
# `render=` spelling is not a recognised option and was silently ignored, so
# the default renderer was used instead of the requested one.
fig.show(renderer="plotly_mimetype")