In [1]:
# Notebook parameters. These are defaults; presumably a notebook runner may
# override them before the analysis cell executes — TODO confirm.

# Terms matched case-insensitively against the repositoryPath column; a row is
# kept when its path contains any of them. An empty list disables filtering.
repository_filter: list[str] = []
# Rows whose complexity is greater than or equal to this value are counted as
# "high complexity" in the risk matrix.
complexity_threshold: int = 10

In [2]:
import re
import warnings

import pandas as pd
import plotly.graph_objects as go

import code_data_science.data_table as dt

# Suppress every warning from here on (presumably to keep the rendered
# notebook output free of library notices).
warnings.simplefilter("ignore")

df = dt.read_csv("../samples/cyclomatic_complexity.csv")

# Keep only the rows whose repositoryPath contains at least one term from
# repository_filter (case insensitive).
#
# Each term is escaped so it is matched literally: repository paths routinely
# contain regex metacharacters such as "." or "+", which would otherwise
# over-match or raise a regex compilation error.
#
# na=False makes a missing repositoryPath count as "no match"; without it the
# mask would contain NaN and the boolean indexing below would raise.
if repository_filter:
    filter_pattern = "|".join(re.escape(term) for term in repository_filter)
    matches_filter = df["repositoryPath"].str.contains(
        filter_pattern, case=False, na=False
    )
    df = df[matches_filter]

# Build the figure: a placeholder message when there is nothing to plot,
# otherwise the per-repository complexity risk matrix.
if df.empty:
    # Blank canvas (axes hidden) carrying only an explanatory message.
    fig = go.Figure()
    fig.add_annotation(
        text="No data available for the selected repositories",
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font=dict(size=14),
    )
    fig.update_layout(
        xaxis=dict(visible=False),
        yaxis=dict(visible=False),
        margin=dict(l=0, r=0, t=60, b=60),
        title="Cyclomatic Complexity Risk Matrix",
    )
else:
    # One row per repository, computed in a single pass:
    #   avg_complexity           mean of the complexity column
    #   total_methods            number of rows with a complexity value
    #   high_complexity_methods  rows with complexity >= complexity_threshold
    #   num_classes              distinct className values (drives bubble size)
    repo_metrics = (
        df.groupby("repositoryPath")
        .agg(
            avg_complexity=("complexity", "mean"),
            total_methods=("complexity", "count"),
            high_complexity_methods=(
                "complexity",
                lambda values: (values >= complexity_threshold).sum(),
            ),
            num_classes=("className", "nunique"),
        )
        .reset_index()
        .rename(columns={"repositoryPath": "repository"})
    )

    # Area-based bubble scaling: sizeref = 2 * max(size) / max_diameter_px ** 2.
    # The largest count is clamped to 1 so sizeref never becomes 0 (which would
    # make the marker scaling divide by zero) when no repository has a class
    # name; in that case every bubble falls back to sizemin.
    max_bubble_diameter_px = 100.0
    largest_class_count = int(repo_metrics["num_classes"].max())
    bubble_sizeref = 2.0 * max(largest_class_count, 1) / max_bubble_diameter_px**2

    fig = go.Figure()

    # One bubble per repository: x = count of high-complexity methods,
    # y = average complexity, colour repeats x, area = number of classes.
    fig.add_trace(
        go.Scatter(
            x=repo_metrics["high_complexity_methods"],
            y=repo_metrics["avg_complexity"],
            mode="markers",
            marker=dict(
                size=repo_metrics["num_classes"],
                sizemode="area",
                sizeref=bubble_sizeref,
                sizemin=4,
                color=repo_metrics["high_complexity_methods"],
                colorscale="YlOrRd",
                showscale=True,
                colorbar=dict(
                    title="High Complexity<br>Methods",
                    thickness=15,
                    len=0.7,
                ),
            ),
            text=repo_metrics["repository"],
            # customdata columns are referenced by index in the hover template.
            customdata=repo_metrics[["total_methods", "num_classes"]],
            hovertemplate=(
                "<b>%{text}</b><br>"
                "Average Complexity: %{y:.2f}<br>"
                "High Complexity Methods: %{x}<br>"
                "Total Methods: %{customdata[0]}<br>"
                "Number of Classes: %{customdata[1]}"
                "<extra></extra>"
            ),
        )
    )

    # Quadrant boundaries sit at the median of each axis, so the split is
    # relative to the repositories currently shown rather than a fixed value.
    quadrant_line_style = dict(line_dash="dash", line_color="gray", opacity=0.5)
    fig.add_vline(
        x=repo_metrics["high_complexity_methods"].median(),
        **quadrant_line_style,
    )
    fig.add_hline(
        y=repo_metrics["avg_complexity"].median(),
        **quadrant_line_style,
    )

    # Corner labels as (x, y, text, background), positioned in paper
    # coordinates so they stay in the plot corners regardless of data range.
    quadrant_labels = [
        # top right
        (0.95, 0.95, "<b>High Risk</b><br>Many complex methods", "rgba(255,0,0,0.1)"),
        # top left
        (0.05, 0.95, "<b>Systemic</b><br>Pervasive complexity", "rgba(255,165,0,0.1)"),
        # bottom right
        (0.95, 0.05, "<b>Localized</b><br>Few problem areas", "rgba(255,255,0,0.1)"),
        # bottom left
        (0.05, 0.05, "<b>Low Risk</b><br>Well-maintained", "rgba(0,255,0,0.1)"),
    ]
    annotations = [
        dict(
            x=label_x,
            y=label_y,
            xref="paper",
            yref="paper",
            text=label_text,
            showarrow=False,
            bgcolor=label_background,
            borderpad=4,
            font=dict(size=10),
        )
        for label_x, label_y, label_text, label_background in quadrant_labels
    ]

    # Both axes share the same faint grid / zero-line styling.
    axis_style = dict(
        gridcolor="rgba(128,128,128,0.2)",
        showgrid=True,
        zeroline=True,
        zerolinecolor="rgba(128,128,128,0.2)",
    )
    fig.update_layout(
        title=dict(
            text=f"Cyclomatic Complexity Risk Matrix<br><sub>Bubble size represents number of classes | Threshold: {complexity_threshold}</sub>",
            font=dict(size=16),
        ),
        xaxis=dict(
            title=f"Number of High Complexity Methods (≥{complexity_threshold})",
            **axis_style,
        ),
        yaxis=dict(
            title="Average Complexity per Repository",
            **axis_style,
        ),
        # Extra right margin leaves room for the colour bar.
        margin=dict(l=60, r=120, t=100, b=60),
        plot_bgcolor="white",
        annotations=annotations,
        hovermode="closest",
    )

# `renderer` is the keyword Figure.show uses to select the output renderer;
# unrecognised keywords are not treated as a renderer choice.
fig.show(renderer="plotly_mimetype")