In [1]:
# Notebook parameters.
# repository_filter: terms matched case-insensitively against the
#   repositoryPath column; an empty list disables filtering.
# top_n_classes: maximum number of classes (ranked by total complexity)
#   shown in the heatmap.
repository_filter: list[str] = []
top_n_classes: int = 50

In [None]:
import pandas as pd
import warnings
import plotly.graph_objects as go
import plotly.express as px
import code_data_science.data_table as dt
import numpy as np

# Suppress all Python warnings for the remainder of the session.
warnings.simplefilter("ignore")

df = dt.read_csv("../samples/cyclomatic_complexity.csv")

# Filter the data frame to only include rows where repositoryPath contains
# a term in the repository_filter (case insensitive).
#
# Terms are matched literally (regex=False) so that characters such as
# ".", "+" or "(" in a repository name are not interpreted as regular
# expression syntax, and rows with a missing repositoryPath are treated as
# non-matching (na=False) instead of producing a NaN mask that pandas
# refuses to index with.
if repository_filter:
    matches_filter = pd.Series(False, index=df.index)
    for term in repository_filter:
        matches_filter |= df["repositoryPath"].str.contains(
            term, case=False, regex=False, na=False
        )
    df = df[matches_filter]

# Build and display a heatmap of per-class cyclomatic complexity metrics.
# Each row is one of the top N classes (by total complexity); each column
# is one metric. Cell colour is the metric value normalized per column,
# cell text is the raw value.

# Aggregated column name -> heatmap column label, in left-to-right order.
_METRIC_LABELS = {
    'method_count': 'Method Count',
    'avg_complexity': 'Avg Complexity',
    'max_complexity': 'Max Complexity',
    'total_complexity': 'Total Complexity',
}


def _format_metric(metric, value):
    """Return the display text for a single heatmap cell.

    The average is shown with one decimal; every other metric is shown as
    an integer. Missing values (e.g. a class whose complexity entries are
    all NaN) are shown as "n/a" rather than crashing on ``int(nan)``.
    """
    if pd.isna(value):
        return "n/a"
    if metric == 'Avg Complexity':
        return f"{value:.1f}"
    return str(int(value))


def _unique_class_labels(classes):
    """Return one ``repository::class`` display label per row of *classes*.

    The short form ``<last path segment>::<short class name>`` is preferred.
    Rows whose short label clashes with another row (same short class name
    in different packages, or repositories sharing a directory name) fall
    back first to the fully qualified class name and then to the full
    repository path, so every heatmap row keeps a distinct label.
    """
    repo_short = classes['repository'].str.split('/').str[-1]
    labels = repo_short + '::' + classes['classShortName']
    fallbacks = (
        repo_short + '::' + classes['className'],
        classes['repository'] + '::' + classes['className'],
    )
    for fallback in fallbacks:
        clashing = labels.duplicated(keep=False)
        if not clashing.any():
            break
        labels = labels.where(~clashing, fallback)
    return labels


# Exit early if there are no records and render a plot with a message
if len(df) == 0:
    fig = go.Figure()
    fig.add_annotation(
        text="No data available for the selected repositories",
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font=dict(size=14),
    )
    fig.update_layout(
        xaxis=dict(visible=False),
        yaxis=dict(visible=False),
        margin=dict(l=0, r=0, t=60, b=60),
        title="Cyclomatic Complexity Heatmap",
    )
    # The keyword is `renderer`; `render` is not a parameter of show().
    fig.show(renderer="plotly_mimetype")
else:
    # Aggregate complexity metrics by repository and class
    class_metrics = (
        df.groupby(['repositoryPath', 'className'])
        .agg(
            total_complexity=('complexity', 'sum'),
            avg_complexity=('complexity', 'mean'),
            max_complexity=('complexity', 'max'),
            method_count=('complexity', 'count'),
        )
        .reset_index()
        .rename(columns={'repositoryPath': 'repository'})
    )

    # Get short class names for display
    class_metrics['classShortName'] = class_metrics['className'].str.split('.').str[-1]

    # Get top N classes by total complexity. nlargest returns the rows in
    # descending order, which is the row order used for the heatmap.
    top_classes = class_metrics.nlargest(
        min(top_n_classes, len(class_metrics)), 'total_complexity'
    )

    # One row per class, one column per metric, raw (un-normalized) values.
    # Built directly from the aggregated columns: reshaping through
    # DataFrame.pivot raises when two classes share a display label.
    pivot_table = top_classes[list(_METRIC_LABELS)].rename(columns=_METRIC_LABELS)
    pivot_table.index = pd.Index(_unique_class_labels(top_classes), name='class')
    pivot_table.columns.name = 'metric'

    # Normalize values for each metric (0-1 scale) for better color representation
    normalized_data = pivot_table.astype(float)
    for col in normalized_data.columns:
        col_min = normalized_data[col].min()
        col_range = normalized_data[col].max() - col_min
        shifted = normalized_data[col] - col_min
        # A column whose values are all equal has no spread to scale by;
        # map it to 0 so it stays inside the 0-1 colour range instead of
        # leaking raw values into the colour scale.
        normalized_data[col] = shifted / col_range if col_range > 0 else shifted

    # Raw values formatted once, reused for both hover text and cell labels
    cell_text = [
        [_format_metric(col, value) for col, value in zip(pivot_table.columns, row)]
        for row in pivot_table.itertuples(index=False, name=None)
    ]

    # Create custom hover text
    hover_text = [
        [f"{col}: {text}" for col, text in zip(pivot_table.columns, row_text)]
        for row_text in cell_text
    ]

    # Create the heatmap
    fig = go.Figure(data=go.Heatmap(
        z=normalized_data.values,
        x=normalized_data.columns,
        y=normalized_data.index,
        # Pin the colour range to the normalized scale so the colours do
        # not depend on which values happen to be present.
        zmin=0,
        zmax=1,
        colorscale=[
            [0, '#E8F5E9'],
            [0.2, '#A5D6A7'],
            [0.4, '#FFE082'],
            [0.6, '#FFB74D'],
            [0.8, '#FF8A65'],
            [1, '#EF5350'],
        ],
        colorbar=dict(
            title="Normalized<br>Complexity",
            thickness=15,
            len=0.7,
        ),
        text=hover_text,
        texttemplate="",
        hovertemplate=(
            '<b>%{y}</b><br>' +
            '%{text}' +
            '<extra></extra>'
        ),
    ))

    # Add text annotations for actual values. Cells are addressed by their
    # row/column position on the categorical axes.
    annotations = []
    for i, row_text in enumerate(cell_text):
        for j, text in enumerate(row_text):
            # White text only on the dark end of the colour scale; missing
            # values compare False and therefore keep black text.
            is_dark_cell = normalized_data.iat[i, j] >= 0.6
            annotations.append(
                go.layout.Annotation(
                    text=text,
                    x=j,
                    y=i,
                    xref="x",
                    yref="y",
                    showarrow=False,
                    font=dict(
                        size=10,
                        color="white" if is_dark_cell else "black",
                    ),
                )
            )

    # Update layout
    fig.update_layout(
        title=dict(
            text=f"Cyclomatic Complexity Heatmap by Class<br><sub>Top {len(pivot_table)} classes by total complexity</sub>",
            font=dict(size=16),
        ),
        xaxis=dict(
            title="Complexity Metrics",
            tickfont=dict(size=12),
            showgrid=False,
            side="bottom",
        ),
        yaxis=dict(
            title="Repository::Class",
            tickfont=dict(size=10),
            showgrid=False,
            autorange="reversed",  # Put highest complexity at top
        ),
        annotations=annotations,
        margin=dict(l=250, r=50, t=100, b=80),
        # Grow the figure with the number of rows so labels stay legible
        height=max(600, len(pivot_table) * 25 + 200),
        plot_bgcolor='white',
    )

    # Show the figure. The keyword is `renderer`; `render` is not a
    # parameter of show().
    fig.show(renderer="plotly_mimetype")