In [None]:
# Terms matched case-insensitively against "<repositoryPath>/<repositoryBranch>"
# to restrict which rows are analysed; an empty list keeps every row.
repository_filter: list[str] = []

In [None]:
import pandas as pd
import warnings
import plotly.graph_objects as go
import code_data_science.data_table as dt

# Suppress all warnings for the remainder of the run.
warnings.simplefilter("ignore")

df = dt.read_csv("../samples/dependency_vulnerabilities.csv")

# Key the filter is applied to: "<repositoryPath>/<repositoryBranch>".
df["repositoryWithBranch"] = df["repositoryPath"] + "/" + df["repositoryBranch"]

# Keep only rows whose repositoryWithBranch contains at least one term from
# repository_filter (case insensitive). Terms are matched literally rather than
# as regular expressions, so names containing ".", "+", "(" etc. neither raise
# nor over-match. Rows with a missing value never match (na=False), which also
# keeps the mask strictly boolean.
if repository_filter:
    matches = pd.Series(False, index=df.index)
    for term in repository_filter:
        matches |= df["repositoryWithBranch"].str.contains(
            term, case=False, regex=False, na=False
        )
    df = df[matches]


def _severity_hover_rows(frame, totals, severity, grand_total):
    """Collect bar heights and hover metadata for one severity column.

    Args:
        frame: DataFrame with a "Type" column and a column named ``severity``.
        totals: Per-row sum of all severity counts in ``frame``.
        severity: Name of the severity column to read.
        grand_total: Sum of every count across both plotted frames.

    Returns:
        A ``(counts, hover_rows)`` tuple. ``counts`` holds the bar heights and
        ``hover_rows`` holds one dict per row with keys ``bar_pct``,
        ``total_pct`` and ``type``.
    """
    counts = []
    hover_rows = []
    for fix_type, count, bar_total in zip(frame["Type"], frame[severity], totals):
        # Guard both divisions so all-zero data yields 0% instead of failing.
        pct_of_bar = (count / bar_total * 100) if bar_total > 0 else 0
        pct_of_all = (count / grand_total * 100) if grand_total > 0 else 0
        counts.append(count)
        hover_rows.append({
            "bar_pct": round(pct_of_bar, 1),
            "total_pct": round(pct_of_all, 1),
            "type": fix_type,
        })
    return counts, hover_rows


def create_stacked_bar_plot(df_direct, df_transitive):
    """Build a stacked bar chart of vulnerability counts per fix type.

    Each fix type gets one bar for direct and one for transitive dependencies;
    every bar is stacked by severity. Direct bars are drawn solid, transitive
    bars use a "/" pattern.

    Args:
        df_direct: DataFrame with columns "Type", "Low", "Moderate", "High"
            and "Critical" for direct dependencies, or ``None``.
        df_transitive: DataFrame with the same columns for transitive
            dependencies, or ``None``.

    Returns:
        The configured ``go.Figure``. If either argument is ``None`` the figure
        is built from all-zero placeholder data.

    The input frames are not modified.
    """
    severities = ["Low", "Moderate", "High", "Critical"]

    if df_direct is None or df_transitive is None:
        # Create empty dataframes with the correct structure
        fix_types = ["Patch", "Minor", "Major", "No fixed version"]
        placeholder = {"Type": fix_types}
        for severity in severities:
            placeholder[severity] = [0] * len(fix_types)
        df_direct = pd.DataFrame(placeholder)
        df_transitive = pd.DataFrame(placeholder)

    fig = go.Figure()

    # Define colors for severity levels
    colors = {
        "Critical": "#FF5B5B",
        "High": "#FABA49",
        "Moderate": "#FEE968",
        "Low": "#52BBA0",
    }

    # Totals for the percentage calculations, kept local so the caller's
    # frames do not gain a "Total" column as a side effect.
    direct_totals = df_direct[severities].sum(axis=1)
    transitive_totals = df_transitive[severities].sum(axis=1)
    grand_total = direct_totals.sum() + transitive_totals.sum()

    # (label, frame, per-row totals, show legend entry, extra Bar kwargs)
    variants = (
        ("Direct", df_direct, direct_totals, True, {}),
        (
            "Transitive",
            df_transitive,
            transitive_totals,
            False,
            {"marker_pattern_shape": "/"},
        ),
    )

    # Add traces for each severity level (stacked): direct first, then
    # transitive, sharing a legend group so one legend entry covers both.
    for severity in severities:
        for label, frame, totals, show_legend, extra in variants:
            counts, hover_rows = _severity_hover_rows(
                frame, totals, severity, grand_total
            )
            fig.add_trace(go.Bar(
                name=f"{severity} ({label})",
                x=[f"{fix_type} ({label})" for fix_type in frame["Type"]],
                y=counts,
                marker_color=colors[severity],
                customdata=hover_rows,
                legendgroup=severity,
                showlegend=show_legend,
                hovertemplate=(
                    '<b>%{customdata.type}</b><br>' +
                    f'{severity} ({label}): %{{y}}<br>' +
                    'Percent of bar: %{customdata.bar_pct}%<br>' +
                    'Percent of all: %{customdata.total_pct}%' +
                    '<extra></extra>'
                ),
                **extra,
            ))

    # Update layout
    fig.update_layout(
        title=dict(
            text="<b>Dependency Vulnerabilities by Fix Type</b>",
            x=0.5,
            xanchor="center",
            y=0.95,
            yanchor="top",
            font=dict(size=16)
        ),
        xaxis=dict(
            title="Type of version required to fix vulnerability",
            tickangle=-45
        ),
        yaxis={"title": "Vulnerability count"},
        barmode='stack',
        bargap=0.15,
        margin=dict(l=0, r=0, t=100, b=100),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.05,
            xanchor="center",
            x=0.5,
            title=dict(
                text="Severity Level<br><sub>Solid: Direct | Striped: Transitive</sub>",
                font=dict(size=12)
            )
        )
    )

    return fig


def get_semver_fix(version, fixed_version):
    """Classify the version bump needed to reach ``fixed_version``.

    Both versions are split on "." and padded with "0" to at least three
    components; components are then compared as strings, left to right.

    Args:
        version: Currently used version string.
        fixed_version: Version string containing the fix, or "" if none.

    Returns:
        "No fixed version" when ``fixed_version`` is empty; otherwise "Major",
        "Minor" or "Patch" for the first differing component, or "Unknown"
        when the first three components are identical.
    """
    if fixed_version == "":
        return "No fixed version"

    version_components = version.split(".")
    fixed_components = fixed_version.split(".")

    # Fill in missing components with "0" (a non-positive multiplier adds
    # nothing, so versions that already have three or more parts are untouched).
    version_components += ["0"] * (3 - len(version_components))
    fixed_components += ["0"] * (3 - len(fixed_components))

    # The comparison must run regardless of whether padding happened; chaining
    # it to the padding check with `elif` made short fixed versions return None.
    for label, current, fixed in zip(
        ("Major", "Minor", "Patch"), version_components, fixed_components
    ):
        if current != fixed:
            return label
    return "Unknown"


def _count_severities_by_fix(frame, fix_order):
    """Count vulnerabilities per severity for each fix type, in ``fix_order``.

    Args:
        frame: DataFrame with "semverFix" and "severity" columns.
        fix_order: Fix-type labels, one output row per label.

    Returns:
        DataFrame with columns "Type", "Critical", "High", "Moderate", "Low".
    """
    rows = []
    for fix_type in fix_order:
        severity_counts = frame.loc[
            frame["semverFix"] == fix_type, "severity"
        ].value_counts()
        rows.append({
            "Type": fix_type,
            "Critical": severity_counts.get("CRITICAL", 0),
            "High": severity_counts.get("HIGH", 0),
            "Moderate": severity_counts.get("MODERATE", 0),
            "Low": severity_counts.get("LOW", 0),
        })
    return pd.DataFrame(rows)


# If there are no vulnerabilities to plot, render an all-zero placeholder chart
if len(df) == 0:
    fig = create_stacked_bar_plot(None, None)
    fig.update_yaxes(range=[0, 10])
    # The keyword is `renderer`; an unrecognised `render=` is silently ignored.
    fig.show(renderer="plotly_mimetype")
else:
    # drop unnecessary columns except depth which we need
    df = df.drop(
        columns=[
            "repositoryOrigin",
            "repositoryPath",
            "repositoryBranch",
            "repositoryWithBranch",
            "cve",
            "groupId",
            "artifactId",
            "summary",
        ]
    )

    # replace NaN with empty string and make sure both columns hold strings
    df["fixedVersion"] = df["fixedVersion"].fillna("").astype(str)
    df["version"] = df["version"].fillna("").astype(str)

    # add column 'semverFix' to dataframe
    df["semverFix"] = [
        get_semver_fix(current, fixed)
        for current, fixed in zip(df["version"], df["fixedVersion"])
    ]

    # Split into direct (depth 0) and transitive (any other depth)
    df_direct = df[df["depth"] == 0]
    df_transitive = df[df["depth"] != 0]

    # Define the order for fix types
    fix_order = ["Patch", "Minor", "Major", "No fixed version"]

    # Aggregate severity counts per fix type for each dependency kind
    df_direct_plot = _count_severities_by_fix(df_direct, fix_order)
    df_transitive_plot = _count_severities_by_fix(df_transitive, fix_order)

    fig = create_stacked_bar_plot(df_direct_plot, df_transitive_plot)

    # Show the figure
    fig.show(renderer="plotly_mimetype")