In [5]:
# Optional search terms. When the list is non-empty, only rows whose
# "<repositoryPath>/<repositoryBranch>" value contains one of the terms
# (case insensitive) are kept; an empty list disables the filtering.
repository_filter: list[str] = []

In [6]:
import pandas as pd
import warnings
import plotly.express as px
import code_data_science.data_table as dt
import code_data_science.palette as palette

warnings.simplefilter("ignore")

df = dt.read_csv("../samples/dependency_vulnerabilities.csv")

df["repositoryWithBranch"] = df["repositoryPath"] + "/" + df["repositoryBranch"]

# Keep only the rows whose repositoryWithBranch matches at least one entry of
# repository_filter (case insensitive). The entries are joined with "|" and
# evaluated as a single regular expression, so regex metacharacters in an
# entry keep their special meaning.
# The truthiness check also tolerates repository_filter being None.
if repository_filter:
    filter_pattern = "|".join(repository_filter)
    # na=False: a row with a missing path or branch has NaN in
    # repositoryWithBranch; count it as "no match" instead of letting the NaN
    # leak into the boolean mask, which would make the indexing below raise.
    matches_filter = df["repositoryWithBranch"].str.contains(
        filter_pattern, case=False, na=False
    )
    df = df[matches_filter]


def create_bar_plot(df_plot):
    """
    Build the stacked bar chart of vulnerability counts per required fix type.

    df_plot needs the fix type column, the count columns "Critical", "High",
    "Moderate" and "Low", and the matching "<severity> %" columns shown in the
    hover box. When df_plot is None an all-zero placeholder table is plotted
    instead.

    Returns the figure created by plotly express.
    """
    fix_type_column = "Type of version required to fix vulnerability"
    # Stacking order of the bar segments; also drives the hover columns.
    stacked_severities = ["Critical", "High", "Moderate", "Low"]
    severity_colors = {
        "Low": "#52BBA0",
        "Moderate": "#FEE968",
        "High": "#FABA49",
        "Critical": "#FF5B5B",
    }

    if df_plot is None:
        # Nothing to plot: one all-zero row for every fix type.
        placeholder = {
            fix_type_column: ["Patch", "Minor", "Major", "No fixed version"]
        }
        zero_columns = (
            "Low",
            "Moderate",
            "High",
            "Critical",
            "Critical %",
            "High %",
            "Moderate %",
            "Low %",
            "Total",
        )
        placeholder.update(dict.fromkeys(zero_columns, 0))
        df_plot = pd.DataFrame(placeholder)

    fig = px.bar(
        df_plot,
        x=fix_type_column,
        y=stacked_severities,
        color_discrete_map=severity_colors,
        barmode="stack",
        hover_name=fix_type_column,
        hover_data={f"{severity} %": True for severity in stacked_severities},
        labels={"variable": "Severity"},
    )

    # Axis titles and a tight margin around the chart
    fig.update_layout(
        xaxis={"title": fix_type_column},
        yaxis={"title": "Vulnerability count"},
        margin={"l": 0, "r": 0, "t": 30, "b": 0},
    )
    return fig


# No vulnerabilities left to chart: show the all-zero placeholder plot with a
# fixed y-axis range so the empty chart still has a readable scale.
if len(df) == 0:
    fig = create_bar_plot(None)
    fig.update_yaxes(range=[0, 10])
    # The keyword is `renderer`; the previous `render=` was silently ignored
    # by plotly, so the default renderer was used instead of the one requested.
    fig.show(renderer="plotly_mimetype")
else:

    def get_semver_fix(version: str, fixed_version: str) -> str:
        """
        Classify the upgrade needed to go from version to fixed_version.

        Both versions are split on "." and padded with "0" up to three
        components, so "2.0" is compared as "2.0.0". Only the first three
        components are compared, as plain strings.

        Returns:
            "No fixed version" when fixed_version is empty,
            "Major", "Minor" or "Patch" for the first component that differs,
            "Unknown" when the first three components are identical.
        """
        if fixed_version == "":
            return "No fixed version"

        version_components = version.split(".")
        fixed_components = fixed_version.split(".")

        # Pad short versions with "0"; a negative repeat count yields [].
        version_components += ["0"] * (3 - len(version_components))
        fixed_components += ["0"] * (3 - len(fixed_components))

        # The comparison must run for every input. It used to be an `elif`
        # of the padding check, so a short fixed version (e.g. "2.0") skipped
        # it and the function returned None.
        for fix_type, current_part, fixed_part in zip(
            ("Major", "Minor", "Patch"), version_components, fixed_components
        ):
            if current_part != fixed_part:
                return fix_type
        return "Unknown"

    # drop unnecessary columns
    df = df.drop(
        columns=[
            "repositoryOrigin",
            "repositoryPath",
            "repositoryBranch",
            "repositoryWithBranch",
            "cve",
            "groupId",
            "artifactId",
            "summary",
            "depth",
        ]
    )

    # make sure version and fixedVersion are strings; NaN becomes ""
    for version_column in ("fixedVersion", "version"):
        df[version_column] = df[version_column].fillna("").astype(str)

    # add column 'semverFix' to dataframe
    df["semverFix"] = [
        get_semver_fix(current, fixed)
        for current, fixed in zip(df["version"], df["fixedVersion"])
    ]

    # fix types shown in the chart, in display order; any other semverFix
    # value is left out of the chart by the reindex calls below
    desired_order = ["Patch", "Minor", "Major", "No fixed version"]

    # count the occurrences of each semver fix; fill_value=0 makes a fix type
    # with no rows count as 0 instead of NaN
    semverFix_counts = (
        df["semverFix"].value_counts().reindex(desired_order, fill_value=0)
    )

    def _count_by_fix(severity):
        """Count the rows of the given severity for each fix type in desired_order."""
        fixes = df.loc[df["severity"] == severity, "semverFix"]
        return fixes.value_counts().reindex(desired_order, fill_value=0)

    # count the number of vulnerabilities per severity level for each semver fix
    critical_count = _count_by_fix("CRITICAL")
    high_count = _count_by_fix("HIGH")
    moderate_count = _count_by_fix("MODERATE")
    low_count = _count_by_fix("LOW")

    # Create a DataFrame with the data
    df_plot = pd.DataFrame(
        {
            "Type of version required to fix vulnerability": semverFix_counts.index,
            "Low": low_count,
            "Moderate": moderate_count,
            "High": high_count,
            "Critical": critical_count,
        }
    )

    # Calculate the total count for each bar
    severity_columns = ["Critical", "High", "Moderate", "Low"]
    df_plot["Total"] = df_plot[severity_columns].sum(axis=1)

    # Calculate the percentage of the total for each severity level; a bar
    # with a total of 0 gives 0/0 = NaN, which is reported as 0 %
    for severity_column in severity_columns:
        share = df_plot[severity_column] / df_plot["Total"] * 100
        df_plot[f"{severity_column} %"] = share.fillna(0).round(1)

    fig = create_bar_plot(df_plot)

    # Narrow the bars and let plotly choose the text position
    fig.update_traces(width=0.33, textposition="auto")

    # Show the figure. The keyword is `renderer`; the previous `render=` was
    # silently ignored by plotly, so the default renderer was used instead.
    fig.show(renderer="plotly_mimetype")