In [None]:
from code_data_science import data_table as dt

# Load the sample data set; the following cells read its artifactId, version and depth columns.
df = dt.read_csv("../samples/dependency_usage_violin.csv")

In [None]:
# Narrow the frame to the three columns of interest. The explicit copy makes the result
# an independent frame, so assigning new columns to it in later cells does not trigger
# pandas' SettingWithCopyWarning.
df = df[["artifactId", "version", "depth"]].copy()

In [None]:
from code_data_science.versions import index as index_versions

# Coerce versions to strings first, so every lookup key used below is a str.
df["version"] = df["version"].astype(str)

# vmap maps a version string to the number plotted on the y axis
# (presumably ordered by version -- see code_data_science.versions.index).
vmap = index_versions(df["version"])
df["nVersion"] = [vmap[version] for version in df["version"]]


def index_ga(groupartifacts):
    """Assign each distinct artifact identifier its position in sorted order.

    Args:
        groupartifacts: Iterable of hashable, mutually comparable identifiers.
            Duplicates are allowed and are collapsed.

    Returns:
        dict mapping every distinct identifier to its 0-based rank in ascending
        order. Keys are inserted in that same ascending order. An empty input
        yields an empty dict.
    """
    # enumerate() hands out the rank directly; calling list.index() for every
    # element would rescan the list each time (quadratic in the number of artifacts).
    return {ga: rank for rank, ga in enumerate(sorted(set(groupartifacts)))}


# Give every artifact a numeric index (used as its x position when plotting).
gmap = index_ga(df["artifactId"])
df["nArtifactId"] = [gmap[artifact] for artifact in df["artifactId"]]

# Order rows by version index first, then by artifact index.
df = df.sort_values(["nVersion", "nArtifactId"])

In [None]:
import plotly.graph_objects as go
import code_data_science.palette as palette

# NOTE(review): assumes colors_by_weight returns an indexable sequence of colour values
# (it is indexed with an integer and passed to len() below) -- confirm against palette.
colors = palette.colors_by_weight(500)

fig = go.Figure()

# Add a scatter trace (hoverable markers) and a violin trace (density outline)
# to the plot for each artifact
for i, category in enumerate(df["nArtifactId"].unique()):
    category_data = df[df["nArtifactId"] == category]

    # Number of rows of this artifact that share each row's version. transform() returns
    # a Series on category_data's own index, so counts[k] always belongs to the point at
    # (x[k], y[k]) without relying on the row order of a separate merge.
    counts = category_data.groupby("nVersion")["nArtifactId"].transform("count")

    # Hover text including the count information, built column-wise instead of row by row
    hover_text = (
        "<b>Artifact</b>: "
        + category_data["artifactId"].astype(str)
        + "<br><b>Version</b>: "
        + category_data["version"].astype(str)
        + "<br><b>Count</b>: "
        + counts.astype(str)
    )

    fig.add_trace(
        go.Scatter(
            x=category_data["nArtifactId"],
            y=category_data["nVersion"],
            mode="markers",
            # Cycle through the palette when there are more artifacts than colours
            marker=dict(color=colors[i % len(colors)], size=8),
            showlegend=False,
            name="",
            text=hover_text,
            hoverinfo="text",
            hoverlabel=dict(font=dict(size=18)),
        )
    )

    # Faint violin drawn over the same points; its own hover is disabled so only the
    # scatter markers above provide hover text
    fig.add_trace(
        go.Violin(
            x=category_data["nArtifactId"],
            y=category_data["nVersion"],
            fillcolor="black",
            opacity=0.15,
            line_color="black",
            showlegend=False,
            width=0.7,
            bandwidth=0.4,
            hoverinfo="none",
            hoveron="points",
        )
    )

# Figure sizing: scale with the number of distinct versions (height) and artifacts
# (width), but never go below 900 px in either direction.
height_per_version = 32
width_per_dependency = 80
tick_font_size = 13
num_versions = df["nVersion"].nunique()
fig_height = max(900, num_versions * height_per_version)
fig_width = max(900, len(gmap) * width_per_dependency)

# Both axes are plotted on numeric indices; put the original names on the ticks.
xaxis_ticks = dict(
    tickfont=dict(size=tick_font_size),
    tickmode="array",
    tickvals=list(gmap.values()),
    ticktext=list(gmap.keys()),
)
yaxis_ticks = dict(
    tickfont=dict(size=tick_font_size),
    tickmode="array",
    tickvals=list(vmap.values()),
    ticktext=list(vmap.keys()),
)

# Customizing the layout
fig.update_layout(
    title="Artifact versions in use",
    xaxis_title="Artifacts",
    yaxis_title="Versions",
    height=fig_height,
    width=fig_width,
    xaxis=xaxis_ticks,
    yaxis=yaxis_ticks,
)

fig.show()