# Dependency usage

This notebook plots, for each artifact, the distribution of the dependency versions in use.

_Note: If you have opened this notebook from Moderne, the data table is already loaded locally as `./dependency-usage-violin.xlsx`._

In [104]:
# Location of the Excel data table that this notebook loads with pandas.
data_table_file = "./dependency-usage-violin.xlsx"

In [None]:
import piplite

await piplite.install('pandas')
await piplite.install('openpyxl')
await piplite.install('matplotlib')
await piplite.install('seaborn')
await piplite.install('code_data_science')

In [105]:
import pandas as pd
import warnings
# Suppress every Python warning raised from here on.
# NOTE(review): this also hides deprecation notices from the plotting/data libraries;
# consider narrowing the filter to the specific warning categories you want silenced.
warnings.simplefilter("ignore")

In [None]:
# Read the data table into a DataFrame, using openpyxl as the Excel engine.
df = pd.read_excel(
    data_table_file,
    engine="openpyxl",
)

In [None]:
# Keep only the columns this notebook works with. The explicit copy makes `df` an
# independent frame, so the columns assigned to it in later cells are not written
# into a slice of the loaded table (the pattern behind pandas' SettingWithCopyWarning).
df = df[['artifactId', 'version', 'depth']].copy()
# Optional: uncomment to truncate each version to its first two dot-separated components.
#df['version'] = list(map(lambda v: ".".join(v.split(".")[0:2]), df['version']))
print(df.head())

In [None]:
from code_data_science.index_versions import index_versions

# Build the version -> number lookup, then store each row's number in `nVersion`.
vmap = index_versions(df.version)
df['nVersion'] = [vmap[version] for version in df.version]

def index_ga(groupartifacts):
    """Map each distinct identifier to its rank in sorted order.

    Args:
        groupartifacts: Iterable of hashable, mutually sortable identifiers.
            Duplicates are allowed and collapsed.

    Returns:
        dict whose keys are the distinct identifiers in ascending order and
        whose values are their 0-based positions in that order.
    """
    # enumerate() hands out the position directly; calling list.index() for
    # every key would rescan the sorted list each time (quadratic overall).
    return {ga: position for position, ga in enumerate(sorted(set(groupartifacts)))}
# Build the artifact -> number lookup, then store each row's number in `nArtifactId`.
gmap = index_ga(df.artifactId)
df['nArtifactId'] = [gmap[artifact] for artifact in df.artifactId]

# Order rows by version number first, artifact number second.
sort_keys = ['nVersion', 'nArtifactId']
df = df.sort_values(by=sort_keys)
print(df.head())

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Plot one column per artifact (x) against the numeric version index (y),
# overlaying a strip plot and a violin plot on the same axes.
sns.set(context="notebook")
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(18,7))

sns.stripplot(x="nArtifactId", y="nVersion", data=df, size=8, ax=ax, palette="bright")
# `size` is a marker option of stripplot, not of violinplot; it is omitted here
# because violinplot's extra keyword arguments are forwarded to matplotlib.
sns.violinplot(x="nArtifactId", y="nVersion", data=df, ax=ax, palette="deep")

# Replace the numeric ticks with the original version / artifact names.
# Each axis is configured once; the x labels are rotated so long names stay readable.
ax.yaxis.set_ticks(list(vmap.values()), labels=list(vmap.keys()))
ax.xaxis.set_ticks(list(gmap.values()), labels=list(gmap.keys()), rotation=90)
ax.set_xlabel("Artifacts")
ax.set_ylabel("Versions")
ax.set_title("Artifact versions in use")
plt.show()