In [36]:
import plotly.express as px
import seaborn as sns

# Plotly Express defaults applied to every figure created afterwards.
px.defaults.template = "simple_white"
px.defaults.color_discrete_sequence = px.colors.qualitative.G10

# Registry of named figures; later cells add entries and export them in bulk.
figs = {}

# Hex strings of seaborn's "deep" palette, passed to the plots coloured by notation.
colors = list(sns.color_palette("deep").as_hex())

In [16]:
from src.pair_vis import *
from src.distances import *
from src.utils import *

In [17]:
# Settings shared with the cells below: the gallery, its notations and the
# (empty) spec filters.
gallery = "movies"
from_spec = to_spec = ""
notations = gallery_notations(gallery)

# Same notation on both axes, so the scatter compares the two distance measures.
notation = notation2 = "matplotlib"
distance = "cd"
distance2 = "levenshtein"

fig = distance_scatter(
    gallery,
    notation,
    distance,
    notation2,
    distance2,
    from_spec,
    to_spec,
    None,
)

# Small, semi-transparent markers in the first G10 colour.
fig.update_traces(
    selector={"mode": "markers"},
    marker={"size": 4, "color": px.colors.qualitative.G10[0], "opacity": 0.5},
)
# Thin dotted red styling for any line-mode traces.
fig.update_traces(
    selector={"mode": "lines"},
    line={"color": "red", "dash": "dot", "width": 1},
)
fig.update_layout(
    height=300,
    xaxis_title="Compression Distance",
    yaxis_title="Levenshtein Distance",
)

# Registered by name so the bulk export cell can write it to disk.
figs["lev-v-compression"] = fig

In [15]:
# Scatter from remoteness_scatter for altair (x axis) against ggplot2 (y axis),
# both using the "cd" distance.
xn, yn = "altair", "ggplot2"
distance = "cd"
dtick = 200  # used both as the tick step and as the upper bound of each axis

fig = remoteness_scatter(
    gallery, xn, distance, yn, distance, from_spec, to_spec, None
)
fig.update_shapes(line_color="grey", line_width=1)

# Both axes share the same [0, dtick] range; the y axis is additionally
# constrained to its domain.
fig.update_xaxes(
    dtick=dtick,
    title=f"Remoteness in {xn}",
    range=[0, dtick],
)
fig.update_yaxes(
    constrain="domain",
    dtick=dtick,
    title=f"Remoteness in {yn}",
    range=[0, dtick],
)
fig.update_layout(title=f"Remoteness: {yn} vs {xn}", title_x=0.5)

fig.write_image("figs/altair-v-ggplot2_remoteness.svg")
fig.show()

In [14]:
# Same altair (x) vs ggplot2 (y) scatter as the remoteness cell, but with the
# "from_length" measure on both axes.
xn, yn = "altair", "ggplot2"
distance = "from_length"
dtick = 350  # used both as the tick step and as the upper bound of each axis

fig = remoteness_scatter(
    gallery, xn, distance, yn, distance, from_spec, to_spec, None
)
fig.update_shapes(line_color="grey", line_width=1)
fig.update_yaxes(constrain="domain")

# Apply identical tick/range settings to each axis, labelled with its notation.
for update_axis, axis_notation in ((fig.update_xaxes, xn), (fig.update_yaxes, yn)):
    update_axis(dtick=dtick, title=f"Length in {axis_notation}", range=[0, dtick])

fig.update_layout(title=f"Length: {yn} vs {xn}", title_x=0.5)

fig.show()
fig.write_image("figs/altair-v-ggplot2_length.svg")

In [71]:


# One `versus` figure per notation pair, registered in `figs` under the key
# "<second>_v_<first>" (the pair's names in reverse order).
notation_pairs = [
    ("matplotlib", "pandas"),
    ("vega-lite", "altair"),
    ("ggplot2", "vega-lite"),
]
for pair in notation_pairs:
    first, second = pair
    figs[f"{second}_v_{first}"] = versus(first, second)

In [72]:
# Write every registered figure to figs/<name>.pdf and then figs/<name>.svg.
for k, v in figs.items():
    for ext in ("pdf", "svg"):
        v.write_image(f"figs/{k}.{ext}")

In [38]:
distance = "cd"

# Median of the chosen distance for every (notation, from_spec) group.
grouped_distance = distances_df(gallery=gallery).groupby(["notation", "from_spec"])[
    distance
]
df = grouped_distance.median().reset_index()

# One horizontal violin per notation, with every underlying point drawn.
fig = px.violin(
    df,
    x=distance,
    y="notation",
    color="notation",
    points="all",
    hover_data=["from_spec"],
    category_orders={"notation": notations},
    color_discrete_sequence=colors,
    labels={distance: f"Specification Remoteness ({distance})"},
    title="Specification Remoteness Distribution",
    width=800,
    height=800,
)
fig.update_traces(
    hoveron="points",
    hoverinfo="none",
    hovertemplate="",
    pointpos=0,
    scalemode="count",
    spanmode="hard",
    line_width=0,
)
fig.update_layout(showlegend=False, violingap=0.0, violingroupgap=0)
fig.update_xaxes(rangemode="tozero")
fig.update_yaxes(title="")

fig.show()
fig.write_image("figs/remoteness_violins.svg")

In [43]:

tokens_df = load_tokens()

# Rows belonging to the selected gallery; both aggregations below start here.
gallery_tokens = tokens_df.query(f"gallery == '{gallery}'")

# Step 1: for each (token, notation), the number of distinct specs containing it.
specs_per_token = (
    gallery_tokens.groupby(["token", "notation"])["spec"].nunique().reset_index()
)
# Step 2: for each notation and use count, how many tokens have that use count.
df = (
    specs_per_token.groupby(["notation", "spec"])
    .count()
    .reset_index()
    .sort_values(by="spec", ascending=False)
)

# Bars per notation, one segment per use count, labelled "<tokens> used <n>x".
fig = px.bar(
    df,
    x="notation",
    y="token",
    color="notation",
    text="spec",
    category_orders={"notation": notations},
    color_discrete_sequence=colors,
    labels={"token": "Number of Unique Tokens", "spec": "Number of Uses"},
    title="Unique Token Usage Distribution",
    width=800,
    height=800,
)
fig.update_traces(
    texttemplate="%{y} used %{text}x",
    textposition="inside",
    insidetextanchor="middle",
    textangle=0,
)
fig.update_layout(showlegend=False, uniformtext_minsize=8, uniformtext_mode="hide")
fig.update_xaxes(title="")

# Total number of distinct tokens per notation, drawn as a text-only trace
# positioned at that total.
df = gallery_tokens.groupby(["notation"])["token"].nunique().reset_index()
fig.add_scatter(
    x=df["notation"],
    y=df["token"],
    text=df["token"],
    mode="text",
    textposition="top center",
    showlegend=False,
)

fig.write_image("figs/token_bars.svg")