In [None]:
import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
import seaborn.objects as so

In [None]:
# Set the seaborn theme used for the figures below: "paper" context,
# white grid style, colour-blind friendly palette
# NOTE(review): presumably the "Source Sans Pro" font has to be installed for
# the font setting to take effect - confirm on the target machine
sns.set_theme(
  context="paper",
  style="whitegrid",
  palette="colorblind",
  font="Source Sans Pro",
)

In [None]:
# Ask the inline backend for "retina" figure output
# NOTE(review): presumably this renders inline figures at a higher
# resolution for HiDPI displays - confirm
%config InlineBackend.figure_format = "retina"

In [None]:
def read_run(file, variant):
  """Load the per-variable line coverage table of a single run.

  Args:
    file: Path or file-like object with a tab-separated table; handed
      straight to ``pd.read_table``.
    variant: Label written to the ``Variant`` column of every returned row.

  Returns:
    A new DataFrame with the columns ``Name``, ``Cov (L)``, ``Scope (L)``,
    ``CL / SL`` and ``Variant``, sorted by ``Name`` and re-indexed from 0.

  Raises:
    KeyError: If one of the expected columns is missing after the header
      names have been stripped.
  """
  table = pd.read_table(file)
  # Clean up column names (strip surrounding whitespace)
  table.columns = table.columns.str.strip()

  # Only name and line coverage columns
  wanted = ["Name", "Cov (L)", "Scope (L)", "CL / SL"]
  return (
    # Drop the three trailing summary rows
    table.iloc[:-3][wanted]
    # assign() returns a fresh frame, so the new column is never written
    # into a slice of the parsed table (avoids SettingWithCopyWarning)
    .assign(Variant=variant)
    .sort_values("Name", ignore_index=True)
  )

# One coverage table per build variant
o0_efb_df = read_run("example-O0-mem2reg-rsb-efb.tsv", "O0 + mem2reg")
o2_df = read_run("example-O2-rsb.tsv", "O2 (before resolution)")
o2_fixed_df = read_run("example-O2-fixed-rsb.tsv", "O2 (after resolution)")

# The O0 + mem2reg run is the reference the other runs are compared against
baseline_df = o0_efb_df

# Names contain addresses that differ between runs, so take the name values
# from the baseline run. The assignment aligns on the index, which read_run
# resets after sorting by name.
# NOTE(review): assumes every run lists the same variables in the same sorted
# order - confirm, a mismatch would mislabel rows without any error
o2_fixed_df["Name"] = baseline_df["Name"]
o2_df["Name"] = baseline_df["Name"]

# Stack all runs into one long table with a fresh 0..n-1 index
distribution_df = pd.concat(
  [o0_efb_df, o2_df, o2_fixed_df],
  ignore_index=True,
)

# TODO: Use some kind of wide operation instead of per row
def normalise(row, distribution=None, baseline=None):
  """Return a copy of ``row`` extended with normalised coverage ratios.

  Three entries are added to the copy:

  * ``Max Scope (L)``: largest ``Scope (L)`` of any row in ``distribution``
    that has the same ``Name``.
  * ``CL / MSL``: ``Cov (L)`` of this row divided by ``Max Scope (L)``.
  * ``CL / BCL``: ``Cov (L)`` of this row divided by ``Cov (L)`` of the first
    row in ``baseline`` that has the same ``Name``.

  Args:
    row: Series with at least the entries ``Name`` and ``Cov (L)``.
    distribution: Frame with ``Name`` and ``Scope (L)`` columns covering all
      runs. Defaults to the module-level ``distribution_df``.
    baseline: Frame with ``Name`` and ``Cov (L)`` columns of the baseline
      run. Defaults to the module-level ``baseline_df``.

  Returns:
    A new Series; ``row`` itself is left unmodified.

  Raises:
    IndexError: If ``baseline`` has no row with the same ``Name``.
  """
  # Fall back to the notebook-level tables so apply(normalise, axis=1) works
  if distribution is None:
    distribution = distribution_df
  if baseline is None:
    baseline = baseline_df

  name = row["Name"]

  # Line table may differ between runs, giving different scope line counts
  # Use the largest scope line count from any run to recompute ratio
  same_name = distribution["Name"] == name
  max_scope = distribution.loc[same_name, "Scope (L)"].max()

  # Covered line count of the same variable in the baseline run
  base_cov = baseline.loc[baseline["Name"] == name, "Cov (L)"]
  if base_cov.empty:
    raise IndexError(f"no baseline row named {name!r}")

  # Work on a copy: DataFrame.apply does not support functions that mutate
  # the object they are handed
  result = row.copy()
  result["Max Scope (L)"] = max_scope
  result["CL / MSL"] = row["Cov (L)"] / max_scope
  # Normalise values to baseline
  result["CL / BCL"] = row["Cov (L)"] / base_cov.iat[0]
  return result

# Run normalise over every row of the combined table and keep the result
distribution_df = distribution_df.apply(normalise, axis="columns")

# Debugging aids, left commented out:
# distribution_df[distribution_df["Name"].str.contains("list_cmds   ,sep")]
# distribution_df.sort_values("CL / BCL", ascending=False)

# distribution_df

In [None]:
# Number the rows of each variant from 0 in order of descending "CL / BCL";
# assign() works on a copy, so distribution_df itself is not modified
df = distribution_df.assign(
  Order=lambda frame: (
    frame
    .sort_values(by="CL / BCL", ascending=False)
    .groupby("Variant")
    .cumcount()
  ),
)
# Optional variant filter, left commented out:
# df = df[df["Variant"].str.contains("O[01]") & ~(df["Variant"].str.contains("DE"))]

# One line per variant: rank on the x axis, relative coverage on the y axis
g = sns.relplot(
  data=df,
  kind="line",
  x="Order",
  y="CL / BCL",
  hue="Variant",
  height=3.5,
)

# Re-anchor the legend and give it a framed, shadowed box without a title
sns.move_legend(
  g,
  loc="center right",
  bbox_to_anchor=(0.65, 0.8),
  title=None,
  frameon=True,
  shadow=True,
)

# Title and axis labels; both axes get a lower bound of 0 and an open upper bound
g.set(
  title="Variable value source line coverage (issue 39974)",
  xlabel="Variable index (sorted by coverage)",
  ylabel="Covered source lines relative to O0 + mem2reg",
  xbound=(0, None),
  ybound=(0, None),
)