## Set up environment

In [None]:
import os
import sys
# Put the directory two levels up on the import path (once), presumably so
# that the project-local `shared` package imported below can be found.
module_path = os.path.abspath("../..")
if sys.path.count(module_path) == 0:
  sys.path.append(module_path)

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
import seaborn.objects as so

import shared.package_plots as plots

## Set up notebook

In [None]:
# Table display: allow wider cell contents and show more rows of truncated frames.
pd.set_option("display.max_colwidth", 100)
pd.set_option("display.min_rows", 20)

# Plot theme shared by every figure in this notebook.
sns.set_theme(
  context="paper",
  style="whitegrid",
  font="Source Sans Pro",
  # Adapted from https://personal.sron.nl/~pault/
  palette=[
    '#4477AA',
    '#EE6677',
    '#228833',
    '#DDAA33',
    '#66CCEE',
    '#AA3377',
    '#BBBBBB',
  ],
)

# IPython magic: select the "retina" figure format for inline matplotlib output.
%config InlineBackend.figure_format = "retina"

## Load data

In [None]:
def load_data(runs=None):
  """Read each compilation's table, align them on a common set of names, and stack them.

  Every run is read with `pd.read_table`; its column names are stripped of
  surrounding whitespace and rows sharing a `Name` are averaged. Names that
  are absent from a run are then added with zero coverage and unit scope, so
  that every run ends up with the union of all names. Finally a synthetic
  "Defined region" frame (coverage columns set equal to the scope columns of
  the first run) is placed in front of the runs.

  Args:
    runs: Optional iterable of `(file, variant)` pairs, in the order the
      variants should appear in the result. Defaults to the two example runs
      `"example-O1.tsv"` / `"example-O1-fixed.tsv"`.

  Returns:
    A DataFrame with a two-level index named `("Variant", "Row")` holding the
    "Defined region" frame followed by one frame per run.

  Raises:
    ValueError: If `runs` is empty.
  """
  if runs is None:
    # Order is important here!
    # Some data transformations rely on
    # `iloc[1]` to access the baseline,
    # `diff` to access KE vs. not, etc.
    # Re-check all transformations when changing the order.
    runs = [
      ("example-O1.tsv", "O1 (before)"),
      ("example-O1-fixed.tsv", "O1 (after)"),
    ]
  runs = list(runs)
  if not runs:
    raise ValueError("At least one run is required")

  def read_run(file):
    """Read one run and return a frame with exactly one row per name."""
    df = pd.read_table(file)

    # Clean up column names
    df.columns = df.columns.str.strip()
    # Sort by name to aid matching across datasets
    df = df.sort_values("Name", ignore_index=True)
    # Summarise across inlined call sites with arithmetic mean
    return df.groupby("Name", as_index=False).mean(numeric_only=True)

  # Variant labels live in a list parallel to `dfs` instead of being stored
  # as ad-hoc attributes on the frames: pandas operations such as `concat`
  # and `sort_values` return new objects that do not carry such attributes.
  variants = [variant for (_file, variant) in runs]
  dfs = [read_run(file) for (file, _variant) in runs]

  # Check names present in each compilation for differences
  print("# Names")
  name_sets = [set(df["Name"]) for df in dfs]
  common_names = set.intersection(*name_sets)
  print(f"Common names: {len(common_names)}")
  all_names = set.union(*name_sets)
  all_names_df = pd.DataFrame({ "Name": list(all_names) })
  print(f"All names: {len(all_names)}")
  print()

  # Names are unique within each frame after the group-by above,
  # so set differences give the same counts as row filters would.
  for (variant, names) in zip(variants, name_sets):
    print(f"## {variant}")
    missing_all_diff = len(all_names - names)
    print(f"{missing_all_diff} names from other compilations missing from this compilation")
    common_diff = len(names - common_names)
    print(f"{common_diff} names missing from one or more other compilations")
    print()

  def add_missing_rows(df, variant):
    """Return `df` extended with a placeholder row for every name it lacks."""
    # Create additional dataset with missing rows
    missing_df = all_names_df[~all_names_df["Name"].isin(df["Name"])].copy()
    missing_df["Cov (B)"] = 0
    missing_df["Scope (B)"] = 1
    missing_df["Cov (L)"] = 0
    missing_df["Scope (L)"] = 1
    missing_df["Flt Cov (L)"] = 0
    missing_df["Src Scope (L)"] = 1
    print(f"Adding {len(missing_df)} missing names to {variant}")
    # Append to existing data and resort
    df = pd.concat([df, missing_df], ignore_index=True)
    assert len(df) == len(all_names_df), "Names still missing"
    return df.sort_values("Name", ignore_index=True)

  # Add any missing rows so that all compilations contain the union of all names
  dfs = [add_missing_rows(df, variant) for (variant, df) in zip(variants, dfs)]

  # Manufacture virtual data frame representing full coverage
  full_df = dfs[0].copy()
  full_df["Cov (B)"] = full_df["Scope (B)"]
  full_df["Cov (L)"] = full_df["Scope (L)"]
  full_df["Flt Cov (L)"] = full_df["Src Scope (L)"]
  variants.insert(0, "Defined region")
  dfs.insert(0, full_df)

  compilations_df = pd.concat(
    dfs,
    keys=variants,
    names=[
      "Variant",
      "Row",
    ],
  )

  return compilations_df

# Build the combined per-variant frame that the following cells normalise and plot.
compilations_df = load_data()

## Normalise

In [None]:
# NOTE(review): the return value is discarded, so this presumably mutates
# `compilations_df` in place (e.g. adding the "FCL / SSL" column that the
# plot cell reads) — confirm against `shared.package_plots`.
plots.normalise(compilations_df)

## Playground

In [None]:
# df = compilations_df.copy()
# df

## Plot

In [None]:
# Work on a copy so the extra column does not end up on the shared frame.
df = compilations_df.copy()
# Rank each row within its variant, highest "FCL / SSL" first;
# that rank becomes the x-axis position.
df["Order"] = (
  df
  .sort_values(by="FCL / SSL", ascending=False)
  .groupby("Variant")
  .cumcount()
)
# One line per variant.
g = sns.relplot(
  data=df,
  kind="line",
  x="Order",
  y="FCL / SSL",
  hue="Variant",
  height=2.5,
)
# Re-anchor the legend and drop its title.
sns.move_legend(
  g,
  loc="upper left",
  bbox_to_anchor=(0.25, 0.925),
  title=None,
  fontsize="small",
  frameon=True,
  shadow=True,
)
# Keyword order is kept as-is: the axes properties are applied in turn.
g.set(
  title="Issue 38163",
  xlabel="Variable index",
  xbound=(0, df["Order"].max()),
  xticks=[0, 1],
  ylabel="Covered source lines\nnormalised to defined region",
  ybound=(0, 1.002),
)