## Setup environment

In [None]:
import os
import sys
# Put the parent of the current working directory on the import path
# (presumably so that `shared.package_plots` below can be resolved).
module_path = os.path.abspath("..")
already_importable = module_path in sys.path
if not already_importable:
  sys.path.append(module_path)

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
import seaborn.objects as so

import shared.package_plots as plots

## Setup notebook

In [None]:
# Let rendered frames show up to 100 characters per cell before truncating.
pd.set_option("display.max_colwidth", 100)

# One global seaborn theme for every figure produced by this notebook.
sns.set_theme(context="paper", style="whitegrid", palette="colorblind", font="Source Sans Pro")

%config InlineBackend.figure_format = "retina"

## Configuration

In [None]:
# Point the shared plotting helpers at this notebook's target.
# NOTE(review): `friendly` is presumably the display name used in figures — confirm
# in shared.package_plots.
plots.configure(target="libflint", friendly="FLINT")

## Load data

In [None]:
# compilations_df = plots.load_data()

def load_data(target_name="libflint", data_path_prefix=""):
  """Load the per-variant coverage tables and stack them into one frame.

  Each run is a tab-separated file that must contain a "Name" column. The
  baseline run ("Clang 15, O0") must also contain "Scope (B)", "Scope (L)"
  and "Src Scope (L)", which are used to synthesise a "Defined region"
  variant in which every coverage column equals its scope column.

  Args:
    target_name: Base name of the `.tsv` files inside each run directory.
    data_path_prefix: String prepended verbatim to every relative file path
      (include a trailing path separator when passing a directory).

  Returns:
    A DataFrame with a two-level index ("Variant", "Row"). Variants appear in
    the order "Defined region" followed by the entries of `runs` below, and
    only names present in every variant are kept.
  """
  # Single source of truth for the variants to load.
  #
  # Order is important here!
  # Some data transformations rely on
  # `iloc[1]` to access the baseline,
  # `diff` to access KE vs. not, etc.
  # Re-check all transformations when changing the order.
  runs = [
    (f"O0-15/{target_name}.tsv", "Clang 15, O0"),
    (f"O0-15-mem2reg/{target_name}.tsv", "Clang 15, O0 + mem2reg"),
    (f"O0-15-mem2reg/{target_name}-efb.tsv", "Clang 15, O0 + mem2reg + KE"),
    # (f"O1-12/{target_name}.tsv", "Clang 12, O1"),
    # (f"O1-13/{target_name}.tsv", "Clang 13, O1"),
    # (f"O1-14/{target_name}.tsv", "Clang 14, O1"),
    # (f"O1-15/{target_name}.tsv", "Clang 15, O1"),
    # (f"O1-15/{target_name}-efb.tsv", "Clang 15, O1 + KE"),
    # (f"O2-15/{target_name}.tsv", "Clang 15, O2"),
    # (f"O2-15/{target_name}-efb.tsv", "Clang 15, O2 + KE"),
    # (f"O3-15/{target_name}.tsv", "Clang 15, O3"),
    # (f"O3-15/{target_name}-efb.tsv", "Clang 15, O3 + KE"),
  ]
  baseline_variant = "Clang 15, O0"

  def read_run(file):
    df = pd.read_table(f"{data_path_prefix}{file}")
    # Clean up column names
    df.columns = df.columns.str.strip()
    # Sort by name to aid matching across datasets
    df = df.sort_values("Name", ignore_index=True)
    # Remove duplicate names (e.g. from macro-generated code with multiple uses of
    # the same variable name); keep=False drops every occurrence, not just the extras
    df = df.drop_duplicates("Name", keep=False)
    return df

  run_frames = {variant: read_run(file) for file, variant in runs}

  # Manufacture virtual data frame representing full coverage
  full_df = run_frames[baseline_variant].copy()
  full_df["Cov (B)"] = full_df["Scope (B)"]
  full_df["Cov (L)"] = full_df["Scope (L)"]
  full_df["Flt Cov (L)"] = full_df["Src Scope (L)"]
  full_df["Adj Cov (L)"] = full_df["Src Scope (L)"]

  # "Defined region" goes first; dicts preserve insertion order.
  frames = {"Defined region": full_df, **run_frames}

  # Restrict all data frames to common names they all share
  common_names = set.intersection(*(set(df["Name"]) for df in frames.values()))
  print(f"Common names: {len(common_names)}")

  def common_only(df, variant):
    kept = df[df["Name"].isin(common_names)]
    print(f"Dropped {len(df) - len(kept)} unique names from {variant}")
    return kept

  frames = {variant: common_only(df, variant) for variant, df in frames.items()}

  # Frames and keys come from the same mapping, so they cannot drift apart.
  compilations_df = pd.concat(
    list(frames.values()),
    keys=list(frames.keys()),
    names=[
      "Variant",
      "Row",
    ],
  )

  return compilations_df

compilations_df = load_data()

## Normalise

In [None]:
# NOTE(review): the result is not assigned, so this presumably normalises
# `compilations_df` in place — confirm in shared.package_plots. If so, re-running
# this cell without re-running "Load data" applies the step twice.
plots.normalise(compilations_df)

# compilations_df.to_csv("normalised.tsv", sep="\t")

## Sanity check

In [None]:
# Work on a copy; the plotting cells further down reuse `df`.
df = compilations_df.copy()

rows_without_scope = int((df["Src Scope (L)"] == 0).sum())
print("Rows with no scope lines:", rows_without_scope)
rows_over_full_coverage = int((df["FCL / SSL"] > 1).sum())
print("Rows with >100% coverage:", rows_over_full_coverage)

# assert rows_without_scope == 0, "Rows with no scope lines"
assert rows_over_full_coverage == 0, "Rows with >100% coverage"

## Playground

In [None]:
# df = compilations_df.copy()

# variants = df.index.get_level_values("Variant")

# fdf = df[variants.str.fullmatch("Clang 15, O0")]
# fdf[fdf["Src Scope (L)"] == 0].shape[0] / fdf.shape[0]

## Coverage by compiler version

In [None]:
plots.coverage_by_compiler_version(df)

## Coverage by optimisation level

In [None]:
plots.coverage_by_optimisation_level(df)

## Experiment: Coverage with knowledge extension, sorted independently

In [None]:
plots.coverage_with_ke_sorted_independently(df)

## Experiment: Coverage difference with knowledge extension, absolute, O1

In [None]:
plots.coverage_difference_with_ke_absolute_o1(df)

## Experiment: Coverage difference with knowledge extension, ratios, O1

In [None]:
plots.coverage_difference_with_ke_ratios_o1(df)

## Experiment: Coverage difference with knowledge extension, O1, distribution

In [None]:
plots.coverage_difference_with_ke_distribution_o1(df)

## Experiment: Coverage with knowledge extension, O1, sorted consistently

In [None]:
plots.coverage_with_ke_o1_sorted_consistently(df)

## Coverage achievability

In [None]:
plots.coverage_achievability(df)

## Coverage metric comparison: Ratios, O0, sorted independently

In [None]:
plots.coverage_comparison_ratios_o0_sorted_independently(df)

## Coverage metric comparison: Ratios, O1, sorted independently

In [None]:
plots.coverage_comparison_ratios_o1_sorted_independently(df)

## Coverage metric comparison: Ratios, O1, sorted consistently

### Old metric

In [None]:
plots.coverage_comparison_ratios_o1_sorted_consistently_old_metric(df)

### New metric, same denominators

In [None]:
plots.coverage_comparison_ratios_o1_sorted_consistently_new_metric_same_denominators(df)

### New metric, different denominators, max scope

In [None]:
plots.coverage_comparison_ratios_o1_sorted_consistently_new_metric_different_denominators_max_scope(df)

### New metric, different denominators, own scope

In [None]:
plots.coverage_comparison_ratios_o1_sorted_consistently_new_metric_different_denominators_own_scope(df)

### New metric, different denominators, bytes

In [None]:
plots.coverage_comparison_ratios_o1_sorted_consistently_new_metric_different_denominators_bytes(df)

## Coverage metric comparison: Ratios, O1, distribution

In [None]:
plots.coverage_comparison_ratios_o1_distribution(df)

## Arithmetic mean coverage

In [None]:
plots.arithmetic_mean_coverage(df)