## Set up environment

In [None]:
import os
import sys
# Resolve the directory two levels above the current working directory and
# add it to the import path (only once), ahead of the `shared.*` import below.
module_path = os.path.abspath("../..")
if module_path not in sys.path:
  sys.path.append(module_path)

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
import seaborn.objects as so

import shared.package_plots as plots

## Set up notebook

In [None]:
pd.options.display.max_colwidth = 100
pd.options.display.min_rows = 20

sns.set_theme(
  context="paper",
  style="whitegrid",
  palette="colorblind",
  font="Source Sans Pro",
)

%config InlineBackend.figure_format = "retina"

## Configuration

In [None]:
# Configure the shared plotting helpers for the Git target; "../" is passed as
# the path prefix (presumably relative to this notebook's directory -- TODO confirm).
plots.configure(target="git", friendly="Git", path_prefix="../")

## Load data

In [None]:
# Load the data through the project's plotting helper module; presumably this
# honours the target/path given to plots.configure() -- TODO confirm.
compilations_df = plots.load_data()

## Normalise

In [None]:

# Currently disabled. When enabled, this would run the project's normalise
# helper on the loaded frame and then write compilations_df to
# "normalised.tsv" as tab-separated text.
# plots.normalise(compilations_df)

# compilations_df.to_csv("normalised.tsv", sep="\t")

## Sanity check

In [None]:
# Currently disabled. When enabled, this would report and then assert that no
# row has a "Src Scope (L)" of 0 and that no row has an "FCL / SSL" ratio
# above 1.
# df = compilations_df.copy()

# print("Rows with no scope lines:", df[df["Src Scope (L)"] == 0].shape[0])
# print("Rows with >100% coverage:", df[df["FCL / SSL"] > 1].shape[0])

# assert df[df["Src Scope (L)"] == 0].shape[0] == 0, "Rows with no scope lines"
# assert df[df["FCL / SSL"] > 1].shape[0] == 0, "Rows with >100% coverage"

## Playground

In [None]:
def _mean_per_name(frame):
  """Collapse *frame* to one row per "Name" with the mean of its numeric columns."""
  return frame.groupby("Name", as_index=False).mean(numeric_only=True)


# Work on a copy so the frame loaded earlier is not modified by this cell.
df = compilations_df.copy()
variants = df.index.get_level_values("Variant")

# Rows with a missing "Instance" get the "<orig>" marker; presumably these are
# the original (non-inlined) occurrences of a variable -- TODO confirm.
df["Instance"] = df["Instance"].fillna("<orig>")

# Number of rows per (Variant, Name). This has to run after the fill above,
# because "count" skips missing values. Only the "Instance" column is
# transformed, instead of counting every column and discarding the rest.
df["Instances"] = df.groupby(["Variant", "Name"])["Instance"].transform("count")

# Restrict the analysis to the "Clang 15, O1" variant.
o1_df = df[variants.str.fullmatch("Clang 15, O1")]

# Names that have at least one row whose instance is not the "<orig>" marker.
o1_inl = o1_df.groupby("Name").filter(lambda g: (g["Instance"] != "<orig>").any())
o1_inl = _mean_per_name(o1_inl)
o1_inl_count = len(o1_inl)

print(f"Variables with inlining: {o1_inl_count}")

# Names with more than one row, regardless of which instances they are.
o1_all = _mean_per_name(o1_df)
o1_all_count = len(o1_all[o1_all["Instances"] > 1])

print(f"Variables with multiple instances: {o1_all_count}")

# Names with more than one row where at least one row is the "<orig>" marker.
o1_orig = o1_df.groupby("Name").filter(lambda g: (g["Instance"] == "<orig>").any())
o1_orig = _mean_per_name(o1_orig)
o1_orig_count = len(o1_orig[o1_orig["Instances"] > 1])

print(f"Variables with multiple instances incl. original: {o1_orig_count}")