## Setup environment

In [None]:
import os
import sys
# Resolve the directory two levels above the working directory and make sure
# it is on the import path exactly once, even when this cell is re-run
# (presumably this is where the `shared` package imported below lives).
module_path = os.path.abspath("../..")
if module_path not in sys.path:
  sys.path += [module_path]

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
import seaborn.objects as so

import shared.package_plots as plots

## Setup notebook

In [None]:
pd.options.display.max_colwidth = 100

sns.set_theme(
  context="paper",
  style="whitegrid",
  palette="colorblind",
  font="Source Sans Pro",
)

%config InlineBackend.figure_format = "retina"

## Configuration

In [None]:
# Configure the shared plotting helpers for this notebook's subject (Git).
# NOTE(review): the argument meanings are inferred from their names only;
# confirm against shared.package_plots.configure.
plots.configure(target="git", friendly="Git", path_prefix="../")

## Load data

In [None]:
# Load the compilation records through the shared plotting helpers; the cells
# that follow treat the result as a pandas DataFrame (copy, column filtering).
compilations_df = plots.load_data()

## Normalise

In [None]:

# Normalise the loaded records. The return value is discarded, so this
# presumably updates `compilations_df` in place — TODO confirm in
# shared.package_plots.normalise.
plots.normalise(compilations_df)

# Uncomment to write the frame, as it stands after the call above, to a TSV
# file for manual inspection.
# compilations_df.to_csv("normalised.tsv", sep="\t")

## Sanity check

In [None]:
# Run the checks against a copy so the loaded frame itself is left untouched.
df = compilations_df.copy()

# Rows whose source-based scope covers zero lines.
no_scope_rows = df[df["Src Scope (L)"] == 0]
print("Rows with no scope lines:", len(no_scope_rows))

# Rows whose "FCL / SSL" ratio exceeds 1, i.e. more than 100% coverage.
over_covered_rows = df[df["FCL / SSL"] > 1]
print("Rows with >100% coverage:", len(over_covered_rows))

# The zero-scope check is reported above but not enforced (left disabled).
# assert len(no_scope_rows) == 0, "Rows with no scope lines"
assert len(over_covered_rows) == 0, "Rows with >100% coverage"

## Playground

In [None]:
# df = compilations_df.copy()
# variants = df.index.get_level_values("Variant")

# o1_df = df[variants.str.fullmatch("Clang 15, O1")]

## Comparing raw lines to filtered lines
# 6463 filtered away
# Working on xsnprintf, ap, decl wrapper.c:681, unit wrapper.c
# Now treating call arguments as may be defined
# 4763 filtered away
# Missing computation region also clears line sets
# 4782 filtered away
# o1_df[o1_df["Cov (L)"] > 0][o1_df["Flt Cov (L)"] == 0]

## Checking source-based scope lines
# 4627 with no scope lines
# Working on xdl_trim_ends, lim, decl xprepare.c:427, unit xdiff/xprepare.c
# Added multiple comma-separated assignments
# by looking up multiple levels for the nearest Stmt ancestor
# 900 with no scope lines
# Working on wt_status_check_rebase, st, decl wt-status.c:1682, unit wt-status.c
# Fixed call argument iteration
# 666 with no scope lines
# Working on xdl_num_out, buf, decl xutils.c:323, unit xdiff/xutils.c
# Added defined regions for pointer assignments
# 628 with no scope lines
# Working on kwsincr, dirs, decl kwset.c:138, unit kwset.c
# Added descent into tree on left-hand side of assignments
# 620 with no scope lines
# Working on tmp_objdir_create, installed_handlers, decl tmp-objdir.c:115, unit
# tmp-objdir.c
# Added static local computation and definition regions
# 615 with no scope lines
# Working on add_cmdname, flex_array_len_, decl help.c:152, unit help.c
# Fixed next line adjustment for one-line regions
# Ignored multi-file regions (e.g. variable declared via #include mid-function)
# 10 with no scope lines
# Working on cmd_grep, dummy, decl grep.c:882, unit builtin/grep.c
# Added all right-hand side assignment and initialiser variables
# 7 with no scope lines
# Working on dereference, unused, decl fast-import.c:3046, unit
# builtin/fast-import.c
# Included current line when referencing existing variables
# 4 with no scope lines
# Remaining issues seem to be "always inline"-related

# o1_df[o1_df["Src Scope (L)"] == 0]