## Set up environment

In [2]:
import os
import sys
# Put the directory two levels above the working directory on the import
# path (presumably so that the `shared` package imported below resolves;
# confirm against the repository layout). Guarded so re-running the cell
# does not add the same entry twice.
module_path = os.path.abspath("../..")
if module_path not in sys.path:
  sys.path.append(module_path)

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
import seaborn.objects as so

import shared.package_plots as plots

## Set up notebook

In [22]:
pd.options.display.max_colwidth = 100
pd.options.display.min_rows = 40

sns.set_theme(
  context="paper",
  style="whitegrid",
  palette="colorblind",
  font="Source Sans Pro",
)

%config InlineBackend.figure_format = "retina"

## Configuration

In [4]:
# Configure the shared plotting helpers for the Git target.
# NOTE(review): the meaning of each argument is defined in
# shared.package_plots, which is not visible here - presumably `friendly` is
# the display name and `path_prefix` the data location; confirm.
plots.configure(
  target="git",
  friendly="Git",
  path_prefix="../",
)

## Load data

In [54]:
# Disabled: the runs are loaded by hand in this cell instead.
# compilations_df = plots.load_data()

# NOTE(review): these repeat the `target` and `path_prefix` values passed to
# plots.configure() in the Configuration cell - keep the two in sync.
# `data_path_prefix` is concatenated directly in front of relative file
# names, so it must end with a path separator.
target_name = "git"
data_path_prefix = "../"
def read_run(file, variant):
  """Load one tab-separated results file and reduce it to one row per name.

  Parameters:
    file: path of the file, relative to the module-level `data_path_prefix`.
    variant: human-readable label of the run; accepted for symmetry with the
      other helpers in this notebook but not used inside this function.

  Returns:
    A data frame sorted by "Name" holding, for each distinct name, the
    arithmetic mean of every numeric column.
  """
  table = pd.read_table(f"{data_path_prefix}{file}")
  # Header cells may carry padding whitespace; strip it so columns can be
  # addressed by their plain names
  table = table.set_axis(table.columns.str.strip(), axis="columns")
  # Order by name to aid matching across datasets, then collapse repeated
  # names (inlined call sites) into their mean
  per_name = table.sort_values("Name", ignore_index=True).groupby(
    "Name", as_index=False
  )
  return per_name.mean(numeric_only=True)

# One entry per compilation: results file (relative to data_path_prefix) and
# the label used for that compilation throughout the notebook.
run_specs = [
  (f"O0-15/{target_name}.tsv", "Clang 15, O0"),
  (f"O0-15-mem2reg/{target_name}.tsv", "Clang 15, O0 + mem2reg"),
  (f"O0-15-mem2reg/{target_name}-efb.tsv", "Clang 15, O0 + mem2reg + KE"),
  (f"O2-12/{target_name}.tsv", "Clang 12, O2"),
  (f"O2-13/{target_name}.tsv", "Clang 13, O2"),
  (f"O2-14/{target_name}.tsv", "Clang 14, O2"),
  (f"O1-15/{target_name}.tsv", "Clang 15, O1"),
  (f"O1-15/{target_name}-efb.tsv", "Clang 15, O1 + KE"),
  (f"O2-15/{target_name}.tsv", "Clang 15, O2"),
  (f"O2-15/{target_name}-efb.tsv", "Clang 15, O2 + KE"),
  (f"O3-15/{target_name}.tsv", "Clang 15, O3"),
  (f"O3-15/{target_name}-efb.tsv", "Clang 15, O3 + KE"),
]

# Load the runs in the order listed above; the targets below line up
# one-to-one with the entries of run_specs.
(
  o0_15_df,
  o0_15_m2r_df,
  o0_15_m2r_efb_df,
  o2_12_df,
  o2_13_df,
  o2_14_df,
  o1_15_df,
  o1_15_efb_df,
  o2_15_df,
  o2_15_efb_df,
  o3_15_df,
  o3_15_efb_df,
) = [read_run(file, variant) for file, variant in run_specs]

# Synthetic "full coverage" frame: start from the O0 run and set every
# coverage column equal to its corresponding scope column.
full_df = o0_15_df.assign(**{
  "Cov (B)": o0_15_df["Scope (B)"],
  "Cov (L)": o0_15_df["Scope (L)"],
  "Flt Cov (L)": o0_15_df["Src Scope (L)"],
  "Adj Cov (L)": o0_15_df["Src Scope (L)"],
})
# Check names present in each compilation for differences
print("# Names")

## Extract unique variables
# The list of source-level names read below is produced outside the notebook
# with the following shell pipeline:
# ( \
#   export LC_ALL=C; \
#   cat "${SCRIPT_DIR}/source-analysis/${TARGET_NAME}.dbgcov" | \
#   grep DeclScope | \
#   cut -f 4 | \
#   sort -u \
#   > "${SCRIPT_DIR}/source-analysis/variables.tsv" \
# )

# data_path_prefix already ends with a separator (same convention as
# read_run), so no extra "/" is inserted in front of the relative path.
src_names_df = pd.read_table(
  f"{data_path_prefix}source-analysis/variables.tsv",
  names=["Name"],
)
print(f"Source names: {len(src_names_df)}")
print(f"O0 names: {len(o0_15_df)}")

# Names of every dataset taking part in the comparison. Building the sets
# once keeps the intersection and the union below over the same datasets.
name_sets = [
  set(run_df["Name"])
  for run_df in (
    full_df,
    o0_15_df,
    o0_15_m2r_df,
    o0_15_m2r_efb_df,
    o2_12_df,
    o2_13_df,
    o2_14_df,
    o1_15_df,
    o1_15_efb_df,
    o2_15_df,
    o2_15_efb_df,
    o3_15_df,
    o3_15_efb_df,
  )
]

# Names present in every dataset
common_names = set.intersection(*name_sets)
print(f"Common names: {len(common_names)}")
# Names present in at least one dataset
all_names = set.union(*name_sets)
# Sorted so the row order is reproducible across kernel sessions (iteration
# order of a set of strings is not)
all_names_df = pd.DataFrame({ "Name": sorted(all_names) })
print(f"All names: {len(all_names)}")
print()

def name_diffs(df, variant):
  """Print how the names in `df` differ from the reference name collections.

  Compares against the module-level `src_names_df`, `all_names_df` and
  `common_names`.

  Parameters:
    df: data frame with a "Name" column.
    variant: label printed as the heading of the report.

  Raises:
    AssertionError: if `df` contains a name that is absent from
      `src_names_df`.
  """
  def count_absent(candidates, reference):
    # Number of entries of `candidates` that do not occur in `reference`
    return int((~candidates.isin(reference)).sum())

  print(f"## {variant}")
  names = df["Name"]

  # Every name in the data must be known to the source analysis
  unexpected_src_diff = count_absent(names, src_names_df["Name"])
  assert unexpected_src_diff == 0, "Names not found in source analysis"
  # (disabled) names not found in the O0 compilation:
  # unexpected_o0_diff = len(df[~df["Name"].isin(o0_15_df["Name"])])
  # print(f"{unexpected_o0_diff} names not found in O0 compilation")

  missing_src_diff = count_absent(src_names_df["Name"], names)
  print(f"{missing_src_diff} source names missing from compilation")

  missing_all_diff = count_absent(all_names_df["Name"], names)
  print(f"{missing_all_diff} all names missing from compilation")
  # (disabled) O0 names missing from this compilation:
  # missing_o0_diff = len(o0_15_df[~o0_15_df["Name"].isin(df["Name"])])
  # print(f"{missing_o0_diff} O0 names missing from compilation")

  common_diff = count_absent(names, common_names)
  print(f"{common_diff} names missing from one or more other compilations")
  print()

# Report the name differences for every dataset, in presentation order
for checked_df, label in (
  (full_df, "Defined region"),
  (o0_15_df, "Clang 15, O0"),
  (o0_15_m2r_df, "Clang 15, O0 + mem2reg"),
  (o0_15_m2r_efb_df, "Clang 15, O0 + mem2reg + KE"),
  (o2_12_df, "Clang 12, O2"),
  (o2_13_df, "Clang 13, O2"),
  (o2_14_df, "Clang 14, O2"),
  (o1_15_df, "Clang 15, O1"),
  (o1_15_efb_df, "Clang 15, O1 + KE"),
  (o2_15_df, "Clang 15, O2"),
  (o2_15_efb_df, "Clang 15, O2 + KE"),
  (o3_15_df, "Clang 15, O3"),
  (o3_15_efb_df, "Clang 15, O3 + KE"),
):
  name_diffs(checked_df, label)

def add_missing_rows(df, variant):
  """Return `df` extended with a placeholder row for every name it lacks.

  The reference list of names is the module-level `all_names_df`.

  Parameters:
    df: data frame with a "Name" column and the coverage/scope columns.
    variant: label used in the progress message.

  Returns:
    A new data frame sorted by "Name" containing the rows of `df` plus one
    row per missing name.

  Raises:
    AssertionError: if the result does not have exactly as many rows as
      `all_names_df`.
  """
  absent = ~all_names_df["Name"].isin(df["Name"])
  # Placeholder rows: zero coverage over a scope of one.
  # NOTE(review): presumably the scope is 1 rather than 0 so that
  # coverage/scope ratios stay defined - confirm against the plotting code.
  placeholder_df = all_names_df[absent].assign(**{
    "Cov (B)": 0,
    "Scope (B)": 1,
    "Cov (L)": 0,
    "Scope (L)": 1,
    "Adj Cov (L)": 0,
    "Flt Cov (L)": 0,
    "Src Scope (L)": 1,
  })
  print(f"Adding {len(placeholder_df)} missing names to {variant}")

  # Existing rows first, placeholders after; the final sort restores order
  padded_df = pd.concat([df, placeholder_df], ignore_index=True)
  assert len(padded_df) == len(all_names_df), "Names still missing"
  return padded_df.sort_values("Name", ignore_index=True)

# Pad every dataset so that each one contains the union of all names. The
# targets below line up one-to-one with the (frame, label) pairs.
(
  full_df,
  o0_15_df,
  o0_15_m2r_df,
  o0_15_m2r_efb_df,
  o2_12_df,
  o2_13_df,
  o2_14_df,
  o1_15_df,
  o1_15_efb_df,
  o2_15_df,
  o2_15_efb_df,
  o3_15_df,
  o3_15_efb_df,
) = [
  add_missing_rows(unpadded_df, label)
  for unpadded_df, label in (
    (full_df, "Defined region"),
    (o0_15_df, "Clang 15, O0"),
    (o0_15_m2r_df, "Clang 15, O0 + mem2reg"),
    (o0_15_m2r_efb_df, "Clang 15, O0 + mem2reg + KE"),
    (o2_12_df, "Clang 12, O2"),
    (o2_13_df, "Clang 13, O2"),
    (o2_14_df, "Clang 14, O2"),
    (o1_15_df, "Clang 15, O1"),
    (o1_15_efb_df, "Clang 15, O1 + KE"),
    (o2_15_df, "Clang 15, O2"),
    (o2_15_efb_df, "Clang 15, O2 + KE"),
    (o3_15_df, "Clang 15, O3"),
    (o3_15_efb_df, "Clang 15, O3 + KE"),
  )
]

# Names
Source names: 51505
O0 names: 45955
Common names: 43077
All names: 45959

## Defined region
5550 source names missing from compilation
4 all names missing from compilation
2878 names missing from one or more other compilations

## Clang 15, O0
5550 source names missing from compilation
4 all names missing from compilation
2878 names missing from one or more other compilations

## Clang 15, O0 + mem2reg
6061 source names missing from compilation
515 all names missing from compilation
2367 names missing from one or more other compilations

## Clang 15, O0 + mem2reg + KE
6061 source names missing from compilation
515 all names missing from compilation
2367 names missing from one or more other compilations

## Clang 12, O2
6294 source names missing from compilation
748 all names missing from compilation
2134 names missing from one or more other compilations

## Clang 13, O2
7281 source names missing from compilation
1735 all names missing from compilation
1147 names missing from on

## Playground

In [53]:
# Scratch check of the padding logic on the Clang 15 O2 run.
# NOTE(review): once the "Load data" cell has run to completion, o2_15_df
# has already been padded to the full name list, so this cell reports 0
# missing names. A non-zero count only appears when o2_15_df is still the
# unpadded frame, i.e. the cell was run out of order.
print(f"All names: {len(all_names_df)}")
print(f"O2 names: {len(o2_15_df)}")

# Placeholder rows (zero coverage, unit scope) for names absent from the run
absent_from_o2 = ~all_names_df["Name"].isin(o2_15_df["Name"])
missing_df = all_names_df[absent_from_o2].assign(**{
  "Cov (B)": 0,
  "Scope (B)": 1,
  "Cov (L)": 0,
  "Scope (L)": 1,
  "Adj Cov (L)": 0,
  "Flt Cov (L)": 0,
  "Src Scope (L)": 1,
})

print(f"Missing names: {len(missing_df)}")
print(f"O2 + missing names: {len(o2_15_df) + len(missing_df)}")

# Run rows followed by the placeholders, re-sorted by name
combined_df = pd.concat(
  [o2_15_df, missing_df],
  ignore_index=True,
).sort_values("Name", ignore_index=True)

combined_df

All names: 45959
O2 names: 44177
Missing names: 1782
O2 + missing names: 45959


Unnamed: 0,Name,Cov (B),Scope (B),Cov (L),Scope (L),Adj Cov (L),Flt Cov (L),Src Scope (L)
0,"BUG_fl, ap, decl usage.c:325",126.000000,126.000000,4.000000,4.000000,4.000000,2.000000,4.0
1,"BUG_fl, file, decl usage.c:323",126.000000,126.000000,4.000000,4.000000,4.000000,4.000000,6.0
2,"BUG_fl, fmt, decl usage.c:323",126.000000,126.000000,4.000000,4.000000,4.000000,4.000000,6.0
3,"BUG_fl, line, decl usage.c:323",126.000000,126.000000,4.000000,4.000000,4.000000,4.000000,6.0
4,"BUG_if_skipped_connectivity_check, cmd, decl receive-pack.c:1817",96.000000,154.000000,4.000000,7.000000,3.000000,3.000000,5.0
5,"BUG_if_skipped_connectivity_check, commands, decl receive-pack.c:1814",154.000000,154.000000,7.000000,7.000000,6.000000,6.000000,6.0
6,"BUG_if_skipped_connectivity_check, si, decl receive-pack.c:1815",154.000000,154.000000,7.000000,7.000000,6.000000,6.000000,6.0
7,"BUG_vfl, file, decl usage.c:304",165.000000,165.000000,9.000000,9.000000,6.000000,7.000000,11.0
8,"BUG_vfl, fmt, decl usage.c:304",165.000000,165.000000,9.000000,9.000000,6.000000,7.000000,11.0
9,"BUG_vfl, in_bug, decl usage.c:307",165.000000,165.000000,9.000000,9.000000,7.000000,6.000000,10.0
