### Utils

In [None]:
import pandas as pd
import numpy as np
import sqlite3 as sql
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

# Apply the seaborn theme first: set_theme rewrites matplotlib's rcParams, so the
# overrides below have to come after it to take effect.
sns.set_theme(context='notebook', style='white', font_scale=1.5)

# Thicker axes spines, plus Type 42 (TrueType) font embedding for PDF/PS output;
# see https://matplotlib.org/stable/users/explain/text/fonts.html
matplotlib.rcParams.update({
    'axes.linewidth': 1.5,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# SQLite connection shared by the queries in the cells below.
conn = sql.connect("../../research-data/railcar/issta-2026/issta-bounded/metrics.db")

In [None]:
# Project names: the first comma-separated field of every heartbeat label,
# de-duplicated in order of first appearance.
PROJECTS = list(
    pd.read_sql("SELECT labels FROM heartbeat", conn)['labels']
    .str.split(',', expand=True)[0]
    .unique()
)

# For each distinct labels string, the earliest (by timestamp) non-zero
# total_edges value, as a Series indexed by that labels string.
TOTAL_EDGES = pd.read_sql("""
SELECT labels, total_edges
FROM (
    SELECT
        labels,
        total_edges,
        ROW_NUMBER() OVER (
            PARTITION BY labels
            ORDER BY timestamp
        ) AS rn
    FROM heartbeat
    WHERE total_edges <> 0
)
WHERE rn = 1;
""", conn, index_col='labels')['total_edges']

In [None]:
def get_coverage_df():
    """Load heartbeat coverage rows with edge totals and label fields attached.

    Reads ``coverage`` and ``labels`` from the ``heartbeat`` table, joins the
    ``total_edges`` value from ``TOTAL_EDGES`` onto each row by its ``labels``
    string, and expands ``labels`` into separate columns via ``split_labels``.
    """
    heartbeats = pd.read_sql("SELECT coverage, labels FROM heartbeat", conn)
    with_totals = heartbeats.join(TOTAL_EDGES, on='labels')
    return split_labels(with_totals)

def split_labels(df):
    """Split the comma-separated ``labels`` column into named columns.

    The first five fields become ``project``, ``mode``, ``schema``, ``driver``
    and ``iter`` (in that order); ``iter`` is converted to ``int``. Returns a
    new DataFrame with these columns appended; the input frame is not modified.
    """
    field_names = {0: "project", 1: "mode", 2: "schema", 3: "driver", 4: "iter"}
    fields = df['labels'].str.split(",", expand=True).rename(columns=field_names)
    result = df.join(fields)
    result['iter'] = result['iter'].astype(int)
    return result

# Overall Coverage

In [None]:
# Heartbeat coverage rows as returned by get_coverage_df(); loaded once here
# and reused by the analysis cells below.
COVERAGE = get_coverage_df()
# NOTE(review): presumably the number of repeated runs per configuration (the
# 'iter' label field) — not referenced in the visible cells; confirm.
ITERATIONS = 8

In [None]:
# NOTE: df is bound to the same object as COVERAGE (no copy is made), so the
# 'pct' column added below is also visible through COVERAGE.
df = COVERAGE
# Coverage expressed as a percentage of the row's total_edges value.
df['pct'] = 100 * df['coverage'] / df['total_edges']
# Mean percentage over all rows in each (project, mode, schema) group, rounded
# to two decimals; as the last expression it is the cell's displayed output.
df.groupby(['project', 'mode', 'schema'])['pct'].mean().round(2)