# Exact Results

In [1]:
import shutil
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.transforms import Bbox

from fainder.utils import load_input, configure_run
from utils.plotting_defaults import set_style, plot_legend

configure_run("WARNING")
set_style()
Path("plots/exact_results").mkdir(parents=True, exist_ok=True)

In [2]:
try:
    shutil.rmtree(Path.home() / ".cache" / "matplotlib" / "tex.cache")
except FileNotFoundError:
    pass

## Data Loading

In [3]:
execution_list = []
for logfile in Path("../logs/exact_results/").glob("*.zst"):
    config = logfile.stem.split("-")
    data = load_input(logfile)

    data["dataset"] = config[0]
    execution_list.append(data)

execution = pd.DataFrame(
    execution_list,
    columns=[
        "dataset",
        "precision_time",
        "recall_time",
        "iterative_time",
        "baseline_time",
        "avg_reduction",
    ],
)

In [4]:
execution[execution["dataset"] == "sportstables"]

Unnamed: 0,dataset,precision_time,recall_time,iterative_time,baseline_time,avg_reduction
2,sportstables,2.298845,2.186126,5.16504,191.859948,0.980537
6,sportstables,2.323815,2.563646,5.098928,186.768256,0.980537
11,sportstables,2.2431,2.579596,5.126647,183.598992,0.980537
12,sportstables,2.352899,2.56888,5.227368,193.343132,0.980537
13,sportstables,2.241156,2.548854,5.194353,196.01891,0.980537


In [5]:
execution.groupby("dataset").agg({"avg_reduction": "mean"})

Unnamed: 0_level_0,avg_reduction
dataset,Unnamed: 1_level_1
gittables,0.982878
open_data_usa,0.93012
sportstables,0.980537


## Plotting

In [6]:
height = 1.1
handles = []
for dataset in ["sportstables", "open_data_usa", "gittables"]:
    data = execution[execution["dataset"] == dataset]
    precision_time = data["precision_time"].mean()
    recall_time = data["recall_time"].mean()
    iterative_time = data["iterative_time"].mean()
    baseline_time = data["baseline_time"].mean()

    fig, ax = plt.subplots(figsize=(1.2, height))
    colors = [sns.color_palette()[0], sns.color_palette()[1], sns.color_palette()[2]]
    hatches = ["///", "xxx", "ooo"]

    ax.bar(
        0,
        baseline_time,
        width=0.5,
        color=colors[0],
        edgecolor="black",
        hatch=hatches[0],
        label="Profile scan",
    )

    bottom = 0
    for i, time in enumerate([recall_time, precision_time, iterative_time]):
        handles += ax.bar(
            0.75,
            time,
            bottom=bottom,
            width=0.5,
            color=colors[-(i + 1)],
            edgecolor="black",
            hatch=hatches[-(i + 1)],
        )
        bottom += time

    ax.set_xticks([0, 0.75])
    ax.set_xticklabels(["Full scan", r"\textsc{F. Exact}"])
    if dataset == "gittables":
        ax.set_ylim(200, 90000)
    else:
        ax.set_ylim(recall_time / 2 % 10, ax.get_ylim()[1] * 2)
    ax.set_yscale("log")
    ax.annotate(
        f"{baseline_time:.0f}",
        xy=(0, baseline_time),  # type: ignore
        xytext=(0, 1),
        fontsize=mpl.rcParams["font.size"] * 0.8,
        textcoords="offset points",
        ha="center",
        va="bottom",
    )
    ax.annotate(
        f"{bottom:.4g}",
        xy=(0.75, bottom),  # type: ignore
        xytext=(0, 1),
        fontsize=mpl.rcParams["font.size"] * 0.8,
        textcoords="offset points",
        ha="center",
        va="bottom",
    )

    sns.despine()
    bbox = fig.get_tightbbox()

    plt.tight_layout(pad=1.02)
    plt.savefig(
        f"plots/exact_results/{dataset}.pdf",
        bbox_inches="tight",
        pad_inches=0.01,
    )

    ax.set_ylabel("Time (s)")
    bbox = fig.get_tightbbox()
    label_bbox = Bbox(((bbox.x0, bbox.y0), (0.07, bbox.y1)))
    plt.savefig(f"plots/exact_results/ylabel.pdf", bbox_inches=label_bbox)
    plt.close()

plot_legend(
    "plots/exact_results/legend.pdf",
    handles[::-1],
    [r"\pscan", r"\approximate{} full prec.", r"\approximate{} full rec."],
    ncol=3,
)

## Conceptual Diagram for Fainder Exact

In [7]:
# NOTE: This version of the diagram needs to be manually edited in Inkscape afterwards to reproduce
# the excact figure from the paper

import plotly.graph_objects as go

red = "rgb" + str(sns.color_palette()[3])
green = "rgb" + str(sns.color_palette()[2])
light_gray = "rgba" + str((*sns.color_palette()[7], 0.2))
alpha_red = "rgba" + str((*sns.color_palette()[3], 0.6))
alpha_green = "rgba" + str((*sns.color_palette()[2], 0.6))
alpha_gray = "rgba" + str((*sns.color_palette()[7], 0.6))

fig = go.Figure(
    data=[
        go.Sankey(
            node=dict(
                pad=5,
                thickness=30,
                line=dict(color="black", width=0.0),
                color=[light_gray, light_gray, light_gray, green, red],
                x=[0, 0.33, 0.66, 0.966, 0.966],
                y=[0.0, 0.77, 0.55, 0.79, 0.26],
            ),
            link=dict(
                source=[0, 0, 1, 1, 2, 2],
                target=[1, 4, 3, 2, 3, 4],
                value=[0.5, 0.5, 0.4, 0.1, 0.05, 0.05],
                color=[alpha_gray, alpha_red, alpha_green, alpha_gray, alpha_green, alpha_red],
            ),
            arrangement="perpendicular",
            orientation="h",
            valuesuffix="",
        )
    ],
    layout=dict(
        font={"size": 6, "family": "Computer Modern", "color": "#000"},
        margin={"l": 0, "r": 0, "b": 0, "t": 0},
        width=300,
        height=100,
    ),
)
fig.write_image("diagrams/fainder_exact.svg")