In [None]:
# Load the `watermark` IPython extension so the %watermark magic used in a
# later cell is available.
%load_ext watermark


In [None]:
import itertools as it
import os

from downstream import dstream
import matplotlib as mpl
import polars as pl
import seaborn as sns
from teeplot import teeplot as tp

import pylib  # noqa: F401


In [None]:
# Print a watermark describing the execution environment and the versions of
# imported packages (see the watermark extension docs for each flag).
%watermark -diwmuv -iv


In [None]:
# Switch on the ".pgf" entry in teeplot's save settings (presumably enables
# PGF output alongside the default formats -- confirm against teeplot docs).
tp.save[".pgf"] = True
# Subdirectory name handed to teeplot when saving figures; the NOTEBOOK_NAME
# environment variable takes precedence over the hard-coded fallback.
teeplot_subdir = os.environ.get(
    "NOTEBOOK_NAME", "2024-10-20-qos-dstream-vs-naive-steady"
)
# Bare expression: echo the resolved value as the cell output.
teeplot_subdir


In [None]:
# Number of ingests simulated per algorithm; read as a global by make_df.
num_items = 10_000


## Define Max Gap Calculators


In [None]:
def calc_qos_from_segment_lengths(segment_lengths: list[int]) -> float:
    """Score a set of segments by the length of its longest member.

    Parameters
    ----------
    segment_lengths : list[int]
        Lengths of the segments to score; must be non-empty.

    Returns
    -------
    float
        The largest value in `segment_lengths`.

    Raises
    ------
    ValueError
        If `segment_lengths` is empty (propagated from `max`).
    """
    longest = max(segment_lengths)
    return longest


In [None]:
def calc_max_gaps_doubling_steady(
    buffer_size: int, num_ingests: int
) -> list[int]:
    """Tabulate the closed-form max gap size for each ingest index.

    For ingest index `i`, the reported gap is
    ``2 ** max(i.bit_length() - buffer_size.bit_length() + 2, 0) - 1``.

    Parameters
    ----------
    buffer_size : int
        Buffer capacity; only its bit length enters the formula.
    num_ingests : int
        Number of ingest indices (0 through `num_ingests - 1`) to evaluate.

    Returns
    -------
    list[int]
        One max gap value per ingest index, in order.
    """
    size_bits = int(buffer_size).bit_length()
    max_gaps = []
    for ingest in range(num_ingests):
        # Clamp at zero so early ingests report a gap of 0 rather than a
        # fractional power of two.
        exponent = max(ingest.bit_length() - size_bits + 2, 0)
        max_gaps.append((1 << exponent) - 1)
    return max_gaps


In [None]:
def calc_max_gaps_naive_zhao_2006(
    buffer_size: int, num_ingests: int
) -> list[int]:
    """Simulate a segment-merging buffer and record its max gap per ingest.

    The buffer is modelled as a list of segment lengths. Until it holds
    `buffer_size` segments, every ingest adds a new length-1 segment. Once
    full, an ingest either extends the newest segment (when there are more
    than two segments and the newest is shorter than its predecessor) or
    merges the adjacent pair with the smallest combined length (leftmost on
    ties) and then opens a fresh length-1 segment.

    Parameters
    ----------
    buffer_size : int
        Maximum number of segments held at once.
    num_ingests : int
        Number of ingests to simulate.

    Returns
    -------
    list[int]
        After each ingest, the longest segment length minus one.

    Raises
    ------
    ValueError
        If an ingest arrives when the buffer is full and `buffer_size` is
        below 2, since there is no adjacent pair to merge.
    """
    segments: list[int] = []
    gap_history: list[int] = []
    for step in range(num_ingests):
        if len(segments) < buffer_size:
            # Fill phase: each ingest gets its own segment.
            segments.append(1)
        else:
            newest_is_short = (
                len(segments) > 2 and segments[-1] < segments[-2]
            )
            if newest_is_short:
                segments[-1] += 1
            else:
                # Pick the leftmost adjacent pair with the smallest total.
                merge_at = min(
                    range(buffer_size - 1),
                    key=lambda k: segments[k] + segments[k + 1],
                )
                absorbed = segments.pop(merge_at + 1)
                segments[merge_at] += absorbed
                segments.append(1)

            # Every ingest so far must be accounted for by some segment.
            assert sum(segments) == step + 1
        gap_history.append(max(segments) - 1)
    return gap_history


In [None]:
def calc_max_gaps_dstream(buffer_size: int, num_items: int) -> list[int]:
    """Compute the max gap per ingest using dstream's steady algorithm.

    For each ingest index `i` at or beyond `buffer_size`, the values from
    `dstream.steady_algo.lookup_ingest_times_eager(buffer_size, i + 1)` are
    sorted, `i` is appended, and the largest `later - earlier - 1` over
    consecutive pairs is reported. Indices below `buffer_size` report 0.

    NOTE(review): the lookup presumably yields the ingest times currently
    held in the buffer after `i + 1` ingests -- confirm against downstream.

    Parameters
    ----------
    buffer_size : int
        Buffer size passed through to the dstream lookup.
    num_items : int
        Number of ingest indices to evaluate.

    Returns
    -------
    list[int]
        One max gap value per ingest index, in order.
    """
    max_gaps = []
    for newest in range(num_items):
        if newest < buffer_size:
            max_gaps.append(0)
            continue

        timeline = sorted(
            dstream.steady_algo.lookup_ingest_times_eager(
                buffer_size, newest + 1
            ),
        )
        # Close the timeline at the newest ingest index.
        timeline.append(newest)
        widest = max(
            later - earlier - 1
            for earlier, later in it.pairwise(timeline)
        )
        max_gaps.append(widest)
    return max_gaps


In [None]:
def calc_max_gaps_gunther(buffer_size: int, num_items: int) -> list[int]:
    """Compute the max gap per ingest using dstream's compressing algorithm.

    For each ingest index `i` at or beyond `buffer_size`, the values from
    `dstream.compressing_algo.lookup_ingest_times_eager(buffer_size, i + 1)`
    are sorted, `i` is appended, and the largest `later - earlier - 1` over
    consecutive pairs is reported. Indices below `buffer_size` report 0.

    NOTE(review): the lookup presumably yields the ingest times currently
    held in the buffer after `i + 1` ingests -- confirm against downstream.

    Parameters
    ----------
    buffer_size : int
        Buffer size passed through to the dstream lookup.
    num_items : int
        Number of ingest indices to evaluate.

    Returns
    -------
    list[int]
        One max gap value per ingest index, in order.
    """

    def widest_gap_at(index: int) -> int:
        # Retained ingest times in ascending order, closed off by `index`.
        held = sorted(
            dstream.compressing_algo.lookup_ingest_times_eager(
                buffer_size, index + 1
            ),
        )
        boundaries = [*held, index]
        return max(
            right - left - 1 for left, right in it.pairwise(boundaries)
        )

    return [
        0 if index < buffer_size else widest_gap_at(index)
        for index in range(num_items)
    ]


## Example Plot


In [None]:
def make_df(buffer_size: int) -> pl.DataFrame:
    """Build a long-format table of max gap size per ingest, per algorithm.

    Each algorithm is evaluated for the module-level `num_items` ingests and
    contributes one frame; the frames are concatenated in the order listed.

    Parameters
    ----------
    buffer_size : int
        Buffer size forwarded to every gap calculator.

    Returns
    -------
    pl.DataFrame
        Columns "Algorithm", "Max Gap Size", and "Num Items Ingested".
    """
    calculators = (
        ("zhao steady", calc_max_gaps_naive_zhao_2006),
        ("doubling steady", calc_max_gaps_doubling_steady),
        ("gunther steady", calc_max_gaps_gunther),
    )
    frames = []
    for label, calc_max_gaps in calculators:
        frames.append(
            pl.DataFrame(
                {
                    "Algorithm": label,
                    "Max Gap Size": calc_max_gaps(buffer_size, num_items),
                    "Num Items Ingested": range(num_items),
                },
            ),
        )
    return pl.concat(frames)


In [None]:
# Draw one max-gap-vs-ingests line plot per buffer size, once with default
# fonts and once with a serif font family, saving each through teeplot.
algorithm_order = ["gunther steady", "doubling steady", "zhao steady"]
legend_options = dict(
    bbox_to_anchor=(0.4, 1),
    ncol=3,
    title=None,
    frameon=False,
)
for buffer_size in (64, 256, 1024, 4096):
    df = make_df(buffer_size)
    for rc in ({}, {"font.family": "serif"}):
        with mpl.rc_context(rc=rc), tp.teed(
            sns.relplot,
            df,
            x="Num Items Ingested",
            y="Max Gap Size",
            hue="Algorithm",
            hue_order=algorithm_order,
            style="Algorithm",
            aspect=2.8,
            kind="line",
            height=1.8,
            palette="Set2",
            teeplot_outattrs=rc,
            teeplot_subdir=teeplot_subdir,
        ) as g:
            # Lift the legend above the axes as a single frameless row.
            sns.move_legend(g, "lower center", **legend_options)
