In [None]:
# Load the `watermark` IPython extension so the `%watermark` magic used in a
# later cell is available.
%load_ext watermark


In [None]:
import itertools as it
import os

from downstream import dstream
import more_itertools as mit
import polars as pl
import seaborn as sns
from teeplot import teeplot as tp

import pylib  # noqa: F401


In [None]:
# Emit a watermark for this run; the flags select what is reported
# (presumably date/machine/interpreter details plus imported package
# versions -- see the watermark documentation to confirm each flag).
%watermark -diwmuv -iv


In [None]:
# Value later passed to teeplot as `teeplot_subdir`; the NOTEBOOK_NAME
# environment variable takes precedence over the built-in default when set.
teeplot_subdir = os.getenv(
    "NOTEBOOK_NAME",
    "2024-10-20-qos-dstream-vs-naive-tilted",
)
teeplot_subdir


In [None]:
# Buffer capacity handed to both gap-calculation functions.
buffer_size = 2**6
# Number of stream positions evaluated (one plotted point per position).
num_items = 10**4


## Define Helper Functions


In [None]:
def calc_qos_from_segment_lengths(segment_lengths: list[int]) -> float:
    """Return the worst-case relative gap cost over a run of segments.

    Each segment contributes ``(length - 1) / remaining``, where
    ``remaining`` is the combined length of that segment and every segment
    after it (the grand total minus the lengths that precede it). The
    largest such ratio is returned.

    A trailing sentinel segment of length 1 is always considered; it
    contributes ``0.0``, so the result for an empty list is ``0.0``.
    """
    total = sum(segment_lengths)
    consumed = 0  # combined length of the segments already visited
    ratios = []
    for length in segment_lengths:
        # `or 1` avoids dividing by zero once nothing remains.
        ratios.append((length - 1) / ((total - consumed) or 1))
        consumed += length

    # Sentinel segment of length 1 placed after all real segments.
    ratios.append((1 - 1) / ((total - consumed) or 1))
    return max(ratios)


In [None]:
def calc_max_gaps_naive_doubling_tilted(
    buffer_size: int, num_ingests: int
) -> list[float]:
    """Simulate the naive doubling strategy and record its gap cost over time.

    Segments of length 1 are appended one per ingest. Whenever the number
    of segments equals ``buffer_size``, adjacent segments are merged
    pairwise before the next one is appended.

    Parameters
    ----------
    buffer_size : int
        Segment count that triggers a pairwise merge. Odd values are
        accepted: the unpaired final segment is carried over unmerged.
        Expected to be at least 2; smaller values leave nothing to merge.
    num_ingests : int
        Number of ingests to simulate.

    Returns
    -------
    list[float]
        ``num_ingests`` entries. Entry ``i`` is the cost reported by
        ``calc_qos_from_segment_lengths`` after ``i`` ingests; entry 0 is
        the integer ``0`` for the empty buffer.
    """
    segment_lengths: list[int] = []
    max_gaps: list[float] = [0]
    for num_ingested in range(1, num_ingests + 1):
        if len(segment_lengths) == buffer_size:
            # Summing each batch (rather than unpacking `a, b`) keeps an
            # odd segment count from raising on the final one-element batch.
            segment_lengths = [
                sum(batch) for batch in mit.batched(segment_lengths, 2)
            ]
        segment_lengths.append(1)

        # Merging must conserve the total number of ingested items.
        assert sum(segment_lengths) == num_ingested
        max_gaps.append(calc_qos_from_segment_lengths(segment_lengths))

    # Drop the final entry so index i corresponds to i completed ingests.
    return max_gaps[:-1]


In [None]:
def calc_max_gaps_dstream(buffer_size: int, num_items: int) -> list[float]:
    """Record the gap cost of the dstream tilted algorithm over time.

    Parameters
    ----------
    buffer_size : int
        Buffer size passed to the dstream lookup.
    num_items : int
        Number of entries to produce.

    Returns
    -------
    list[float]
        ``num_items`` entries. For index ``i < buffer_size`` the entry is
        the integer ``0``. Otherwise it is the cost reported by
        ``calc_qos_from_segment_lengths`` for the differences between
        consecutive sorted values returned by
        ``dstream.tilted_algo.lookup_ingest_times_eager(buffer_size, i + 1)``.
    """
    max_gaps: list[float] = []
    for num_ingested in range(1, num_items + 1):
        if num_ingested <= buffer_size:
            # No lookup is performed until the time exceeds the buffer size.
            max_gaps.append(0)
            continue

        # NOTE(review): presumably these are the ingest times of the items
        # retained at this time -- confirm against the downstream docs.
        ingest_times = sorted(
            dstream.tilted_algo.lookup_ingest_times_eager(
                buffer_size, num_ingested
            ),
        )
        segment_lengths = [
            later - earlier
            for earlier, later in mit.pairwise(ingest_times)
        ]
        max_gaps.append(calc_qos_from_segment_lengths(segment_lengths))

    return max_gaps


## Example Plot


In [None]:
# Long-format table: one row per (algorithm, stream position) pair, built by
# running each gap calculator with the shared buffer size and item count.
gap_calculators = {
    "naive tilted": calc_max_gaps_naive_doubling_tilted,
    "dstream tilted": calc_max_gaps_dstream,
}
df = pl.concat(
    [
        pl.DataFrame(
            {
                "Algorithm": algorithm,
                "Gap Size Cost": calc_max_gaps(buffer_size, num_items),
                "Num Items Ingested": range(num_items),
            },
            strict=False,
        )
        for algorithm, calc_max_gaps in gap_calculators.items()
    ],
)


In [None]:
# Line plot of gap cost against stream position, one line per algorithm.
relplot_kwargs = dict(
    x="Num Items Ingested",
    y="Gap Size Cost",
    hue="Algorithm",
    style="Algorithm",
    aspect=2,
    kind="line",
    height=2.2,
    palette="Set2",
)
# Legend goes above the axes, in a single untitled, frameless row.
legend_kwargs = dict(
    bbox_to_anchor=(0.4, 1),
    ncol=2,
    title=None,
    frameon=False,
)
with tp.teed(
    sns.relplot, df, **relplot_kwargs, teeplot_subdir=teeplot_subdir
) as g:
    sns.move_legend(g, "lower center", **legend_kwargs)
