# SlidingWindowSampler

In [None]:
# Notebook-wide setup. The `%...` lines are IPython line magics and are only
# valid when this cell is executed by an IPython/Jupyter kernel.

# Echo the value of the last expression *or assignment* of every cell.
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.
# Render inline matplotlib figures as SVG.
%config InlineBackend.figure_format = 'svg'
# Load the autoreload extension; mode 2 re-imports modules before each cell
# execution so that edits to library code are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Draw matplotlib figures inside the notebook output.
%matplotlib inline

import logging

# Configure the root logger so INFO-level (and more severe) records are shown.
logging.basicConfig(level=logging.INFO)

In [None]:
import numpy as np

# Fresh random generator for this session (no seed given).
rng = np.random.default_rng()

# Print arrays with exactly 4 decimals and without scientific notation.
np.set_printoptions(
    suppress=True,
    floatmode="fixed",
    precision=4,
)

In [None]:
from tsdm.random.samplers import BaseSampler

In [None]:
from collections.abc import Iterable, Sequence
from typing import Generic, Literal, Optional, TypeVar, Union

In [None]:
from datetime import datetime as py_dt, timedelta as py_td

from numpy import (
    datetime64 as np_dt,
    floating as np_float,
    integer as np_int,
    timedelta64 as np_td,
)
from pandas import Timedelta as pd_td, Timestamp as pd_dt

# Constrained type variables for the time-like scalars this notebook works with.
# Each one accepts the stdlib, numpy and pandas flavour of the respective type.
# (The two definitions below used to be repeated verbatim; one copy suffices.)
TimestampLike = TypeVar("TimestampLike", py_dt, np_dt, pd_dt)
TimedeltaLike = TypeVar("TimedeltaLike", py_td, np_td, pd_td)

# Anything usable as a time axis value or a duration: plain/numpy numbers,
# timestamps, or timedeltas.
TimeLike = TypeVar(
    "TimeLike", int, float, np_int, np_float, py_dt, np_dt, pd_dt, py_td, np_td, pd_td
)

In [None]:
# Sanity check: numpy timedeltas are ordered, so the builtin `max` works on them.
t0, t1 = np.timedelta64(5, "s"), np.timedelta64(3, "s")

max(t0, t1)

In [None]:
from tsdm.random.samplers._samplers import grid

In [None]:
class SlidingWindowSampler(BaseSampler, Generic[TimeLike]):
    r"""Sampler that generates sliding windows over an interval.

    Every window consists of ``len(horizons)`` consecutive segments whose
    lengths are given by ``horizons``. The window is moved forward in steps of
    ``stride``; the admissible step indices are precomputed with `grid`.

    Inputs:
    - Ordered timestamps T (``data_source``)
    - Starting time t_0 (``tmin``, defaults to the first timestamp)
    - Final time t_f (``tmax``, defaults to the last timestamp)
    - stride ∆t: how much the sampler advances at each step (required)
    - horizons: a single duration or a sequence of durations

    What is yielded per step depends on ``mode``:
    - ``"points"``: array of the ``len(horizons) + 1`` segment boundaries
    - ``"slices"``: tuple of ``len(horizons)`` `slice` objects
    - ``"masks"``: tuple of ``len(horizons)`` boolean masks over the data
    """

    shuffle: bool
    # NOTE(review): the two flags below are only declared, never assigned in
    # this class; the behaviour is selected through ``mode`` instead.
    return_mask: bool
    return_slice: bool
    total_horizon: TimeLike

    def __init__(
        self,
        data_source: Sequence[TimeLike],
        /,
        *,
        stride: TimeLike,
        horizons: Union[TimeLike, Sequence[TimeLike]],
        shuffle: bool = False,
        tmin: Optional[TimeLike] = None,
        tmax: Optional[TimeLike] = None,
        mode: Literal["masks", "slices", "points"] = "masks",
    ):
        """Set up the window geometry and the grid of admissible steps.

        Parameters
        ----------
        data_source
            Timestamps to sample from; forwarded to ``BaseSampler.__init__``.
        stride
            Step by which the window advances; must be strictly positive.
        horizons
            Length of each window segment (a scalar is treated as one segment).
        shuffle
            If true, windows are yielded in random order.
        tmin, tmax
            Interval bounds; default to the first / last element of the data.
        mode
            One of ``"masks"``, ``"slices"``, ``"points"``.
        """
        super().__init__(data_source)
        self.shuffle = shuffle
        self.mode = mode
        self.stride = stride

        # A lone horizon is wrapped so that the rest of the code always sees
        # a one-dimensional array of segment lengths.
        if isinstance(horizons, Iterable):
            horizons = list(horizons)
        else:
            horizons = [horizons]

        self.horizons = np.array(horizons)
        # ``ndarray.sum`` instead of builtin ``sum``: the latter starts from the
        # integer 0, and ``0 + timedelta`` is undefined for datetime/pandas types.
        self.total_horizon = self.horizons.sum()

        self.tmin = self.data[0] if tmin is None else tmin
        self.tmax = self.data[-1] if tmax is None else tmax

        # ``tmin - tmin`` yields the zero *duration* of the matching type.
        self.zero_td = self.tmin - self.tmin
        # Previously ``0 * tmin``, which raises for timestamp types. Wherever
        # that product was defined it equals ``zero_td``; kept as an alias.
        self.zero_dt = self.zero_td

        # Stride is a duration, hence compared against the zero duration.
        assert self.stride > self.zero_td, "stride cannot be zero."

        # Segment boundaries of the very first window: tmin, tmin+h1, tmin+h1+h2, ...
        boundaries = np.cumsum(np.concatenate([[self.zero_td], self.horizons]))
        self.start_values = self.tmin + boundaries

        # Precompute the step indices k; `__iter__` shifts the first window by
        # ``k * stride``, so the grid has to be built from the stride (not from
        # the total horizon).
        # NOTE(review): whether `grid` keeps the *end* of the last window inside
        # [tmin, tmax] depends on its implementation - confirm.
        self.grid = np.array(grid(self.tmin, self.tmax, self.stride))

    def __len__(self):
        """Return the number of data points.

        NOTE(review): iteration yields ``len(self.grid)`` windows, which may
        differ from this value - confirm which one callers expect.
        """
        return len(self.data)

    @staticmethod
    def __make__points__(vals):
        """Return the boundary values unchanged."""
        return vals

    @staticmethod
    def __make__slices__(vals):
        """Return one ``slice(start, stop)`` per pair of adjacent boundaries."""
        return tuple(slice(x, y) for x, y in zip(vals[:-1], vals[1:]))

    def __make__masks__(self, vals):
        """Return one boolean mask ``start <= data < stop`` per segment."""
        return tuple(
            (x <= self.data) & (self.data < y) for x, y in zip(vals[:-1], vals[1:])
        )

    def __iter__(self):
        """Iterate over all windows.

        For each grid index k the boundaries $(x₀+k⋅∆t, x₁+k⋅∆t, …, xₘ+k⋅∆t)$
        are computed and converted according to ``self.mode``:

        - ``"points"``: the boundaries themselves
        - ``"slices"``: $(slice(x₀+k⋅∆t, x₁+k⋅∆t), …, slice(xₘ₋₁+k⋅∆t, xₘ+k⋅∆t))$
        - ``"masks"``: $(mask₁, …, maskₘ)$

        With ``shuffle`` enabled the grid indices are visited in random order.
        """
        make = {
            "masks": self.__make__masks__,
            "points": self.__make__points__,
            "slices": self.__make__slices__,
        }[self.mode]

        if self.shuffle:
            steps = self.grid[np.random.permutation(len(self.grid))]
        else:
            steps = self.grid

        # A fresh array is built for every step. The former in-place ``+=``
        # modified ``self.start_values`` itself, which skipped the first window,
        # made repeated iteration drift and handed out the same array object
        # for every step in "points" mode.
        for k in steps:
            yield make(self.start_values + k * self.stride)

In [None]:
# Integer "timestamps" 0, 1, ..., 99 as a toy time axis.
td = np.arange(100)

# Two-segment window (lengths 5 and 3), advanced one unit at a time.
obj = SlidingWindowSampler(
    td,
    mode="points",
    shuffle=True,
    horizons=[5, 3],
    stride=1,
)
obj.start_values

## Manual execution

In [None]:
import datetime as dt
from datetime import datetime as py_dt, timedelta as py_td
from typing import Sequence, cast

import numpy as np
import pandas as pd
from numpy.typing import NDArray
from pandas import DataFrame, Series, Timedelta, Timestamp

from tsdm.random.samplers import grid
from tsdm.utils.types.time import NumpyDTVar, NumpyTDVar

In [None]:
# Hand-picked arguments for stepping through the sampler set-up manually.
# The names match the keyword arguments of `SlidingWindowSampler.__init__`.
stride = "5m"
mode = "points"
horizons = ["15m", "30m"]
shuffle = False

# 200 random gaps, each drawn uniformly from [0, 1) and interpreted as minutes.
tds = pd.Series(pd.to_timedelta(np.random.rand(200), "m"))
# Observation interval: Timestamp(0) up to two hours later.
tmin = pd.Timestamp(0)
tmax = tmin + pd.Timedelta(2, "h")
# Timestamps: tmin, then tmin plus the running sum of the gaps, then tmax.
# NOTE(review): tmax is appended without checking that the running sums stay
# below it, so T is not guaranteed to be sorted.
T = pd.concat([Series([tmin]), tmin + tds.cumsum(), Series([tmax])])
# Replace the concatenated (repeating) index by a fresh 0..n-1 range.
T = T.reset_index(drop=True)

# Rebind the bounds to None; T already holds the timestamps built from them.
tmin = None
tmax = None

In [None]:
class Dummy: ...


# Bare attribute container standing in for a sampler instance, so that the
# constructor logic can be run statement by statement.
self = Dummy()
self.data = T

# String arguments are promoted to pandas scalars; anything else passes through.
if isinstance(horizons, str):
    horizons = Timedelta(horizons)
if isinstance(stride, str):
    stride = Timedelta(stride)
if isinstance(tmin, str):
    tmin = Timestamp(tmin)
if isinstance(tmax, str):
    tmax = Timestamp(tmax)

self.shuffle, self.mode, self.stride = shuffle, mode, stride

# Lower bound: explicit value wins, otherwise the first data point
# (pandas containers need positional access through `.iloc`).
if tmin is not None:
    self.tmin = tmin
elif isinstance(self.data, (Series, DataFrame)):
    self.tmin = self.data.iloc[0]
else:
    self.tmin = self.data[0]

# Upper bound: same rule, using the last data point.
if tmax is not None:
    self.tmax = tmax
elif isinstance(self.data, (Series, DataFrame)):
    self.tmax = self.data.iloc[-1]
else:
    self.tmax = self.data[-1]

# Subtracting tmin from itself gives a zero duration of the matching type.
self.zero_td = cast(NumpyTDVar, self.tmin - self.tmin)
assert self.stride > self.zero_td, "stride cannot be zero."

In [None]:
self.horizons

In [None]:
self.horizons.insert(0, self.zero_td)

In [None]:
# Normalise `horizons` and derive the total and cumulative horizons from it.
if isinstance(horizons, Sequence):
    self.multi_horizon = True
    if isinstance(horizons[0], (str, Timedelta, py_td)):
        # Strings / timedelta objects: let pandas parse them, then prepend the
        # zero duration with the index's own `insert`.
        self.horizons = pd.to_timedelta(horizons)
        concat_horizons = self.horizons.insert(0, self.zero_td)
    else:
        self.horizons = np.array(horizons)
        # Flat concatenation; a nested list `[[zero], horizons]` would be
        # ragged and cannot be turned into an array.
        concat_horizons = np.concatenate(([self.zero_td], self.horizons))

    self.total_horizon = self.horizons.sum()
    # Running sum 0, h1, h1+h2, ...: the segment boundaries relative to the
    # window start.
    self.cumulative_horizons = np.cumsum(concat_horizons)

else:
    # A single horizon: one segment whose length is the total horizon.
    self.multi_horizon = False
    self.horizons = horizons
    self.total_horizon = self.horizons
    self.cumulative_horizons = np.cumsum([self.zero_td, self.horizons])

In [None]:
self.data

In [None]:
self.cumulative_horizons

In [None]:
# Boundaries of the first window: tmin shifted by every cumulative horizon.
# `cast` is for the type checker only; at runtime it returns its second
# argument unchanged.
self.start_values = cast(
    NDArray[NumpyDTVar],
    self.tmin + self.cumulative_horizons,  # type: ignore[call-overload, operator]
)

# tmin advanced by the total horizon.
self.offset = cast(
    NumpyDTVar,
    self.tmin + self.total_horizon,  # type: ignore[call-overload, operator]
)

# precompute the possible slices
# NOTE(review): `grid` comes from tsdm; presumably it returns the step indices
# k for which tmin + k * stride lies within [tmin, tmax] - confirm.
self.grid = np.array(grid(self.tmin, self.tmax, self.stride))

In [None]:
dt.datetime(2020, 1, 1) + dt.timedelta(1)

In [None]:
# Variant of the start_values/offset/grid computation that works on a
# module-level `cumulative_horizons` instead of `self.cumulative_horizons`.
# NOTE(review): no visible cell assigns a module-level `cumulative_horizons`
# before this line; unless it was defined earlier in the kernel session this
# raises NameError - confirm whether this cell is still needed.
cumulative_horizons = np.cumsum(cumulative_horizons)

# Boundaries of the first window: tmin shifted by every cumulative horizon.
# `cast` is for the type checker only; at runtime it returns its second
# argument unchanged.
self.start_values = cast(
    NDArray[NumpyDTVar],
    self.tmin + cumulative_horizons,  # type: ignore[call-overload, operator]
)

# tmin advanced by the total horizon.
self.offset = cast(
    NumpyDTVar,
    self.tmin + self.total_horizon,  # type: ignore[call-overload, operator]
)

# precompute the possible slices
self.grid = np.array(grid(self.tmin, self.tmax, self.stride))

In [None]:
self.horizons

In [None]:
np.array(self.tmin)

In [None]:
self.tmax

In [None]:
self.tmin

In [None]:
cumulative_horizons

In [None]:
self.offset