In [1]:
%load_ext jupyter_black

In [2]:
from __future__ import annotations

import os
import abc
from typing import *  # type: ignore

from typing_extensions import Self
import numpy as np
import tensorflow as tf

if TYPE_CHECKING:
    # from keras.api._v2.keras import Model, layers
    from keras import Model, layers  # type: ignore
else:
    from tensorflow.keras import Model, layers  # type: ignore

from sevirs._typing import Array, Tensor, Nd, B, C, W, L, T

2023-08-16 08:52:48.169562: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Extents of the synthetic demo array, one name per axis in order:
# (BATCH, CHANNELS, WIDTH, LENGTH, TIME).
SHAPE = (15, 3, 256, 256, 49)
BATCH, CHANNELS, WIDTH, LENGTH, TIME = SHAPE

# Each element holds its own flat (C-order) position, which makes slices easy to
# recognise when inspecting them.
a = np.reshape(np.arange(np.prod(SHAPE)), SHAPE)

# General-purpose type variable for the generic helpers in this notebook.
_T = TypeVar("_T")

# def ar() -> Tensor[Nd[B, C, W, L, T], np.int64]:
#     ...

# Abstract Base Classes

In [4]:
class IOMixin(abc.ABC):
    """Abstract mixin declaring a disk round-trip interface.

    Subclasses must implement both :meth:`from_disk` and :meth:`to_disk`;
    neither method has a default implementation here, and the class cannot be
    instantiated until both are overridden.
    """

    @classmethod
    @abc.abstractmethod
    def from_disk(cls: Type[Self], path: os.PathLike) -> Self:
        """Abstract: return an instance of ``cls`` built from ``path``."""

    @abc.abstractmethod
    def to_disk(self, path: os.PathLike) -> None:
        """Abstract: write this object to ``path``."""

# IterableDataset

In [9]:
# Type of the keys used to look up one item (the elements of ``DataGenerator.indices``).
IndexT = TypeVar("IndexT")
# Type of the value produced for one key (what ``DataGenerator.get`` returns).
ValueT = TypeVar("ValueT")
import logging
import tqdm
import polars as pl
from sevirs.utils.sampler import Sampler

class DataGenerator(Iterable[ValueT], Generic[IndexT, ValueT]):
    """Abstract, index-driven collection of values.

    The generator is constructed from an iterable of indices. Subclasses supply
    :meth:`get` (index -> value) and :meth:`get_metadata` (index -> polars
    DataFrame); this base class builds selection, iteration, positional access
    and ``len()`` on top of those two hooks.

    Type parameters:
        IndexT: type of the entries stored in ``indices``.
        ValueT: type returned by :meth:`get`.
    """

    def __init__(self, indices: Iterable[IndexT]) -> None:
        """Store ``indices`` as an immutable tuple.

        Args:
            indices: the keys to serve, in iteration order. The iterable is
                consumed once here, so one-shot iterators are safe to pass.
        """
        super().__init__()
        self.indices: Final[tuple[IndexT, ...]] = tuple(indices)

    # =================================================================================================================
    # - abstract methods
    @abc.abstractmethod
    def get(self, index: IndexT) -> ValueT:
        """Return the value associated with ``index``."""
        ...

    @abc.abstractmethod
    def get_metadata(self, img_id: IndexT | None = None) -> pl.DataFrame:
        """Return metadata as a polars DataFrame, optionally for a single ``img_id``."""
        ...

    # =================================================================================================================
    # - overloads
    # Only the ``Literal[False]`` overload carries a default. If both did, a call
    # without ``metadata`` would match both signatures and the overloads would overlap.
    @overload
    def select(self, index: IndexT, *, metadata: Literal[False] = False) -> ValueT:
        ...

    @overload
    def select(self, index: IndexT, *, metadata: Literal[True]) -> tuple[ValueT, pl.DataFrame]:
        ...

    def select(self, index: IndexT, *, metadata: bool = False) -> ValueT | tuple[ValueT, pl.DataFrame]:
        """Fetch the value for ``index``, optionally paired with its metadata.

        Args:
            index: key passed to :meth:`get` (and to :meth:`get_metadata`).
            metadata: when true, return ``(value, metadata_frame)`` instead of
                the bare value.
        """
        values = self.get(index)
        if metadata:
            return values, self.get_metadata(index)
        return values

    @overload
    def iterate(self, *, metadata: Literal[False] = False) -> Iterator[ValueT]:
        ...

    @overload
    def iterate(self, *, metadata: Literal[True]) -> Iterator[tuple[ValueT, pl.DataFrame]]:
        ...

    def iterate(self, *, metadata: bool = False) -> Iterator[ValueT | tuple[ValueT, pl.DataFrame]]:
        """Lazily yield :meth:`select` results for every stored index.

        This is a generator function: the log message is emitted and the tqdm
        progress bar is created only when the first item is requested.

        Args:
            metadata: forwarded unchanged to :meth:`select` for every index.
        """
        logging.info("🏃 Iterating over Dataset 🏃")
        for index in tqdm.tqdm(self.indices):
            # ``metadata`` is a plain ``bool`` here, which matches neither Literal overload.
            yield self.select(index, metadata=metadata)  # type: ignore[call-overload]

    # =================================================================================================================
    # - dunder methods
    def __getitem__(self, idx: int) -> ValueT:
        """Return the value for the ``idx``-th stored index (a position in ``indices``, not a key)."""
        return self.get(self.indices[idx])

    def __iter__(self) -> Iterator[ValueT]:
        """Iterate over bare values, without metadata."""
        yield from self.iterate(metadata=False)

    def __len__(self) -> int:
        """Number of stored indices."""
        return len(self.indices)


class Dataloader(Generic[ValueT]):
    """Work-in-progress loader wrapping a :class:`DataGenerator`.

    Only ``data`` is stored at present; every other constructor argument is
    accepted and then ignored.

    NOTE(review): the parameter list looks modelled on
    ``torch.utils.data.DataLoader`` — confirm the intended semantics before
    implementing batching, sampling or worker support.
    """

    def __init__(
        self,
        data: DataGenerator[Any, ValueT],
        # NOTE(review): second required generator argument; never read in the body.
        dataset: DataGenerator,
        batch_size: int | None = 1,
        shuffle: bool | None = None,
        sampler: Sampler | Iterable | None = None,
        batch_sampler: Sampler[Sequence] | Iterable[Sequence] | None = None,
        num_workers: int = 0,
        # NOTE(review): ``TensorPair`` is not defined in the visible part of this notebook; the
        # annotation only works because ``from __future__ import annotations`` defers evaluation.
        collate_fn: Callable[[list[TensorPair]], Any] | None = None,
        pin_memory: bool = False,
        drop_last: bool = False,
        timeout: float = 0,
        worker_init_fn: Callable[[int], None] | None = None,
        multiprocessing_context=None,
        generator=None,
        *,
        prefetch_factor: int | None = None,
        persistent_workers: bool = False,
        pin_memory_device: str = "",
    ) -> None:
        """Keep a reference to ``data``; all remaining arguments are currently unused."""
        super().__init__()
        self.data = data

In [6]:
import polars as pl


class FeatureGenerator(DataGenerator[int, tuple[Tensor, Tensor]]):
    """Serve ``(data[i], data[i + 1])`` pairs of consecutive entries along axis 0.

    Because each item reads position ``i + 1`` as well as ``i``, a tensor with
    ``n`` leading entries yields ``n - 1`` pairs (none for ``n <= 1``).

    NOTE(review): presumably the pair is (input, next-step target) — confirm
    against the training code that consumes it.
    """

    def __init__(self, data: Tensor):
        """Index every position of ``data`` that has a successor.

        Args:
            data: tensor supporting ``len()`` and integer indexing on axis 0.
        """
        # Stop one short of the end: ``get(i)`` also reads ``data[i + 1]``, so the
        # final position has no partner and would index past the tensor. A
        # non-positive bound makes ``range`` empty, covering 0- and 1-element inputs.
        super().__init__(range(len(data) - 1))
        self.data = data

    def get(self, idx: int) -> tuple[Tensor, Tensor]:
        """Return the entry at ``idx`` together with the entry that follows it."""
        return self.data[idx], self.data[idx + 1]

    def get_metadata(self, img_id: int | None = None) -> pl.DataFrame:
        # TODO: placeholder — no metadata is produced yet, so this returns ``None``
        # despite the ``pl.DataFrame`` annotation.
        ...


# Convert the demo array ``a`` to a TensorFlow tensor and wrap it in a FeatureGenerator.
fg = FeatureGenerator(tf.convert_to_tensor(a))

2023-08-16 08:52:57.742526: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-16 08:52:57.743747: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-16 08:52:57.743813: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-16 08:52:57.745368: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-16 08:52:57.745481: I tensorflow/compile

In [10]:
class S(Sequence[int]):
    """Empty ``Sequence`` subclass used to show which methods the ABC requires."""


# ``Sequence`` leaves ``__getitem__`` and ``__len__`` abstract, so instantiating
# ``S`` raises ``TypeError``. Catch and display it, so the demonstration stays
# visible without stopping a Restart & Run All.
try:
    S()
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Can't instantiate abstract class S with abstract methods __getitem__, __len__

# Loader

# Autoencoder

In [8]:
class Autoencoder(Model):
    """
    tensorflow generative [autoencoder](https://www.tensorflow.org/tutorials/generative/autoencoder)

    The encoder flattens its input and projects it onto ``latent_dim`` units with
    a ReLU activation. The decoder maps the latent vector back through a
    sigmoid-activated dense layer with one unit per element of ``shape`` and
    reshapes the result to ``shape``.
    """

    def __init__(self, latent_dim: int, shape: tuple[int, ...]) -> None:
        """Build the encoder and decoder sub-models.

        Args:
            latent_dim: width of the dense bottleneck layer.
            shape: per-sample shape the decoder reshapes its output to.
        """
        super().__init__()
        self.latent_dim = latent_dim
        self.shape = shape
        # ``Dense(units=...)`` expects a plain Python integer. Compute the element
        # count eagerly instead of handing Keras a ``tf.Tensor`` and relying on an
        # implicit conversion.
        n_outputs = int(np.prod(shape))
        self.encoder = tf.keras.Sequential(
            [
                layers.Flatten(),
                layers.Dense(latent_dim, activation="relu"),
            ]
        )
        self.decoder = tf.keras.Sequential(
            [layers.Dense(n_outputs, activation="sigmoid"), layers.Reshape(shape)]
        )

    def call(self, x: tf.Tensor, training: bool = False) -> tf.Tensor:
        """Encode ``x`` to the latent space and decode it back.

        Args:
            x: input batch passed to the encoder.
            training: forwarded to both sub-models so that any training-dependent
                layer added later receives the correct mode.

        Returns:
            The decoder output, reshaped per sample to ``shape``.
        """
        encoded = self.encoder(x, training=training)
        decoded = self.decoder(encoded, training=training)
        return decoded