In [None]:
from __future__ import annotations

from abc import ABC, abstractmethod
from collections import namedtuple
from typing import NamedTuple, TypeVar

In [None]:
T = TypeVar("T")


class _NamedTuple(tuple[T, ...], ABC):
    r"""Abstract marker class used to check for namedtuples.

    A class counts as a (virtual) subclass when either

    - it was registered explicitly via ``_NamedTuple.register(cls)``, or
    - it is a ``tuple`` subclass exposing the namedtuple API checked in
      ``__subclasshook__`` (a ``_fields`` tuple of strings plus callable
      ``_make``, ``_replace`` and ``_asdict``).

    The abstract members below only describe the expected interface.
    """

    __slots__ = ()

    @classmethod
    def __subclasshook__(cls, subclass):
        r"""Structurally recognise namedtuple classes that were never registered.

        Returns ``True`` for a match and ``NotImplemented`` otherwise, so that
        the regular ABC machinery (registry, real subclasses) still decides.
        """
        # Only the marker class itself does structural matching; real
        # subclasses of it fall back to the default behaviour.
        if cls is _NamedTuple and issubclass(subclass, tuple):
            fields = getattr(subclass, "_fields", None)
            has_field_names = isinstance(fields, tuple) and all(
                isinstance(field, str) for field in fields
            )
            has_api = all(
                callable(getattr(subclass, attr, None))
                for attr in ("_make", "_replace", "_asdict")
            )
            if has_field_names and has_api:
                return True
        return NotImplemented

    @classmethod
    @abstractmethod
    def _make(cls, iterable) -> _NamedTuple[T]: ...

    @abstractmethod
    def _replace(self, /, **kwds) -> _NamedTuple[T]: ...

    @property
    @abstractmethod
    def _fields(self) -> tuple[str, ...]: ...

    @property
    @abstractmethod
    def _field_defaults(self) -> dict[str, T]: ...

    @abstractmethod
    def _asdict(self) -> dict[str, T]: ...

In [None]:
def register_namedtuple(obj, fields: list[str], /, *, name: str):
    r"""Attach a freshly created namedtuple class to ``obj`` and register it.

    Args:
        obj: Object that receives the new class as its ``_tuple`` attribute.
        fields: Field names handed to ``collections.namedtuple``.
        name: Type name of the new namedtuple class.

    Raises:
        ValueError: If ``name`` is not a valid identifier.
    """
    if name.isidentifier():
        obj._tuple = namedtuple(name, fields)
        # Make the new class a virtual subclass of the marker ABC.
        _NamedTuple.register(obj._tuple)
        return None

    raise ValueError(f"{name} is not a valid identifier!")

In [None]:
class Foo:
    r"""Example class whose instances each carry their own namedtuple type.

    ``__init__`` delegates to ``register_namedtuple``, which stores the
    generated class on the instance as ``_tuple``.
    """

    # Holds a namedtuple *class* (not an instance), hence ``type[...]``.
    _tuple: type[tuple]

    def __init__(self, fields: list[str]):
        register_namedtuple(self, fields, name="FooTuple")

In [None]:
# Build a Foo with fields a, b, c, then grab the namedtuple class stored on
# the instance and create one instance of it.
foo = Foo(["a", "b", "c"])
FooTup = foo._tuple  # the generated namedtuple class
footup = foo._tuple(1, 2, 3)  # an instance of that class

In [None]:
# The generated class/instance must be a real tuple ...
assert isinstance(footup, tuple)
assert issubclass(FooTup, tuple)
# ... and must also be recognised through the _NamedTuple marker ABC.
assert isinstance(footup, _NamedTuple)
assert issubclass(FooTup, _NamedTuple)

In [None]:
import gc
import pickle

In [None]:
from collections import namedtuple


class ClassWithTuple:
    r"""Owner of a dynamically created namedtuple type registered in module globals.

    The generated class gets a unique ``__qualname__`` (``tuple_id``) and is
    stored in this module's ``globals()`` under that name. ``pickle`` looks
    classes up by module and qualified name, so the registered global has to
    be the class object itself.

    The registration is removed again when the owner is finalised; instances
    of ``tuple_type`` that outlive the owner can no longer be looked up.

    Args:
        name: Type name passed to ``collections.namedtuple``.
        fields: Field names of the generated namedtuple.

    Raises:
        RuntimeError: If a global named ``tuple_id`` already exists.
    """

    tuple_id: str
    tuple_type: type[tuple]

    def __init__(self, name: str, fields: list[str]) -> None:
        self.tuple_type = namedtuple(name, fields)
        self.tuple_id = f"_{name}_{self.__class__.__name__}_{hash(self)}"
        self.tuple_type.__qualname__ = self.tuple_id

        if self.tuple_id in globals():
            raise RuntimeError(f"A class '{self.tuple_id}' exists!")
        # Register the class object (not its name) under its qualified name.
        globals()[self.tuple_id] = self.tuple_type

    def __del__(self):
        # ``__init__`` may have failed part-way, so neither attribute is
        # guaranteed to exist here.
        tuple_type = getattr(self, "tuple_type", None)
        tuple_id = getattr(self, "tuple_id", None)
        # Only remove the entry when it is the one this instance registered;
        # after a name collision the global belongs to a different owner.
        if tuple_type is not None and globals().get(tuple_id) is tuple_type:
            del globals()[tuple_id]


# Smoke test: create an owner and instantiate its generated tuple type.
obj = ClassWithTuple("FooTuple", ["a", "b", "c"])
obj.tuple_type(1, 2, 3)

In [None]:
class ClassWithTuple:
    r"""Variant that sets two plain attributes and delegates tuple registration.

    NOTE(review): this redefinition shadows the earlier ``ClassWithTuple``, and
    ``register_tuple`` is not defined in any visible cell -- presumably it is
    meant to populate ``tuple_id`` / ``tuple_type``; confirm.
    """

    tuple_id: str
    tuple_type: type[tuple]

    def __init__(self, name: str, fields: list[str]) -> None:
        self.a = 1
        self.b = 2
        register_tuple(self, name, fields)

In [None]:
# Display attribute ``b`` of a fresh instance.
# NOTE(review): passes ints although ``name: str`` and ``fields: list[str]``
# are annotated, and relies on ``register_tuple`` being defined -- confirm.
ClassWithTuple(1, 2).b

In [None]:
# Pickle round trip: results of two calls must share one type, and that type
# must survive dumps/loads.
# NOTE(review): ``obj`` is called here, but the ``obj`` assigned in the
# visible cells is a ClassWithTuple instance without ``__call__`` --
# presumably this cell relied on a different object in the kernel; confirm.
groups1 = obj(("foo1", "bar1", "baz1"))
groups2 = obj(("foo2", "bar2", "baz2"))
pickle1 = pickle.dumps(groups1)
pickle2 = pickle.dumps(groups2)
tuple1 = pickle.loads(pickle1)
tuple2 = pickle.loads(pickle2)
# NOTE(review): exact type checks are conventionally written with ``is``.
assert type(groups1) == type(groups2)
assert type(tuple1) == type(tuple2)
assert type(tuple1) == type(groups1)
assert tuple1 == groups1

In [None]:
# Inspect the finalizer of ``encoder``.
# NOTE(review): ``encoder`` is not assigned in any visible cell above --
# presumably leftover kernel state; confirm.
encoder.__del__

In [None]:
# Plain object used by the next cell to probe for a default finalizer.
o = object()

In [None]:
# Look up ``__del__`` on the type of ``o`` (i.e. on ``object``).
# NOTE(review): expected to raise AttributeError if ``object`` defines no
# ``__del__`` -- verify, since a raising cell breaks "Run All".
type(o).__del__

In [None]:
# NOTE(review): scratch assignment; no visible cell reads ``x`` before it is
# rebound to a tensor further down.
x = 2

In [None]:
# Apply ``encoder`` to a 3-tuple and display the result.
# NOTE(review): ``encoder`` is not defined in any visible cell; confirm origin.
encoder((1, 2, 3))

In [None]:
# Drop the global reference to ``encoder`` and force a collection pass;
# ``gc.collect()`` is the last expression, so its return value is displayed.
del encoder
gc.collect()
# print(sys.getrefcount(identifier))
# dir(__main__)

In [None]:
# NOTE(review): ``e`` is not defined in any visible cell -- likely leftover
# scratch; this raises NameError on a fresh kernel unless defined elsewhere.
e

In [None]:
def doit():
    r"""Create a throwaway ``TupleSplitter`` and apply it to two sample tuples.

    The results are discarded; a later cell calls this in a loop while
    sampling the process memory. ``TupleSplitter`` is not defined in the
    visible cells and must already exist in the kernel.
    """
    splitter = TupleSplitter({"a": [0, 1], "b": [2]})
    for sample in (
        ("foo1", "bar1", "baz1"),
        ("foo2", "bar2", "baz2"),
    ):
        splitter(sample)
    # A pickle dumps/loads round trip of the results was tried here and is
    # currently disabled.

In [None]:
from tqdm.autonotebook import trange

In [None]:
import os

import psutil

In [None]:
# Call doit() one million times and report the resident set size of this
# process in the progress bar every 10,000 iterations.
# The process handle does not change between iterations, so create it once.
process = psutil.Process(os.getpid())
for k in (pbar := trange(1_000_000)):
    doit()
    if k % 10_000 == 0:
        memory = process.memory_info().rss  # in bytes
        pbar.set_postfix(memory=f"{memory // 1024**2} MiB")

In [None]:
import pickle
from collections import namedtuple

import torch
from torch import Tensor
from torch.utils.data import DataLoader, Dataset

In [None]:
class TupleDataset(Dataset[tuple[Tensor, ...]]):
    r"""Dataset over equally long tensors whose samples are namedtuples.

    Each keyword argument becomes one field of a ``Sample`` namedtuple.
    Indexing the dataset indexes every tensor with the same index and packs
    the results into that namedtuple.

    The namedtuple class is given a unique ``__qualname__`` and stored in this
    module's ``globals()`` under that name, so that ``pickle`` can look the
    class up again by its qualified name.

    Args:
        **tensors: Field name -> tensor. All tensors must have the same length.

    Raises:
        ValueError: If no tensors are given.
        AssertionError: If the tensors differ in length.
        RuntimeError: If the generated qualified name is already a global.
    """

    def __init__(
        self,
        **tensors: Tensor,
    ) -> None:
        # Without this guard an empty call would leak StopIteration from next().
        if not tensors:
            raise ValueError("TupleDataset requires at least one tensor!")

        first = next(iter(tensors.values()))
        self.LEN = len(first)

        assert all(
            len(tensor) == self.LEN for tensor in tensors.values()
        ), "All tensors must have the same length!"

        self.tensors = tensors
        self.tuple = namedtuple("Sample", tensors.keys())

        # Unique name per dataset, so several datasets can coexist in globals.
        tuple_qualname = f"{self.tuple.__name__}{hash(self.tuple)}"
        self.tuple.__qualname__ = tuple_qualname

        if tuple_qualname in globals():
            # Report the name that was actually checked.
            raise RuntimeError(
                f"A class of name '{tuple_qualname}' already present in globals!!"
            )
        globals()[tuple_qualname] = self.tuple

    def __len__(self) -> int:
        r"""Length of the dataset."""
        return self.LEN

    def __getitem__(self, idx) -> tuple[Tensor, ...]:
        r"""Get the same slice from each tensor."""
        return self.tuple(**{key: tensor[idx] for key, tensor in self.tensors.items()})

In [None]:
# Two random tensors with a shared leading dimension of 100.
# NOTE(review): no random seed is set in the visible cells, so the values
# differ between runs.
t = torch.randn(100)
x = torch.randn(100, 5)  # rebinds ``x`` (previously the int 2)

In [None]:
# Build the dataset and check that a single sample can be pickled.
ds = TupleDataset(t=t, x=x)
sample = next(iter(ds))
pickle.dumps(sample)
# Iterate in batches of 10 using 5 worker processes.
dloader = DataLoader(ds, batch_size=10, num_workers=5)
iloader = iter(dloader)
first = next(iloader)
second = next(iloader)

# Drain the remaining batches; ``sample`` is rebound to the last one.
for sample in iloader:
    pass

# Display the types of the first, second and last batch.
type(first), type(second), type(sample)

In [None]:
from tsdm.datasets import Traffic

In [None]:
# Instantiate the Traffic dataset from tsdm.
# NOTE: rebinds ``ds``, which held the TupleDataset in the cells above.
ds = Traffic()

In [None]:
# Display the ``timeseries`` attribute of the Traffic dataset.
ds.timeseries