In [1]:
from typing import cast

from pandas import Categorical, DataFrame, Series
from seaborn import load_dataset
from torch import float32, tensor

from modugant.matrix import Matrix

iris = DataFrame(load_dataset("iris"))
species = cast('Series[str]', iris.pop("species"))
iris['species'] = Categorical(species).codes
data = Matrix.load(tensor(iris.values, dtype = float32), (150, 5))

In [2]:
## There are 4 sizes that are used as type parameters for the needed protocols.
## using the `Dim` class, we can ensure that our objects align in their dimensionalities.

## C: The number of conditional variables

from modugant.matrix.dim import Dim

conditions = Dim[0](0)
## L: The number of latent variables
latent = Dim[10](10)
## G: The number of generated features
generated = Dim[5](5)
## D: The number of real features to discriminate
dim = Dim[5](5)
## The batch size
batch = Dim[8](8)

In [3]:
## A `Connector` provides a connection between
##   the real data source and the discriminator
##   the generator and the discriminator

## Instantiate a direct connector which makes no transformations between data/generation and the discriminator
##   A connector may have a `Sampler` which is used to sample from the real data source

from modugant.connectors import DirectConnector
from modugant.samplers import RandomSampler

dim = Dim[5](5)
batch = Dim[8](8)

connector = DirectConnector(
    dim,
    sampler = RandomSampler(
        dim,
        data,
        0.8
    )
)

print(connector.sample(batch))

tensor([[6.3000, 3.4000, 5.6000, 2.4000, 2.0000],
        [5.7000, 2.9000, 4.2000, 1.3000, 1.0000],
        [4.5000, 2.3000, 1.3000, 0.3000, 0.0000],
        [4.8000, 3.0000, 1.4000, 0.3000, 0.0000],
        [6.9000, 3.1000, 5.4000, 2.1000, 2.0000],
        [5.8000, 2.7000, 5.1000, 1.9000, 2.0000],
        [6.7000, 3.3000, 5.7000, 2.1000, 2.0000],
        [5.1000, 3.3000, 1.7000, 0.5000, 0.0000]])


In [4]:
## Use an iterating `Sampler` instead

from modugant.samplers import IteratingSampler

connector = DirectConnector(
    dim,
    sampler = IteratingSampler(
        dim,
        data,
        0.8
    )
)

print(connector.sample(batch))

tensor([[5.9000, 3.2000, 4.8000, 1.8000, 1.0000],
        [7.0000, 3.2000, 4.7000, 1.4000, 1.0000],
        [4.7000, 3.2000, 1.3000, 0.2000, 0.0000],
        [4.8000, 3.4000, 1.9000, 0.2000, 0.0000],
        [4.4000, 3.2000, 1.3000, 0.2000, 0.0000],
        [5.1000, 2.5000, 3.0000, 1.1000, 1.0000],
        [5.1000, 3.3000, 1.7000, 0.5000, 0.0000],
        [5.5000, 2.4000, 3.7000, 1.0000, 1.0000]])


In [5]:
## Create and use a custom sampler

from typing import override

from torch import Tensor

from modugant.matrix import Matrix
from modugant.matrix.index import Index
from modugant.matrix.ops import zeros
from modugant.protocols import Sampler


class CustomSampler[D: int](Sampler[D]):
    def __init__(self, dim: D, data: Tensor):
        self._outputs = dim
        self.__data = Matrix.load(data, (data.shape[0], dim))
        self.__index = 0
    @override
    def sample[N: int](self, batch: N) -> Matrix[N, D]:
        if self.__index + batch > self.__data.shape[0]:
            self.__index = 0
        sample = Index.slice(self.__index, batch)
        return self.__data[sample, ...]
    @override
    def restart(self) -> None:
        self.__index = 0
    @property
    @override
    def holdout(self) -> Matrix[int, D]:
        return zeros((0, self._outputs))

dim = Dim[5](5)
batch = Dim[8](8)

connector = DirectConnector(
    dim,
    sampler = CustomSampler(dim, data)
)

print(connector.sample(batch))

tensor([[5.1000, 3.5000, 1.4000, 0.2000, 0.0000],
        [4.9000, 3.0000, 1.4000, 0.2000, 0.0000],
        [4.7000, 3.2000, 1.3000, 0.2000, 0.0000],
        [4.6000, 3.1000, 1.5000, 0.2000, 0.0000],
        [5.0000, 3.6000, 1.4000, 0.2000, 0.0000],
        [5.4000, 3.9000, 1.7000, 0.4000, 0.0000],
        [4.6000, 3.4000, 1.4000, 0.3000, 0.0000],
        [5.0000, 3.4000, 1.5000, 0.2000, 0.0000]])


In [6]:
## Use a `LoadedSampler`` with a `Loader` to transform the data before sampling

## Use normalization on data index 0, 1, 2, 3

from modugant.loaders import StandardizeLoader
from modugant.samplers import LoadingSampler

dim = Dim[5](5) # dimension sampled
normed = Dim[4](4) # dimension loaded from sampler
batch = Dim[8](8)

connector = DirectConnector(
    normed,
    sampler = LoadingSampler(
        sampler = RandomSampler(
            dim,
            data,
            0.8
        ),
        loader = StandardizeLoader(
            normed,
            data,
            index = [0, 1, 2, 3]
        )
    )
)

print(connector.sample(batch))

tensor([[-0.2939, -0.5904,  0.6469,  1.0504],
        [-0.8977,  1.7039, -1.0525, -1.0487],
        [-0.7769,  2.3922, -1.2791, -1.4422],
        [-0.7769,  2.3922, -1.2791, -1.4422],
        [ 1.2761,  0.3273,  1.1001,  1.4440],
        [ 1.1553,  0.3273,  1.2134,  1.4440],
        [-0.8977,  1.7039, -1.2225, -1.3111],
        [ 0.5515, -0.8198,  0.6469,  0.7880]])


In [7]:
## Create the same using the `Loader` separately

dim = Dim[5](5) # dimension sampled
normed = Dim[4](4) # dimension loaded from sampler
batch = Dim[8](8)

connector = DirectConnector(
    normed,
    sampler = RandomSampler(
        dim,
        data,
        0.8
    ),
    loader = StandardizeLoader(
        normed,
        data,
        index = [0, 1, 2, 3]
    )
)

print(connector.sample(10))

tensor([[-2.9386e-01, -8.1982e-01,  2.5038e-01,  1.3207e-01],
        [-1.2600e+00,  7.8617e-01, -1.0525e+00, -1.3111e+00],
        [ 6.7225e-01,  3.2732e-01,  4.2033e-01,  3.9445e-01],
        [ 1.0345e+00,  9.7889e-02,  5.3362e-01,  3.9445e-01],
        [-1.1392e+00, -1.5081e+00, -2.5945e-01, -2.6151e-01],
        [-1.5015e+00,  1.2450e+00, -1.5623e+00, -1.3111e+00],
        [-1.1392e+00, -1.3154e-01, -1.3358e+00, -1.3111e+00],
        [-1.8638e+00, -1.3154e-01, -1.5057e+00, -1.4422e+00],
        [-1.7309e-01,  1.7039e+00, -1.1658e+00, -1.1799e+00],
        [-5.2330e-02, -8.1982e-01,  8.0440e-02,  8.7455e-04]])


In [8]:
## Combine multiple Loaders for different portions of the underlying data

## Normalize the data at index 0, 1, 2, 3
## One-Hot Category encode the data at index 4
## Directly load the data at index 0 through 4

from modugant.loaders import CategoryLoader, DirectLoader, JointLoader

sampled = Dim[5](5) # dimension sampled
normed = Dim[4](4) # dimension normalized
direct = Dim[4](4) # dimension directly loaded
category = Dim[3](3) # dimension category loaded
outputs = Dim[11](11) # total dimension loaded
batch = Dim[8](8)

connector = DirectConnector(
    outputs,
    sampler = RandomSampler(
        sampled,
        data,
        0.8
    ),
    loader = JointLoader(
        outputs,
        loaders = (
            StandardizeLoader(
                normed,
                data,
                index = [0, 1, 2, 3]
            ),
            CategoryLoader(
                category,
                index = [(4, 3)]
            ),
            DirectLoader(
                direct,
                index = [(0, 4)]
            )
        )
    )
)

print(connector.sample(batch))

tensor([[-5.3538e-01, -1.3154e-01,  4.2033e-01,  3.9445e-01,  0.0000e+00,
          1.0000e+00,  0.0000e+00,  5.4000e+00,  3.0000e+00,  4.5000e+00,
          1.5000e+00],
        [-4.1462e-01, -1.0493e+00,  3.6368e-01,  8.7455e-04,  0.0000e+00,
          1.0000e+00,  0.0000e+00,  5.5000e+00,  2.6000e+00,  4.4000e+00,
          1.2000e+00],
        [ 1.1553e+00, -5.9039e-01,  5.9027e-01,  2.6326e-01,  0.0000e+00,
          1.0000e+00,  0.0000e+00,  6.8000e+00,  2.8000e+00,  4.8000e+00,
          1.4000e+00],
        [ 5.5149e-01, -3.6097e-01,  1.0434e+00,  7.8803e-01,  0.0000e+00,
          0.0000e+00,  1.0000e+00,  6.3000e+00,  2.9000e+00,  5.6000e+00,
          1.8000e+00],
        [ 2.2422e+00, -1.3154e-01,  1.3267e+00,  1.4440e+00,  0.0000e+00,
          0.0000e+00,  1.0000e+00,  7.7000e+00,  3.0000e+00,  6.1000e+00,
          2.3000e+00],
        [ 7.9301e-01, -5.9039e-01,  4.7697e-01,  3.9445e-01,  0.0000e+00,
          1.0000e+00,  0.0000e+00,  6.5000e+00,  2.8000e+00,  4.6000e+0

In [9]:
## Create a custom `Loader` that fully implements the `Loader` protocol

from typing import List

from modugant.matrix import Index
from modugant.protocols import Loader


class ZeroMeanLoader[D: int](Loader[D]):
    def __init__(self, dim: D, data: Tensor, index: List[int]):
        self._outputs = dim
        matrix = Matrix.load(data, (data.shape[0], data.shape[1]))
        self._index = Index.load(index, dim)
        self._mean = matrix[..., self._index].mean(dim = 0, keepdim = True)
    @override
    def load[N: int](self, data: Matrix[N, int]) -> Matrix[N, D]:
        return data[..., self._index] - self._mean

sampled = Dim[5](5) # dimension sampled
normed = Dim[4](4) # dimension normalized
centered = Dim[4](4) # dimension zero-mean loaded
category = Dim[3](3) # dimension category loaded
outputs = Dim[11](11) # total dimension loaded
batch = Dim[8](8)

connector = DirectConnector(
    outputs,
    sampler = RandomSampler(
        sampled,
        data,
        0.8
    ),
    loader = JointLoader(
        outputs,
        loaders = (
            StandardizeLoader(
                normed,
                data,
                index = [0, 1, 2, 3]
            ),
            CategoryLoader(
                category,
                index = [(4, 3)]
            ),
            ZeroMeanLoader(
                centered,
                data,
                index = [0, 1, 2, 3]
            )
        )
    )
)

print(connector.sample(batch))

tensor([[-1.1392,  1.2450, -1.3358, -1.4422,  1.0000,  0.0000,  0.0000, -0.9433,
          0.5427, -2.3580, -1.0993],
        [-1.7430, -0.1315, -1.3924, -1.3111,  1.0000,  0.0000,  0.0000, -1.4433,
         -0.0573, -2.4580, -0.9993],
        [-0.8977,  0.7862, -1.2791, -1.3111,  1.0000,  0.0000,  0.0000, -0.7433,
          0.3427, -2.2580, -0.9993],
        [ 0.1892, -0.8198,  0.7602,  0.5256,  0.0000,  1.0000,  0.0000,  0.1567,
         -0.3573,  1.3420,  0.4007],
        [ 0.5515, -1.7375,  0.3637,  0.1321,  0.0000,  1.0000,  0.0000,  0.4567,
         -0.7573,  0.6420,  0.1007],
        [ 2.2422, -0.5904,  1.6666,  1.0504,  0.0000,  0.0000,  1.0000,  1.8567,
         -0.2573,  2.9420,  0.8007],
        [ 1.2761,  0.0979,  0.6469,  0.3945,  0.0000,  1.0000,  0.0000,  1.0567,
          0.0427,  1.1420,  0.3007],
        [-1.0184,  1.2450, -1.3358, -1.3111,  1.0000,  0.0000,  0.0000, -0.8433,
          0.5427, -2.3580, -0.9993]])


In [10]:
## Use a `ComposedConnector` to further modularize

## A connector can be composed from the following modules
##   `Conditioner`: a class to create and use conditions
##   `Interceptor`: a class to transform generated data to fit the discriminator
##   `Updater`: a class to add additional loss to the generator
##   `Sampler`: a class to sample from the real data source
##   `Loader`: Optionally, a class to transform the real data after sampling

## Recreate the previous connector using a `ComposedConnector`

from modugant.conditioners import NoneConditioner
from modugant.connectors import ComposedConnector
from modugant.interceptors import DirectInterceptor
from modugant.updaters import StaticUpdater

conditions = Dim.zero() ## no conditions, same as Dim[0](0)
dim = Dim[7](7)
normed = Dim[4](4)
category = Dim[3](3)
batch = Dim[8](8)

connector = ComposedConnector(
    conditioner = NoneConditioner(dim),
    interceptor = DirectInterceptor(conditions, dim),
    updater = StaticUpdater(Dim.zero(), dim),
    sampler = LoadingSampler(
        sampler = RandomSampler(
            data.shape[1],
            data,
            0.8
        ),
        loader = JointLoader(
            dim,
            loaders = (
                StandardizeLoader(
                    normed,
                    data,
                    index = [0, 1, 2, 3]
                ),
                CategoryLoader(
                    category,
                    index = [(4, 3)]
                )
            )
        )
    )
)

print(connector.sample(batch))

tensor([[ 0.9138, -0.3610,  0.4770,  0.1321,  0.0000,  1.0000,  0.0000],
        [ 1.8799, -0.5904,  1.3267,  0.9192,  0.0000,  0.0000,  1.0000],
        [-0.0523, -0.8198,  0.1937, -0.2615,  0.0000,  1.0000,  0.0000],
        [-1.5015,  1.2450, -1.5623, -1.3111,  1.0000,  0.0000,  0.0000],
        [-0.2939, -0.5904,  0.6469,  1.0504,  0.0000,  0.0000,  1.0000],
        [-1.2600, -0.1315, -1.3358, -1.1799,  1.0000,  0.0000,  0.0000],
        [-0.1731,  1.7039, -1.1658, -1.1799,  1.0000,  0.0000,  0.0000],
        [ 1.2761,  0.0979,  0.6469,  0.3945,  0.0000,  1.0000,  0.0000]])


In [11]:
## Use a SoftmaxInterceptor on the category columns

from modugant.generators import ResidualGenerator
from modugant.interceptors import SoftmaxInterceptor

conditions = Dim[0](0)
latent = Dim[3](3)
generated = Dim[3](3)
dim = Dim[3](3)

generator = ResidualGenerator(
    conditions,
    latent,
    generated,
    steps = [2],
    learning = 0.01,
    decay = 0.001
)

connector = ComposedConnector(
    conditioner = NoneConditioner(dim),
    interceptor = SoftmaxInterceptor(conditions, generated, dim, index = [(0, 3)]),
    updater = StaticUpdater(Dim.zero(), dim),
    sampler = LoadingSampler(
        sampler = RandomSampler(
            data.shape[1],
            data,
            0.8
        ),
        loader = CategoryLoader(
            category,
            index = [(4, dim)]
        )
    )
)

# create empty conditions
condition = zeros((10, conditions), dtype = float32)
# sample from the generator
fake = generator.sample(condition)
# prepare the fake data for the discriminator
prepared = connector.prepare(condition, fake)

print(fake, prepared, sep = '\n')

tensor([[-0.4880, -0.7226,  0.1877],
        [ 0.4701, -0.5223, -0.5036],
        [ 0.2934, -0.8172,  0.1433],
        [ 0.2766, -0.4117, -0.2942],
        [ 0.5163, -0.1524, -0.4131],
        [-0.2032, -0.5554, -0.4482],
        [ 0.5067, -0.3434, -0.5752],
        [ 0.5616, -0.7842,  0.1741],
        [ 0.1884, -0.4760, -0.1681],
        [ 0.5014, -0.6263,  0.5028]], grad_fn=<TanhBackward0>)
tensor([[0.2662, 0.2105, 0.5232],
        [0.5719, 0.2120, 0.2160],
        [0.4566, 0.1504, 0.3930],
        [0.4837, 0.2430, 0.2733],
        [0.5243, 0.2687, 0.2070],
        [0.4023, 0.2829, 0.3149],
        [0.5662, 0.2420, 0.1919],
        [0.5157, 0.1342, 0.3500],
        [0.4515, 0.2324, 0.3161],
        [0.4301, 0.1393, 0.4307]], grad_fn=<CatBackward0>)


In [12]:
## The previous connector only intercepts and prepares the category data with a `SoftmaxInterceptor`
## Use a `JointInterceptor` to combine a `DirectInterceptor` and a `SoftmaxInterceptor`

from modugant.interceptors import JointInterceptor

conditions = Dim[0](0)
latent = Dim[5](5)
generated = Dim[7](7)
normed = Dim[4](4)
category = Dim[3](3)
dim = Dim[7](7)

generator = ResidualGenerator(
    conditions,
    latent,
    generated,
    steps = [2],
    learning = 0.01,
    decay = 0.001
)

## A `JointInterceptor` will be fed parallel streams of data according to the dimension of each
## The SoftmaxInterceptor below will be given a `category` sized block that occurs after the `normed` sized block
## So the index for the SoftmaxInterceptor is [(0, 3)]

connector = ComposedConnector(
    conditioner = NoneConditioner(dim),
    interceptor = JointInterceptor(
        conditions, # conditions + conditions = conditions
        generated, # normed + category = generated
        dim, # normed + category = dim
        interceptors = (
            DirectInterceptor(conditions, normed),
            SoftmaxInterceptor(conditions, category, category, index = [(0, 3)])
        )
    ),
    updater = StaticUpdater(conditions, generated),
    sampler = LoadingSampler(
        sampler = RandomSampler(
            data.shape[1],
            data,
            0.8
        ),
        loader = JointLoader(
            dim,
            loaders = (
                StandardizeLoader(
                    normed,
                    data,
                    index = [0, 1, 2, 3]
                ),
                CategoryLoader(
                    category,
                    index = [(4, 3)]
                )
            )
        )
    )
)

condition = zeros((batch, Dim.zero()))
fake = generator.sample(condition)

prepared = connector.prepare(condition, fake)

print(fake, prepared, sep = '\n')

tensor([[-0.1846, -0.1248, -0.0965, -0.0767,  0.1525,  0.1945, -0.1127],
        [-0.7863, -0.6746, -0.2433,  0.1546, -0.2298, -0.2362,  0.4956],
        [ 0.3876,  0.7162, -0.2730,  0.9713,  0.2785,  0.3914,  0.0925],
        [ 0.5010,  0.5405,  0.4390, -0.1480, -0.1599,  0.4777, -0.1890],
        [-0.2299, -0.1494, -0.2174,  0.4210, -0.1965, -0.3212,  0.3101],
        [-0.4350, -0.6290, -0.8216, -0.2967,  0.3459, -0.3796, -0.6841],
        [-0.4678, -0.5655, -0.6928,  0.8577,  0.6438,  0.0127, -0.1223],
        [-0.5714, -0.3336, -0.6750,  0.6879, -0.3701, -0.3952,  0.3175]],
       grad_fn=<TanhBackward0>)
tensor([[-0.1846, -0.1248, -0.0965, -0.0767,  0.3559,  0.3711,  0.2730],
        [-0.7863, -0.6746, -0.2433,  0.1546,  0.2464,  0.2448,  0.5089],
        [ 0.3876,  0.7162, -0.2730,  0.9713,  0.3390,  0.3795,  0.2815],
        [ 0.5010,  0.5405,  0.4390, -0.1480,  0.2588,  0.4897,  0.2514],
        [-0.2299, -0.1494, -0.2174,  0.4210,  0.2823,  0.2492,  0.4685],
        [-0.4350, 

In [13]:
## Create and use conditions

from modugant.conditioners import CategoryConditioner

conditions = Dim[3](3)
latent = Dim[5](5)
generated = Dim[7](7)
normed = Dim[4](4)
category = Dim[3](3)
dim = Dim[7](7)

generator = ResidualGenerator(
    conditions,
    latent,
    generated,
    steps = [5, 5, 5],
    learning = 0.01,
    decay = 0.001
)

# Conditions are also fed in paralle tracks into the interceptor
# Where as we were able to use `conditions = 0 => conditions + conditions = conditions` in the previous example
# We now need to apportion the conditions among the joint blocks

connector = ComposedConnector(
    conditioner = CategoryConditioner(
        conditions,
        dim,
        index = [(4, 3)], # index is based on entire data block
        samples = 1 # sample a single category
    ),
    interceptor = JointInterceptor(
        conditions,
        generated,
        dim,
        interceptors = (
            DirectInterceptor(Dim.zero(), normed), # categories are also fed in parallel tracks
            SoftmaxInterceptor(conditions, category, category, index = [(0, 3)]) # index is based on Joint sub blocks
        )
    ),
    updater = StaticUpdater(conditions, generated),
    sampler = LoadingSampler(
        sampler = RandomSampler(
            data.shape[1],
            data,
            0.8
        ),
        loader = JointLoader(
            dim,
            loaders = (
                StandardizeLoader(
                    normed,
                    data,
                    index = [0, 1, 2, 3]
                ),
                CategoryLoader(
                    category,
                    index = [(4, 3)]
                )
            )
        )
    )
)

real = connector.sample(batch)
condition = connector.condition(real)
fake = generator.sample(condition)
prepared = connector.prepare(condition, fake)

print(condition, fake, prepared, sep = '\n')

tensor([[0., 0., 1.],
        [0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.]])
tensor([[-0.0370, -0.3423,  0.0152,  0.2753,  0.5452,  0.4443,  0.1779],
        [-0.1001,  0.0919, -0.0498,  0.4598,  0.0197,  0.2885,  0.1762],
        [-0.4840, -0.5182,  0.4320,  0.2837,  0.2495,  0.4685,  0.2934],
        [-0.0868,  0.3148, -0.3744,  0.5102,  0.2413,  0.4369, -0.3273],
        [-0.5972,  0.0771,  0.2790,  0.5353,  0.5704,  0.0636,  0.2219],
        [-0.3654, -0.3540,  0.3552,  0.2203,  0.1712,  0.4803,  0.4983],
        [ 0.0132, -0.4928,  0.5045, -0.1473,  0.6535, -0.3288,  0.3369],
        [ 0.0888,  0.0599, -0.2924,  0.4698,  0.0919,  0.5149, -0.3266]],
       grad_fn=<TanhBackward0>)
tensor([[-0.0370, -0.3423,  0.0152,  0.2753,  0.3851,  0.3481,  0.2667],
        [-0.1001,  0.0919, -0.0498,  0.4598,  0.2875,  0.3762,  0.3362],
        [-0.4840, -0.5182,  0.4320,  0.2837,  0.3040,  0.378

In [14]:
## Use a `PooledInterceptor` to maintain a single track

from modugant.conditioners import CategoryConditioner
from modugant.interceptors import PooledInterceptor, SubsetInterceptor

conditions = Dim[3](3)
latent = Dim[5](5)
generated = Dim[7](7)
normed = Dim[4](4)
category = Dim[3](3)
dim = Dim[7](7)

generator = ResidualGenerator(
    conditions,
    latent,
    generated,
    steps = [5, 5, 5],
    learning = 0.01,
    decay = 0.001
)

# The `PooledInterceptor` will maintain a single track of data, so the indices and parameters are offset
# The `SubsetInterceptor` will subset the columns down to the first 4 of the single track
# whereas the `DirectInterceptor` passed forward all of its individual parallel track

connector = ComposedConnector(
    conditioner = CategoryConditioner(
        conditions,
        dim,
        index = [(4, 3)], # index is based on entire data block
        samples = 1 # sample a single category
    ),
    interceptor = PooledInterceptor(
        conditions,
        generated,
        dim,
        interceptors = (
            SubsetInterceptor(conditions, generated, normed, index = [0, 1, 2, 3]),
            SoftmaxInterceptor(conditions, generated, category, index = [(4, 3)]) # the category starts at index 4
        )
    ),
    updater = StaticUpdater(conditions, generated),
    sampler = LoadingSampler(
        sampler = RandomSampler(
            data.shape[1],
            data,
            0.8
        ),
        loader = JointLoader(
            dim,
            loaders = (
                StandardizeLoader(
                    normed,
                    data,
                    index = [0, 1, 2, 3]
                ),
                CategoryLoader(
                    category,
                    index = [(4, 3)]
                )
            )
        )
    )
)

real = connector.sample(batch)
condition = connector.condition(real)
fake = generator.sample(condition)
prepared = connector.prepare(condition, fake)

print(condition, fake, prepared, sep = '\n')

tensor([[1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])
tensor([[-0.1266,  0.2505,  0.2577,  0.4111,  0.2390,  0.3971,  0.2635],
        [ 0.0952, -0.0261,  0.0788, -0.0125,  0.0384,  0.2619,  0.1556],
        [-0.0013,  0.3657,  0.4933,  0.3548,  0.1554, -0.5372,  0.3768],
        [ 0.0239, -0.1403, -0.0917, -0.0412,  0.3303, -0.1760,  0.3304],
        [ 0.0998,  0.2638,  0.4057,  0.0728, -0.0479,  0.1167,  0.0389],
        [ 0.1447, -0.2930,  0.2716, -0.3217,  0.0992, -0.0804,  0.0769],
        [ 0.1895, -0.1140, -0.1931, -0.1650,  0.3327,  0.0480,  0.2447],
        [ 0.2285, -0.2333,  0.0038,  0.2437,  0.1796,  0.5685, -0.1218]],
       grad_fn=<TanhBackward0>)
tensor([[-0.1266,  0.2505,  0.2577,  0.4111,  0.3129,  0.3665,  0.3206],
        [ 0.0952, -0.0261,  0.0788, -0.0125,  0.2963,  0.3705,  0.3331],
        [-0.0013,  0.3657,  0.4933,  0.3548,  0.3639,  0.182

In [16]:
# Use an updater to calculate cross entropy of the generated conditions

from modugant.updaters import EntropyUpdater

generator = ResidualGenerator(
    conditions,
    latent,
    generated,
    steps = [5, 5, 5],
    learning = 0.01,
    decay = 0.001
)

connector = ComposedConnector(
    conditioner = CategoryConditioner(
        conditions,
        dim,
        index = [(4, 3)],
        samples = 1
    ),
    interceptor = PooledInterceptor(
        conditions,
        generated,
        dim,
        interceptors = (
            SubsetInterceptor(conditions, generated, normed, index = [0, 1, 2, 3]),
            SoftmaxInterceptor(conditions, generated, category, index = [(4, 3)])
        )
    ),
    updater = EntropyUpdater(conditions, generated, index = [(0, 4, 3)]),
    sampler = LoadingSampler(
        sampler = RandomSampler(
            data.shape[1],
            data,
            0.8
        ),
        loader = JointLoader(
            dim,
            loaders = (
                StandardizeLoader(
                    normed,
                    data,
                    index = [0, 1, 2, 3]
                ),
                CategoryLoader(
                    category,
                    index = [(4, 3)]
                )
            )
        )
    )
)

real = connector.sample(batch)
condition = connector.condition(real)
fake = generator.sample(condition)
loss = connector.loss(condition, fake)

print(condition, fake, loss, sep = '\n')

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])
tensor([[-0.7378, -0.0486, -0.5129, -0.6843, -0.6138, -0.1067,  0.4269],
        [ 0.8382, -0.3806,  0.1823, -0.0278,  0.7078, -0.8477, -0.4894],
        [ 0.1514, -0.3090, -0.0240, -0.7253, -0.3106, -0.7019,  0.5350],
        [ 0.4871, -0.2036, -0.2685, -0.0137,  0.4144, -0.6781,  0.4024],
        [ 0.1322, -0.0349, -0.2672,  0.1477, -0.4560,  0.1746, -0.1377],
        [ 0.1769,  0.3874,  0.0355,  0.2355,  0.4388, -0.4561, -0.5445],
        [-0.9239,  0.6723, -0.5012, -0.5903,  0.0074,  0.3382, -0.6351],
        [-0.2162,  0.1264, -0.1182, -0.4310, -0.1877, -0.0890, -0.1409]],
       grad_fn=<TanhBackward0>)
tensor([[1.5261]], grad_fn=<DivBackward0>)


In [20]:
## Again, the `Loader` may be passed in separately

generator = ResidualGenerator(
    conditions,
    latent,
    generated,
    steps = [5, 5, 5],
    learning = 0.01,
    decay = 0.001
)

connector = ComposedConnector(
    conditioner = CategoryConditioner(
        conditions,
        dim,
        index = [(4, 3)],
        samples = 1
    ),
    interceptor = PooledInterceptor(
        conditions,
        generated,
        dim,
        interceptors = (
            SubsetInterceptor(conditions, generated, normed, index = [0, 1, 2, 3]),
            SoftmaxInterceptor(conditions, generated, category, index = [(4, 3)])
        )
    ),
    updater = EntropyUpdater(conditions, generated, index = [(0, 4, 3)]),
    sampler = RandomSampler(
        data.shape[1],
        data,
        0.8
    ),
    loader = JointLoader(
        dim,
        loaders = (
            StandardizeLoader(
                normed,
                data,
                index = [0, 1, 2, 3]
            ),
            CategoryLoader(
                category,
                index = [(4, 3)]
            )
        )
    )
)

real = connector.sample(10)
condition = connector.condition(real)
fake = generator.sample(condition)
loss = connector.loss(condition, fake)

print(condition, fake, loss, sep = '\n')


tensor([[0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 0., 1.]])
tensor([[-6.5576e-01,  1.3805e-01,  2.8974e-01, -1.8483e-02,  1.8380e-01,
         -2.5284e-01, -5.2404e-01],
        [ 1.4178e-01,  2.3192e-01, -7.6862e-01, -1.5927e-01, -1.9472e-01,
         -5.9275e-01, -9.3441e-02],
        [-1.3210e-01, -6.4582e-01, -1.6819e-01, -1.2974e-01,  6.7975e-01,
          2.8942e-01, -1.6782e-01],
        [-5.0830e-01, -1.4017e-01, -1.8506e-01,  6.2910e-02,  3.4650e-01,
         -1.7748e-02, -5.0655e-01],
        [ 7.6201e-02,  4.8606e-04, -7.1317e-01, -1.3066e-01,  2.5194e-01,
         -4.2284e-01,  5.7424e-02],
        [ 2.2932e-01, -2.8486e-01, -3.8343e-01, -4.6502e-01, -2.6078e-01,
         -2.5511e-01, -2.2651e-01],
        [-2.3805e-01,  3.7786e-01,  1.8699e-02,  3.1162e-01,  2.5627e-01,
         -2.8903e-01, -1.6495e-02],
        [

In [22]:
## A `Transformer` is a union class of `Conditioner`, `Interceptor`, `Updater` and `Loader`

## Separate the category logic into a single `Transformer`

from modugant.transformers import ComposedTransformer

conditions = Dim[3](3)
category = Dim[3](3)
batch = Dim[8](8)

transformer = ComposedTransformer(
    conditioner = CategoryConditioner(
        conditions,
        category,
        index = [(0, 3)],
        samples = 1
    ),
    interceptor = SoftmaxInterceptor(conditions, category, category, index = [(0, 3)]),
    updater = EntropyUpdater(conditions, category, index = [(0, 0, 3)]),
    loader = CategoryLoader(
        category,
        index = [(4, 3)] # the index is fixed and the transformer must always load from the same index
    )
)

# The transformer will have to be paired with a sampler to be used in the demo
# create a sampler

sampler = LoadingSampler(
    sampler = RandomSampler(
        data.shape[1],
        data,
        0.8
    ),
    loader = CategoryLoader(
        category,
        index = [(4, 3)]
    )
)

# sample real data with the sampler
real = sampler.sample(batch)
# condition the real data
condition = transformer.condition(real)

# create a generator, and generate from the condition
latent = Dim[5](5)
generator = ResidualGenerator(
    conditions,
    latent,
    category,
    steps = [5, 5, 5],
    learning = 0.01,
    decay = 0.001
)

fake = generator.sample(condition)
# calculate the loss, calculated on the pre-prepared, generated data
prepared = transformer.prepare(condition, fake)
loss = transformer.loss(condition, fake)

print(condition, fake, prepared, loss, sep = '\n')

tensor([[0, 0, 1],
        [0, 1, 0],
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
        [0, 1, 0],
        [1, 0, 0]])
tensor([[-0.3661,  0.1997,  0.4581],
        [ 0.4774,  0.1061,  0.1220],
        [-0.1411, -0.1356,  0.2979],
        [ 0.4593,  0.3745,  0.2049],
        [ 0.7405,  0.5464,  0.5136],
        [ 0.7230,  0.4614,  0.2779],
        [ 0.4524,  0.2007, -0.0452],
        [ 0.1205, -0.3172,  0.6317]], grad_fn=<TanhBackward0>)
tensor([[0.1984, 0.3493, 0.4523],
        [0.4183, 0.2885, 0.2932],
        [0.2812, 0.2827, 0.4361],
        [0.3712, 0.3410, 0.2878],
        [0.3816, 0.3143, 0.3041],
        [0.4148, 0.3193, 0.2658],
        [0.4192, 0.3259, 0.2549],
        [0.3019, 0.1949, 0.5033]], grad_fn=<CatBackward0>)
tensor([[1.0130]], grad_fn=<DivBackward0>)


In [23]:
# Customize the above transformer to use a flexible source index

from typing import Tuple


class CategoryTransformer[B: int](ComposedTransformer[B, B, B]):
    '''Transformer for a single category'''
    def __init__(
        self,
        index: Tuple[int, B] # no longer fixed to (4, 3)
    ) -> None:
        '''
        Args:
            index: The start index in the original data and the size (bins) of the category
        '''
        (_, bins) = index
        super().__init__(
            conditioner = CategoryConditioner(
                bins,
                bins,
                index = [(0, bins)],
                samples = 1
            ),
            interceptor = SoftmaxInterceptor(bins, bins, bins, index = [(0, bins)]),
            updater = EntropyUpdater(bins, bins, index = [(0, 0, bins)]),
            loader = CategoryLoader(
                bins,
                index = [index]
            )
        )

bins = Dim[3](3)

transformer = CategoryTransformer((4, bins))

In [24]:
## Combine the category transformer with a transformer to handle the non-category columns with a `JointTransformer`

from modugant.transformers import JointTransformer

conditions = Dim[3](3)
latent = Dim[5](5)
generated = Dim[7](7)
normed = Dim[4](4)
category = Dim[3](3)
dim = Dim[7](7)
batch = Dim[8](8)

transformer = JointTransformer(
    conditions,
    generated,
    dim,
    transformers = (
        CategoryTransformer((4, category)),
        ComposedTransformer(
            conditioner = NoneConditioner(normed),
            interceptor = DirectInterceptor(Dim.zero(), normed),
            loader = StandardizeLoader(
                normed,
                data,
                index = [0, 1, 2, 3]
            )
        )
    )
)

generator = ResidualGenerator(
    conditions,
    latent,
    generated,
    steps = [5, 5, 5],
    learning = 0.01,
    decay = 0.001
)

real = connector.sample(batch)
condition = transformer.condition(real)
fake = generator.sample(condition)
loss = transformer.loss(condition, fake)

print(condition, fake, loss, sep = '\n')

tensor([[-1.2600,  0.0979, -1.2225],
        [-0.8977, -1.2787, -0.4294],
        [ 1.2761,  0.3273,  1.1001],
        [ 0.5515, -0.3610,  1.0434],
        [-0.4146,  1.0156, -1.3924],
        [-1.0184,  1.0156, -1.3924],
        [-1.6223, -1.7375, -1.3924],
        [-1.1392,  0.0979, -1.2791]])
tensor([[ 0.5428,  0.2965, -0.5407, -0.0459,  0.4075, -0.5510,  0.0584],
        [ 0.5973,  0.2971,  0.0355, -0.6690,  0.5536, -0.1159,  0.3349],
        [ 0.2190,  0.1934, -0.4236, -0.5345, -0.3103, -0.1656,  0.5281],
        [ 0.4820, -0.0326, -0.2642, -0.4502,  0.1726,  0.1902, -0.0913],
        [ 0.7294,  0.0304,  0.1852,  0.2668, -0.4797,  0.0314, -0.3606],
        [ 0.5886, -0.2676, -0.5938,  0.2007, -0.8421, -0.7538, -0.3728],
        [ 0.4159,  0.1454, -0.4022, -0.8561, -0.2666, -0.2447,  0.1004],
        [ 0.6869,  0.3026,  0.3144, -0.2326,  0.5943,  0.4076, -0.2249]],
       grad_fn=<TanhBackward0>)
tensor([[-1.9197]], grad_fn=<SumBackward1>)


In [25]:
## Create a custom transformer for standardized variables too

from modugant.matrix.dim import Zero


class StandardTransformer[D: int](ComposedTransformer[Zero, D, D]):
    def __init__(
        self,
        dim: D,
        index: List[int],
        data: Tensor
    ):
        super().__init__(
            conditioner = NoneConditioner(dim),
            interceptor = DirectInterceptor(Dim.zero(), dim),
            loader = StandardizeLoader(
                dim,
                data,
                index
            )
        )

conditions = Dim[3](3)
latent = Dim[5](5)
generated = Dim[7](7)
normed = Dim[4](4)
category = Dim[3](3)
dim = Dim[7](7)
batch = Dim[8](8)

transformer = JointTransformer(
    conditions,
    generated,
    dim,
    transformers = (
        StandardTransformer(
            normed,
            [0, 1, 2, 3],
            data
        ),
        CategoryTransformer((4, category))
    )
)

real = connector.sample(batch)
condition = transformer.condition(real)
fake = generator.sample(condition)
loss = transformer.loss(condition, fake)

print(condition, fake, loss, sep = '\n')

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0., 1.],
        [1., 0., 0.],
        [0., 0., 1.],
        [1., 0., 0.]])
tensor([[ 0.1550,  0.0296, -0.5122, -0.3591, -0.1090, -0.2872, -0.5443],
        [ 0.4830, -0.2732,  0.1904,  0.4756, -0.7595, -0.1429, -0.0110],
        [ 0.3858,  0.2283,  0.0223,  0.3225, -0.3369, -0.0118, -0.0701],
        [ 0.2143, -0.1070, -0.4552, -0.2396, -0.0895,  0.2283,  0.6308],
        [ 0.2660, -0.0261, -0.5603, -0.2857,  0.2970,  0.0220,  0.6261],
        [ 0.3247,  0.6194, -0.8044, -0.1462, -0.0805, -0.2169, -0.3734],
        [ 0.3948,  0.2210, -0.2476, -0.3922, -0.1335, -0.0538,  0.4574],
        [ 0.4068,  0.4317,  0.1199, -0.4742,  0.4297,  0.2258, -0.1033]],
       grad_fn=<TanhBackward0>)
tensor([[0.8836]], grad_fn=<SumBackward1>)


In [26]:
## Use transformers and a `Sampler` into a `Connector` with `JointConnector`

from modugant.connectors.joint import JointConnector

conditions = Dim[3](3)
latent = Dim[5](5)
generated = Dim[7](7)
normed = Dim[4](4)
category = Dim[3](3)
dim = Dim[7](7)

connector = JointConnector(
    conditions,
    generated,
    dim,
    transformers = (
        StandardTransformer(
            normed,
            [0, 1, 2, 3],
            data
        ),
        CategoryTransformer((4, category))
    ),
    sampler = RandomSampler( # adding a sampler allows you to extend a `Transformer` to a `Connector`
        data.shape[1],
        data,
        0.8
    )
)


real = connector.sample(10)
condition = connector.condition(real)
fake = generator.sample(condition)
prepared = connector.prepare(condition, fake)
loss = transformer.loss(condition, fake)

print(condition, fake, prepared, loss, sep = '\n')

tensor([[0., 1., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.]])
tensor([[ 0.4688,  0.3226, -0.5319, -0.3551, -0.1758, -0.2349, -0.0621],
        [ 0.3252,  0.3307,  0.1370, -0.0019,  0.5459, -0.0141, -0.0687],
        [ 0.7023, -0.1541,  0.1834,  0.2219, -0.1248, -0.0142, -0.2653],
        [ 0.2684,  0.0836, -0.7174, -0.4621, -0.5128, -0.3522,  0.0323],
        [ 0.5215, -0.0884, -0.0687,  0.2617, -0.3020, -0.1196, -0.2039],
        [ 0.6218, -0.1554, -0.1681, -0.1472,  0.2719, -0.0981,  0.0620],
        [ 0.4378, -0.3567, -0.5073, -0.7531,  0.5063, -0.1242,  0.8138],
        [ 0.5635, -0.1350, -0.1116,  0.0510, -0.0673,  0.5776,  0.1742],
        [ 0.4128, -0.3557, -0.3664, -0.0418, -0.0978,  0.0504, -0.0813],
        [ 0.6982,  0.1357, -0.6675, -0.2240, -0.2303,  0.3854, -0.0394]],
       grad_fn=<TanhBackward0>)
tensor([[ 0.4688

In [27]:
# Create a category Transformer for many category variables

class CategoriesTransformer[B: int](ComposedTransformer[B, B, B]):
    '''Transformer for a single category'''
    def __init__(
        self,
        width: B, # total size now needs to be specified, and we will assert it
        index: List[Tuple[int, int]], # take in a list of indices instead
        samples: int = 1
    ) -> None:
        '''
        Args:
            width: The total size of the category
            index: A list of tuples of the start index in the original data and the size (bins) of the category
            size: The number of categories to sample
        '''
        sizes = [size for (_, size) in index]
        assert sum(sizes) == width
        ## The index and size of categories after being loaded
        cumu = [(sum(sizes[:i]), sizes[i]) for i in range(len(index))]
        super().__init__(
            conditioner = CategoryConditioner(
                width,
                width,
                index = cumu,
                samples = samples
            ),
            interceptor = SoftmaxInterceptor(
                width,
                width,
                width,
                index = cumu
            ),
            updater = EntropyUpdater(
                width,
                width,
                # category index and data index are the same
                index = [(start, start, size) for (start, size) in cumu]
            ),
            loader = CategoryLoader(
                width,
                index = index
            )
        )

In [29]:
## Create a custom `Connector`

from typing import Optional, Tuple


class CustomConnector[C: int, D: int](JointConnector[C, D, D]):
    def __init__(
        self,
        conditions: C,
        dim: D,
        data: Tensor,
        continuous: List[int],
        category: List[Tuple[int, int]],
        samples: Optional[int] = 1,
        split: Optional[float] = 0.8,
        sampler: Optional[Sampler[D]] = None
    ):
        '''
        Custom Connector

        Args:
            conditions: The number of conditions (category bins total)
            dim: The total dimensionality of the generated/discriminated data
            data: The data to sample from
            continuous: The indices of the continuous variables
            category: The indices and sizes of the category variables
            size: The number of categories to sample (default 1)
            split: Optional. The proportion of data to sample (default 0.8; unused if `sampler` is provided)
            sampler: Optional. The sampler to use (default `RandomSampler`)
        '''
        sizes = [size for _, size in category]
        assert sum(sizes) == conditions
        assert len(continuous) + conditions == dim
        super().__init__(
            conditions = conditions,
            intermediates = dim,
            outputs = dim,
            transformers = (
                StandardTransformer(
                    len(continuous),
                    continuous,
                    data
                ),
                CategoriesTransformer(
                    conditions,
                    category,
                    samples = samples or 1
                )
            ),
            sampler = sampler or RandomSampler(
                data.shape[1],
                data,
                split or 0.8
            )
        )

latent = Dim[5](5)
normed = Dim[4](4)
category = Dim[3](3)
dim = Dim[7](7)

generator = ResidualGenerator(
    category, # number of conditions
    latent, # number of latent variables
    dim, # number of generated variables
    steps = [5, 5, 5],
    learning = 0.01,
    decay = 0.001
)

connector = CustomConnector(
    category, # number of conditions
    dim, # number of total variables
    data, # data to sample from
    [0, 1, 2, 3], # continuous variables
    [(4, 3)], # category variables
    samples = 1,
    split = 0.8
)


real = connector.sample(10)
condition = connector.condition(real)
fake = generator.sample(condition)
prepared = connector.prepare(condition, fake)
loss = transformer.loss(condition, fake)

print(condition, fake, prepared, loss, sep = '\n')

tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])
tensor([[ 0.2995, -0.6231, -0.2404,  0.4545, -0.0735,  0.2097,  0.1870],
        [ 0.4885,  0.4865, -0.4001,  0.0016,  0.2716,  0.2057, -0.2418],
        [ 0.5921, -0.2623, -0.4996,  0.6908, -0.0808, -0.1217,  0.0327],
        [-0.2480, -0.4575,  0.3685, -0.1523, -0.3783,  0.4016,  0.4501],
        [-0.4035,  0.1553,  0.1395,  0.1369,  0.3141, -0.5058, -0.0482],
        [ 0.1874, -0.2999, -0.1268,  0.0325, -0.0037,  0.0482,  0.1397],
        [ 0.2854, -0.2149, -0.1321,  0.2097, -0.0301, -0.6400, -0.3197],
        [ 0.2982,  0.3564,  0.4817, -0.2781, -0.2323,  0.1939, -0.5306],
        [ 0.3032,  0.1069,  0.3197,  0.0317,  0.0048,  0.1249,  0.3599],
        [ 0.0393, -0.1071,  0.6800, -0.4833, -0.2060,  0.1421,  0.6545]],
       grad_fn=<TanhBackward0>)
tensor([[ 0.2995