In [93]:
from python_scripts.variational_autoencoders import ConditionalVAE, JointVAE
from torchfusion.gan.applications import StandardGenerator, StandardProjectionDiscriminator
from torchsummary import summary

In [84]:
n_classes = 9
n_channels = 3
latent_dims = 100
categorical_dims = 10

In [85]:
cVAE = ConditionalVAE(n_channels, n_classes, latent_dims)
jVAE = JointVAE(n_channels, latent_dims, categorical_dims)

### Conditional VAE architecture

In [86]:
summary(cVAE.encoder.cuda(), (4, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 14, 14]           1,184
       BatchNorm2d-2           [-1, 32, 14, 14]              64
         LeakyReLU-3           [-1, 32, 14, 14]               0
            Conv2d-4             [-1, 64, 7, 7]          18,496
       BatchNorm2d-5             [-1, 64, 7, 7]             128
         LeakyReLU-6             [-1, 64, 7, 7]               0
            Conv2d-7            [-1, 128, 4, 4]          73,856
       BatchNorm2d-8            [-1, 128, 4, 4]             256
         LeakyReLU-9            [-1, 128, 4, 4]               0
           Conv2d-10            [-1, 256, 2, 2]         295,168
      BatchNorm2d-11            [-1, 256, 2, 2]             512
        LeakyReLU-12            [-1, 256, 2, 2]               0
           Conv2d-13            [-1, 512, 1, 1]       1,180,160
      BatchNorm2d-14            [-1, 51

In [87]:
print("Decoder input layer:\n", cVAE.decoder_input_layer)
print("\nDecoder hidden layers: ")
summary(cVAE.cuda().decoder, (512, 2, 2))
print("\nDecoder output layer:\n")
summary(cVAE.decoder_output_layer, (32, 32, 32))

Decoder input layer:
 Linear(in_features=109, out_features=2048, bias=True)

Decoder hidden layers: 
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
   ConvTranspose2d-1            [-1, 256, 4, 4]       1,179,904
       BatchNorm2d-2            [-1, 256, 4, 4]             512
         LeakyReLU-3            [-1, 256, 4, 4]               0
   ConvTranspose2d-4            [-1, 128, 8, 8]         295,040
       BatchNorm2d-5            [-1, 128, 8, 8]             256
         LeakyReLU-6            [-1, 128, 8, 8]               0
   ConvTranspose2d-7           [-1, 64, 16, 16]          73,792
       BatchNorm2d-8           [-1, 64, 16, 16]             128
         LeakyReLU-9           [-1, 64, 16, 16]               0
  ConvTranspose2d-10           [-1, 32, 32, 32]          18,464
      BatchNorm2d-11           [-1, 32, 32, 32]              64
        LeakyReLU-12           [-1, 32, 32, 32]               0
To

### Joint VAE architecture

In [92]:
summary(jVAE.encoder.cuda(), (3, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 14, 14]             896
       BatchNorm2d-2           [-1, 32, 14, 14]              64
         LeakyReLU-3           [-1, 32, 14, 14]               0
            Conv2d-4             [-1, 64, 7, 7]          18,496
       BatchNorm2d-5             [-1, 64, 7, 7]             128
         LeakyReLU-6             [-1, 64, 7, 7]               0
            Conv2d-7            [-1, 128, 4, 4]          73,856
       BatchNorm2d-8            [-1, 128, 4, 4]             256
         LeakyReLU-9            [-1, 128, 4, 4]               0
           Conv2d-10            [-1, 256, 2, 2]         295,168
      BatchNorm2d-11            [-1, 256, 2, 2]             512
        LeakyReLU-12            [-1, 256, 2, 2]               0
           Conv2d-13            [-1, 512, 1, 1]       1,180,160
      BatchNorm2d-14            [-1, 51

In [91]:
print("Decoder input layer:\n", jVAE.decoder_input)
print("\nDecoder hidden layers: ")
summary(jVAE.cuda().decoder, (512, 2, 2))
print("\nDecoder output layer:\n")
summary(jVAE.final_layer, (32, 32, 32))

Decoder input layer:
 Linear(in_features=110, out_features=2048, bias=True)

Decoder hidden layers: 
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
   ConvTranspose2d-1            [-1, 256, 4, 4]       1,179,904
       BatchNorm2d-2            [-1, 256, 4, 4]             512
         LeakyReLU-3            [-1, 256, 4, 4]               0
   ConvTranspose2d-4            [-1, 128, 8, 8]         295,040
       BatchNorm2d-5            [-1, 128, 8, 8]             256
         LeakyReLU-6            [-1, 128, 8, 8]               0
   ConvTranspose2d-7           [-1, 64, 16, 16]          73,792
       BatchNorm2d-8           [-1, 64, 16, 16]             128
         LeakyReLU-9           [-1, 64, 16, 16]               0
  ConvTranspose2d-10           [-1, 32, 32, 32]          18,464
      BatchNorm2d-11           [-1, 32, 32, 32]              64
        LeakyReLU-12           [-1, 32, 32, 32]               0
To

### Conditional GAN architecture

In [94]:
# Defines generator and discriminator
G = StandardGenerator(
    output_size=(n_channels,32,32),
    latent_size=latent_dims,
    num_classes=n_classes
)

D = StandardProjectionDiscriminator(
    input_size=(n_channels,32,32),
    apply_sigmoid=False,
    num_classes=n_classes
)

In [101]:
summary(G.cuda(), [(latent_dims,), (1,)])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
   ConvTranspose2d-1            [-1, 256, 4, 4]         409,856
       BatchNorm2d-2            [-1, 256, 4, 4]               0
         Embedding-3               [-1, 1, 256]           2,304
         Embedding-4               [-1, 1, 256]           2,304
ConditionalBatchNorm2d-5            [-1, 256, 4, 4]               0
         LeakyReLU-6            [-1, 256, 4, 4]               0
         LeakyReLU-7            [-1, 256, 4, 4]               0
         LeakyReLU-8            [-1, 256, 4, 4]               0
StandardGeneratorBlock-9            [-1, 256, 4, 4]               0
  ConvTranspose2d-10            [-1, 128, 8, 8]         524,416
      BatchNorm2d-11            [-1, 128, 8, 8]               0
        Embedding-12               [-1, 1, 128]           1,152
        Embedding-13               [-1, 1, 128]           1,152
ConditionalBatchNorm2d-14      

In [104]:
summary(D.cuda(), [(n_classes, 32, 32), (1,)])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             896
         LeakyReLU-2           [-1, 32, 32, 32]               0
StandardDiscriminatorBlock-3           [-1, 32, 32, 32]               0
            Conv2d-4           [-1, 64, 16, 16]          32,832
         LeakyReLU-5           [-1, 64, 16, 16]               0
         LeakyReLU-6           [-1, 64, 16, 16]               0
         LeakyReLU-7           [-1, 64, 16, 16]               0
StandardDiscriminatorBlock-8           [-1, 64, 16, 16]               0
           Dropout-9           [-1, 64, 16, 16]               0
           Conv2d-10            [-1, 8, 16, 16]             512
           Conv2d-11            [-1, 8, 16, 16]             512
           Conv2d-12           [-1, 64, 16, 16]           4,096
          Softmax-13             [-1, 256, 256]               0
    SelfAttention-14   

TypeError: can't multiply sequence by non-int of type 'tuple'