In [1]:
import tensorflow as tf
import numpy as np

# Use to generate new data by manipulation

## What is this?
VAE stands for Variational Auto Encoder. Which means that is trained to reproduce or reconstruct from a latent space the input. Is an strategy to learn that latent space

It has 2 important blocks the Encoder wich will be responsable for condensing the data into a low dimensional latent space or just a vector.
And the Decoder by taking one point(how do you choose a point? By sampling) from the latent space is able to reconstruct a new image.

## How do VAEs build a latent space?
That is what difference a VAE from a AE that the V which means variational and allows the VAE to create continuous and structured latent spaces.

## How do you sample the latent space?


The latent space is structured, non sparse continuous and low dimentional where each direction encode a meanful axis of variation(V) of the data. And that means it can be manipulated with content vectors. Vectors that are isolated and represent a concept.
Like the concept of smile another image representation can be added to the smile concept vector then passed to the decoder to create a new image with the person smiling.

```There are concept vectors for any independent direction of the latent space```
```deeplearning with bayesian inference```

## Steps
- Build the encoder
    - The output will be 2 vectors mean and variance
- Build the sampler using a random small vector along with the 2 vectors the encoder will give us
    - normal dist value = mean * exp(std) * epsilon / epsilon is a random small vector from the latent space
- Build the loss
    - kubell-divergence
    - reconstruction loss mean already coded in keras
- Build the decoder
    - the decoder will take the sampled input and reconstruct it to a valid image!

# Encoder

The encoder will transform the image into 2 parameters vectors that will be used to form a normal distribution, mean_vector and standard_deviation_vector

Dummy dataset Nmist

In [16]:
(X_train, _), (X_test, _) = tf.keras.datasets.mnist.load_data()

mnist_digits = np.concatenate([X_train, X_test])
mnist_digits = mnist_digits[:, :, :, np.newaxis]/255.

In [17]:
mnist_digits.shape

(70000, 28, 28, 1)

In [18]:
class VAEncoder(tf.keras.layers.Layer):    
    def __init__(self, latent_dims=2, **kwars):
        super().__init__(**kwars)
        self.latent_dims = latent_dims
        
        self.z_mean = tf.keras.layers.Dense(units=latent_dims, activation="selu")
        self.z_sigma = tf.keras.layers.Dense(units=latent_dims, activation="selu")

        # with padding same the activation map will have the same size
        self.conv_1 = tf.keras.layers.Conv2D(filters=32, kernel_size=3, strides=2, padding="same", activation="relu")

        # with padding same the activation map will have the same size
        self.conv_2 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=2, padding="same", activation="relu")

        # Proyection
        self.proy = tf.keras.Sequential([tf.keras.layers.Dense(units=16, activation="relu"), tf.keras.layers.Dense(units=16)])

        self.flatten = tf.keras.layers.Flatten()

    def call(self, inputs, *args, **kwargs):
        x = self.conv_1(inputs)
        x = self.conv_2(x)
        x = self.flatten(x)
        x = self.proy(x)
        return self.z_mean(x), self.z_sigma(x)
    
    def get_config(self):
        return {"latent_dims":self.latent_dims}

In [19]:
encoder = VAEncoder(latent_dims=2)

In [20]:
sample = np.expand_dims(mnist_digits[0], axis=0)
z_mean, z_sigma = encoder(sample)

# Sampler!

We are sampling from a normal distribution

normal = mu + exp(sigma) * epsilon

epsilon is a random number from a normal distribution

In [21]:
class Sampler(tf.keras.layers.Layer):
    def call(self, z_mean, z_sigma):
        # get the batch size
        batch_size = tf.shape(z_mean.shape)[0]
        latent_dim = tf.shape(z_mean)[1]

        #Epsilon should be the same size as our vectors
        # here we are in the training and everything gets processed in batch
        epsilon = tf.random.normal(shape=(batch_size, latent_dim))

        #This returns a sample point from the distribution we are trying to find.
        # A normal distribution
        # Why over 2?
        return z_mean + tf.math.exp(z_sigma/2) * epsilon

In [22]:
Sampler()(z_mean, z_sigma)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-1.3263273 ,  0.74788636],
       [ 0.11071271,  0.9165265 ]], dtype=float32)>

# Decoder

In [23]:
# z_mean = tf.expand_dims(z_mean, axis=0)
# z_sigma = tf.expand_dims(z_sigma, axis=0)

In [26]:
class VAEDecoder(tf.keras.layers.Layer):
    def __init__(self, latent_dims=2, image_original_widht=7, image_original_height=7, image_original_channels=1, last_conv_channels=64, trainable=True, name=None, dtype=None, dynamic=False, **kwargs):
        super().__init__(trainable, name, dtype, dynamic, **kwargs)
        self.latent_dims = latent_dims
        self.image_original_widht = image_original_widht
        self.image_original_height = image_original_height
        self.image_original_channels = image_original_channels
        self.last_conv_channels = last_conv_channels

        # image flatten size
        # 7 * 7 * 1= 49
        self.latent_space = tf.keras.layers.Dense(units=self.image_original_widht*self.image_original_height*self.last_conv_channels, activation="relu")
        self.reshape = tf.keras.layers.Reshape((self.image_original_height, self.image_original_widht, self.last_conv_channels))
        self.conv_traspose_1 = tf.keras.layers.Conv2DTranspose(filters=32, kernel_size=3, strides=2, padding="same", activation="relu")
        self.conv_traspose_2 = tf.keras.layers.Conv2DTranspose(filters=64, kernel_size=3, strides=2, padding="same", activation="relu")
        self.conv_image_gen = tf.keras.layers.Conv2D(filters=self.image_original_channels, kernel_size=3, padding="same", activation="sigmoid" if self.image_original_channels==1 else "softmax")

    def call(self, inputs, *args, **kwargs):
        x = self.latent_space(inputs)
        x = self.reshape(x)
        x = self.conv_traspose_1(x)
        x = self.conv_traspose_2(x)
        x = self.conv_image_gen(x)

        return x

In [30]:
sampled_point = Sampler()(z_mean, z_sigma)
print(sampled_point.shape)
# Batch 1 sample
sampled_point = tf.expand_dims(sampled_point, axis=0)

(2, 2)


In [29]:
VAEDecoder()(sampled_point)

InvalidArgumentError: Exception encountered when calling layer 'reshape_4' (type Reshape).

{{function_node __wrapped__Reshape_device_/job:localhost/replica:0/task:0/device:GPU:0}} Input to reshape is a tensor with 6272 values, but the requested shape has 3136 [Op:Reshape]

Call arguments received by layer 'reshape_4' (type Reshape):
  • inputs=tf.Tensor(shape=(1, 2, 3136), dtype=float32)