In [None]:
# undercomplete autoencoder: because the codings have fewer features than the inputs,
# the network must learn the most important features to reconstruct the inputs correctly

from tensorflow import keras

# The encoder compresses 3 input features into 2 codings; the decoder maps
# the 2 codings back to 3 outputs.
encoder = keras.models.Sequential([keras.layers.Dense(2, input_shape=[3])])
decoder = keras.models.Sequential([keras.layers.Dense(3, input_shape=[2])])

autoencoder = keras.models.Sequential([encoder, decoder])

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
autoencoder.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=0.1))

# training the autoencoder: the same dataset is used as inputs and as targets
# NOTE(review): x_train is not defined in this cell; it must have 3 features here.

history = autoencoder.fit(x_train, x_train, epochs=50)
codings = encoder.predict(x_train) # 2-feature codings from the trained encoder

# autoencoders find another plane to project the data onto, similar to PCA

In [None]:
# stacked encoder for fashion_mnist

# for a deeper network, add LeCun normal initialization (kernel_initializer="lecun_normal") to the selu layers

# Encoder: flatten each 28x28 image and compress it to 30 codings.
stacked_encoder = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(100, activation="selu"),
    keras.layers.Dense(30, activation="selu"),
])

# Decoder: expand the 30 codings back to 28*28 values and reshape to an image.
stacked_decoder = keras.models.Sequential([
    keras.layers.Dense(100, activation="selu", input_shape=[30]),
    keras.layers.Dense(28*28, activation="sigmoid"),
    keras.layers.Reshape([28,28])
])

# better to use binary crossentropy than mse here because each output pixel
# is treated as a binary classification problem rather than a regression

stacked_ae = keras.models.Sequential([stacked_encoder, stacked_decoder])
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
stacked_ae.compile(loss="binary_crossentropy", optimizer=keras.optimizers.SGD(learning_rate=1.5))
# The original call ended with a bare positional `valid` after a keyword
# argument (a SyntaxError); validate on the validation images, which are both
# input and target.
history = stacked_ae.fit(x_train, x_train, epochs=10, validation_data=(x_valid, x_valid))

In [None]:
# visualizing the reconstructions (images compressed, then decoded)

def plot_image(image):
    """Show `image` with pyplot using the "binary" colormap and turn the axis off."""
    plt.imshow(image, cmap="binary")
    plt.axis("off")
    
def show_reconstructions(model, n_images=5):
    """Plot the first `n_images` validation images above their reconstructions.

    Args:
        model: object with a `predict` method; it is called on
            `x_valid[:n_images]` (module-level `x_valid`).
        n_images: number of image columns to draw.
    """
    originals = x_valid[:n_images]
    reconstructions = model.predict(originals)
    plt.figure(figsize=(n_images * 1.5, 3))
    for column in range(n_images):
        # top row: the original image
        plt.subplot(2, n_images, column + 1)
        plot_image(originals[column])
        # bottom row: the model's reconstruction of that image
        plt.subplot(2, n_images, n_images + column + 1)
        plot_image(reconstructions[column])

show_reconstructions(stacked_ae)

In [None]:
# encoders aren't good for visualization, best when combined with another method for this

from sklearn.manifold import TSNE

# Compress the validation images with the stacked encoder, then let t-SNE
# reduce those codings to 2 dimensions for plotting.
x_valid_compressed = stacked_encoder.predict(x_valid)
tsne = TSNE()
x_valid_2D = tsne.fit_transform(x_valid_compressed)

# plot the dataset, coloured by y_valid

# (the original referenced the undefined name `x_valid2D`)
plt.scatter(x_valid_2D[:, 0], x_valid_2D[:, 1], c=y_valid, s=10, cmap="tab10")

In [None]:
# tying the decoder weights to the encoder weights to speed up training

class DenseTranspose(keras.layers.Layer):
    """Dense-like layer whose kernel is the transpose of another layer's kernel.

    The layer owns only a bias vector; the kernel is read from
    `dense.weights[0]` at call time, so it stays tied to the wrapped layer.

    Args:
        dense: the layer whose first weight is reused, transposed.
        activation: activation identifier resolved by `keras.activations.get`.
        **kwargs: forwarded to `keras.layers.Layer.__init__`.
    """
    def __init__(self, dense, activation=None, **kwargs):
        # Initialise the base Layer first so Keras' attribute tracking is set
        # up before the wrapped layer is assigned as an attribute.
        super().__init__(**kwargs)
        self.dense = dense
        self.activation = keras.activations.get(activation)
    def build(self, batch_input_shape):
        # One bias per output unit; the output width equals the wrapped
        # layer's input width because the kernel is used transposed.
        self.biases = self.add_weight(
            name="bias",
            initializer="zeros",
            shape = [self.dense.input_shape[-1]]
        )
        super().build(batch_input_shape)
    def call(self, inputs):
        # inputs @ kernel^T, using the wrapped layer's current kernel
        z = tf.matmul(inputs, self.dense.weights[0], transpose_b=True)
        return self.activation(z + self.biases)
    
# using the layer

# The two Dense layers are kept in variables so the decoder can reuse their kernels.
dense_1 = keras.layers.Dense(100, activation="selu")
dense_2 = keras.layers.Dense(30, activation="selu")

tied_encoder = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    dense_1,
    dense_2
])

# The decoder mirrors the encoder in reverse order, each layer tied to its
# encoder counterpart.
tied_decoder = keras.models.Sequential([
    DenseTranspose(dense_2, activation="selu"),
    DenseTranspose(dense_1, activation="sigmoid"),
    keras.layers.Reshape([28, 28])
])

# (the original had a doubled dot, `keras.models..Sequential`, a SyntaxError)
tied_ae = keras.models.Sequential([tied_encoder, tied_decoder])

In [None]:
# convolutional autoencoder for working with images

# Encoder: three conv + 2x2 max-pool stages. With "same" padding the convolutions
# keep the spatial size and each pool halves it: 28 -> 14 -> 7 -> 3, ending
# with 64 feature maps.
conv_encoder = keras.models.Sequential([
    keras.layers.Reshape([28, 28, 1], input_shape=[28, 28]),
    keras.layers.Conv2D(16, kernel_size=3, padding="same", activation="selu"),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(32, kernel_size=3, padding="same", activation="selu"),
    # was `keras.layers.MaxPool`, which does not exist; the 2D pooling layer is MaxPool2D
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(64, kernel_size=3, padding="same", activation="selu"),
    keras.layers.MaxPool2D(pool_size=2)
])

# Decoder: three stride-2 transposed convolutions take a [3, 3, 64] input back
# up to a single-channel map, which is then reshaped to [28, 28].
conv_decoder = keras.models.Sequential([
    keras.layers.Conv2DTranspose(32, kernel_size=3, strides=2, padding="valid",
                                 activation="selu", input_shape=[3, 3, 64]),
    keras.layers.Conv2DTranspose(16, kernel_size=3, strides=2, padding="same",
                                 activation="selu"),
    keras.layers.Conv2DTranspose(1, kernel_size=3, strides=2, padding="same",
                                 activation="sigmoid"),
    keras.layers.Reshape([28, 28]),
])

# Full convolutional autoencoder: encoder followed by decoder.
conv_ae = keras.models.Sequential([conv_encoder, conv_decoder])

In [None]:
# recurrent encoder for sequences

# Encoder: reads sequences of any length with 28 features per step and
# returns a single 30-dimensional vector (the last LSTM does not return sequences).
recurrent_encoder = keras.models.Sequential([
    keras.layers.LSTM(100, return_sequences=True, input_shape=[None, 28]),
    keras.layers.LSTM(30)
])

# Decoder: repeats the 30-dimensional coding 28 times, then produces 28
# outputs per time step.
# (A first `recurrent_decoder` that duplicated the encoder and was immediately
# overwritten by this definition has been removed.)
recurrent_decoder = keras.models.Sequential([
    keras.layers.RepeatVector(28, input_shape=[30]),
    keras.layers.LSTM(100, return_sequences=True),
    keras.layers.TimeDistributed(keras.layers.Dense(28, activation="sigmoid"))
])
recurrent_ae = keras.models.Sequential([recurrent_encoder, recurrent_decoder])

In [None]:
# can add dropout to a model to force it to learn useful features
# remember dropout is only active during training; it is switched off at prediction time

# Encoder: half of the flattened input pixels are dropped before the dense layers.
dropout_encoder = keras.models.Sequential()
dropout_encoder.add(keras.layers.Flatten(input_shape=[28, 28]))
dropout_encoder.add(keras.layers.Dropout(0.5))
dropout_encoder.add(keras.layers.Dense(100, activation="selu"))
dropout_encoder.add(keras.layers.Dense(30, activation="selu"))

# Decoder: plain dense stack from the 30 codings back to a 28x28 image.
dropout_decoder = keras.models.Sequential()
dropout_decoder.add(keras.layers.Dense(100, activation="selu", input_shape=[30]))
dropout_decoder.add(keras.layers.Dense(28*28, activation="sigmoid"))
dropout_decoder.add(keras.layers.Reshape([28, 28]))

dropout_ae = keras.models.Sequential([dropout_encoder, dropout_decoder])

In [None]:
# sparse autoencoder, less neurons, each one has more info

sparse_l1_encoder = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(100, activation="selu"),
    keras.layers.Dense(300, activation="sigmoid"),
    keras.layers.ActivityRegularization(l1=1e-3) # punishes it the further from zero it gets
])                                               # but since it is also punished if it can't
                                                 # predict, it is forced to output something

# Named `sparse_l1_decoder` to match the reference below; the original defined
# `sparse_l2_decoder`, which left `sparse_l1_decoder` undefined.
sparse_l1_decoder = keras.models.Sequential([
    keras.layers.Dense(100, activation="selu", input_shape=[300]),
    keras.layers.Dense(28 * 28, activation="sigmoid"),
    keras.layers.Reshape([28, 28])
])

sparse_l1_ae = keras.models.Sequential([sparse_l1_encoder, sparse_l1_decoder])

In [None]:
# Kullback-Leibler, punish if it strays from target activation

# short alias for the Keras backend module (used below as k.mean)
k = keras.backend

# short alias for the Keras Kullback-Leibler divergence loss function
kl_divergence = keras.losses.kullback_leibler_divergence

class KLDivergenceRegularizer(keras.regularizers.Regularizer):
    """Regularizer that penalises mean activations for straying from `target`.

    Args:
        weight: factor the KL-divergence penalty is multiplied by.
        target: desired mean activation (default 0.1).
    """
    def __init__(self, weight, target=0.1):
        self.weight = weight
        self.target = target
    def __call__(self, inputs):
        # mean over axis 0 of the inputs
        mean_activities = k.mean(inputs, axis=0)
        # KL divergence for the target and for its complement
        # (the original misspelled `mean_activities` on the second term, a NameError)
        return self.weight * (
            kl_divergence(self.target, mean_activities) +
            kl_divergence(1. - self.target, 1. - mean_activities)
        )
    
# KL-divergence sparsity penalty applied to the activations of the coding layer.
kld_reg = KLDivergenceRegularizer(weight=0.05, target=0.1)

# Encoder: the 300-unit sigmoid coding layer carries the activity regularizer.
sparse_kl_encoder = keras.models.Sequential()
sparse_kl_encoder.add(keras.layers.Flatten(input_shape=[28, 28]))
sparse_kl_encoder.add(keras.layers.Dense(100, activation="selu"))
sparse_kl_encoder.add(
    keras.layers.Dense(300, activation="sigmoid", activity_regularizer=kld_reg)
)

# Decoder: from the 300 codings back to a 28x28 image.
sparse_kl_decoder = keras.models.Sequential()
sparse_kl_decoder.add(keras.layers.Dense(100, activation="selu", input_shape=[300]))
sparse_kl_decoder.add(keras.layers.Dense(28 * 28, activation="sigmoid"))
sparse_kl_decoder.add(keras.layers.Reshape([28, 28]))

sparse_kl_ae = keras.models.Sequential([sparse_kl_encoder, sparse_kl_decoder])

In [None]:
# creating a variational autoencoder
# instead of a fixed encoding, it creates a mean, std encoding from training
# then when you use it later it samples from this distribution

k = keras.backend

# The base class is `keras.layers.Layer` (capital L); `keras.layers.layer`
# does not exist.
class Sampling(keras.layers.Layer):
    """Sample codings from a normal distribution given its mean and log-variance.

    `call` expects `inputs` to unpack into `(mean, log_var)` and returns
    `noise * exp(log_var / 2) + mean`, where `noise` is standard normal with
    the shape of `log_var`.
    """
    def call(self, inputs):
        mean, log_var = inputs
        # exp(log_var / 2) is the standard deviation
        return k.random_normal(tf.shape(log_var)) * k.exp(log_var / 2) + mean

# encoder
codings_size = 10
inputs = keras.layers.Input(shape=[28, 28])
z = keras.layers.Flatten()(inputs)
z = keras.layers.Dense(150, activation="selu")(z)
z = keras.layers.Dense(100, activation="selu")(z)
# two parallel heads on the same hidden features: mean and log-variance
codings_mean = keras.layers.Dense(codings_size)(z)
codings_log_var = keras.layers.Dense(codings_size)(z) # actually better to use log of variance than std, faster
codings = Sampling()([codings_mean, codings_log_var])
# `inputs` / `outputs` are keyword arguments of keras.Model; the original
# wrapped them in a list literal, which is a SyntaxError.
variational_encoder = keras.Model(
    inputs=[inputs],
    outputs=[codings_mean, codings_log_var, codings]
)

# decoder: dense stack from the codings back to a 28x28 image
decoder_inputs = keras.layers.Input(shape=[codings_size])
x = decoder_inputs
for units, activation in [(100, "selu"), (150, "selu"), (28 * 28, "sigmoid")]:
    x = keras.layers.Dense(units, activation=activation)(x)
outputs = keras.layers.Reshape([28, 28])(x)
variational_decoder = keras.Model(inputs=[decoder_inputs], outputs=[outputs])

# adding it together: only the sampled codings (third encoder output) feed the decoder
_, _, codings = variational_encoder(inputs)
reconstructions = variational_decoder(codings)
variational_ae = keras.Model(inputs=[inputs], outputs=[reconstructions])

# add the loss function: latent (KL) loss computed from the encoder's mean and
# log-variance tensors, summed over the coding dimension
latent_loss = -0.5 * k.sum(1 + codings_log_var - k.exp(codings_log_var) - k.square(codings_mean), axis=-1)
# average over the batch and divide by 784 (= 28 * 28 pixels)
variational_ae.add_loss(k.mean(latent_loss) / 784.)
variational_ae.compile(loss="binary_crossentropy", optimizer="rmsprop")

# training

# validation_data is passed as a tuple (inputs, targets), the form documented for Model.fit
history = variational_ae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_valid, x_valid))

In [None]:
# generating images from a random distribution using decoder

# 12 random codings drawn from a standard normal, decoded into images
codings = tf.random.normal(shape=[12, codings_size])
images = variational_decoder(codings).numpy()

# semantic interpolation, creating an intermediate between images

# 12 codings arranged as one 3x4 grid: [batch=1, height=3, width=4, codings_size].
# The original shape [1,2,3,4, codings_size] holds 24 codings' worth of values,
# so the reshape would fail.
codings_grid = tf.reshape(codings, [1, 3, 4, codings_size])
larger_grid = tf.image.resize(codings_grid, size=[5, 7]) # resize to 5x7
interpolated_codings = tf.reshape(larger_grid, [-1, codings_size])
images = variational_decoder(interpolated_codings).numpy()

In [None]:
# simple GAN

# generator is a decoder
# discriminator is a binary classifier

codings_size = 30
# Generator: maps a codings vector to a 28x28 image. The output layer uses
# sigmoid (the original used selu) so generated pixels are bounded to [0, 1],
# like the other decoders in this notebook.
# NOTE(review): assumes the real images fed to the discriminator are scaled to [0, 1] - confirm.
generator = keras.models.Sequential([
    keras.layers.Dense(100, activation="selu", input_shape=[codings_size]),
    keras.layers.Dense(150, activation="selu"),
    keras.layers.Dense(28 * 28, activation="sigmoid"),
    keras.layers.Reshape([28, 28])
])

# Discriminator: binary classifier with a single sigmoid output per image.
discriminator = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(150, activation="selu"),
    keras.layers.Dense(100, activation="selu"),
    keras.layers.Dense(1, activation="sigmoid"),
])

gan = keras.models.Sequential([generator, discriminator])

# compilation, discriminator can compile with a normal loss function,
# generator only trained with gan model, don't compile beforehand

discriminator.compile(loss="binary_crossentropy", optimizer="rmsprop")
# frozen before compiling `gan`, so the combined model is compiled with the
# discriminator marked non-trainable
discriminator.trainable = False
gan.compile(loss="binary_crossentropy", optimizer="rmsprop")

# need to write a custom training loop

batch_size = 32
# shuffle with a 1000-element buffer, cut into full batches only, prefetch one batch
dataset = (
    tf.data.Dataset.from_tensor_slices(x_train)
    .shuffle(1000)
    .batch(batch_size, drop_remainder=True)
    .prefetch(1)
)

def train_gan(gan, dataset, batch_size, codings_size, n_epochs=50):
    """Alternate discriminator and generator updates over `dataset`.

    Args:
        gan: model whose `layers` unpack into (generator, discriminator).
        dataset: iterable yielding batches of real images.
        batch_size: number of noise vectors drawn per step; also used to
            build the label tensors.
        codings_size: length of each noise vector.
        n_epochs: number of passes over `dataset`.
    """
    generator, discriminator = gan.layers
    for _ in range(n_epochs):
        for real_images in dataset:
            # phase 1 - discriminator: fakes are labelled 0, real images 1
            latent = tf.random.normal(shape=[batch_size, codings_size])
            fake_images = generator(latent)
            mixed_images = tf.concat([fake_images, real_images], axis=0)
            mixed_labels = tf.constant([[0.]] * batch_size + [[1.]] * batch_size)
            discriminator.trainable = True
            discriminator.train_on_batch(mixed_images, mixed_labels)
            # phase 2 - generator: trained through `gan` with every label set to 1
            latent = tf.random.normal(shape=[batch_size, codings_size])
            target_labels = tf.constant([[1.]] * batch_size)
            discriminator.trainable = False
            gan.train_on_batch(latent, target_labels)

# calling it

train_gan(gan, dataset, batch_size, codings_size)

In [None]:
# deep convolutional GAN
# replace pooling with strided convolutions in the discriminator
# and transposed convolutions in the generator
# use batch normalization in both, except in generator output and discriminator input
# no FC layers in deeper architectures
# use ReLU in all layers in generator except output which uses tanh
# use leaky ReLU in discriminator for all layers

codings_size = 100

# Generator: project the codings to a 7x7x128 tensor, then upsample twice with
# stride-2 transposed convolutions (7 -> 14 -> 28); tanh output.
generator = keras.models.Sequential([
    keras.layers.Dense(7 * 7 * 128, input_shape=[codings_size]),
    keras.layers.Reshape([7, 7, 128]),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2DTranspose(64, kernel_size=5, strides=2, padding="same", activation="selu"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2DTranspose(1, kernel_size=5, strides=2, padding="same", activation="tanh")
])

# Discriminator: two stride-2 convolutions with leaky ReLU and dropout,
# followed by a single sigmoid unit.
discriminator = keras.models.Sequential([
    keras.layers.Conv2D(
        64,
        kernel_size=5,
        strides=2,
        padding="same",
        activation=keras.layers.LeakyReLU(0.2),
        input_shape=[28, 28, 1]
    ),
    keras.layers.Dropout(0.4),
    keras.layers.Conv2D(
        64,
        kernel_size=5,
        strides=2,
        padding="same",
        activation=keras.layers.LeakyReLU(0.2)
    ),
    keras.layers.Dropout(0.4),
    keras.layers.Flatten(),
    keras.layers.Dense(1, activation="sigmoid")
])

gan = keras.models.Sequential([generator, discriminator])

# These are new model objects, so they must be compiled before train_on_batch
# can be called on them; same recipe as for the simple GAN.
discriminator.compile(loss="binary_crossentropy", optimizer="rmsprop")
discriminator.trainable = False
gan.compile(loss="binary_crossentropy", optimizer="rmsprop")

# discriminator needs input of [28, 28, 1]
# rescale to work with the tanh -1 to 1 range (assumes x_train is scaled to [0, 1])
# A new name is used instead of overwriting x_train, so re-running this cell
# does not rescale the data a second time.
x_train_dcgan = x_train.reshape(-1, 28, 28, 1) * 2. - 1.

# The earlier `dataset` was built from the un-reshaped images, so build a
# fresh one from the reshaped, rescaled data.
dataset = tf.data.Dataset.from_tensor_slices(x_train_dcgan).shuffle(1000)
dataset = dataset.batch(batch_size, drop_remainder=True).prefetch(1)

# can call it with the same code as earlier
train_gan(gan, dataset, batch_size, codings_size)