Implement the Sparse auto-encoder and the Contractive Auto-encoder. Use the MNIST digit dataset for
training your network. Use the U-Net auto-encoder architecture for encoder and decoder without skip
connections. Let E be the trained encoder and D be the trained decoder, h = E(I) be the embedding of
an image I and let ˆI = D(h) be the output of the decoder.
(a) (20 points) Plot the t-sne (use inbuilt function) on the embeddings obtained using the respective
auto-encoders. Color the clusters using the respective ground-truth class labels.
(b) (20 points) Randomly take two images I1 and I2 from two different digit classes. Let h1 = E(I1)
and h2 = E(I2) be the embeddings for these images, respectively. Construct another image Iα =
αI1+(1−α)I2 for α = 0, 0.2, 0.4, 0.6, 0.8, 1. Find the embedding hα of this image Iα for all values of α
by passing it through the encoder. Also, consider the approximate embedding h
′
α = αh1 + (1−α)h2
by using directly the embeddings of the images I1 and I2. Also, find ˆIα = D(hα) and ˆI
′
α = D(h
′
α).
Plot the images ˆIα and ˆI
′
α side by side for different values of α. Do this for 20 pairs (I1, I2). Report
PSNR between ˆIα and ˆI
′
α and find ∥hˆ
α − hˆ′
α∥2 for all values of alpha.
(c) (20 points) After training the autoencoders, you want to check if the embeddings of different digits
are different and embeddings within a class are similar. For this purpose, you propose to perform
the classification of the digits based on the embeddings obtained by the encoders and check the
accuracy of classifications for each of the Auto-encoder. Report the classification accuracy for each
of the AE and report which one is better. Use any inbuilt classifier to solve the classification
problem.

In [1]:
%pip install tensorflow numpy pandas matplotlib scikit-learn seaborn

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
import numpy as np

In [3]:
# Hyperparameters
batch_size = 128
epochs = 20
learning_rate = 0.001
sparse_lambda = 1e-3  # Sparsity penalty
contractive_lambda = 1e-4  # Contractive penalty
rho = 0.05  # Target sparsity

# Load and preprocess MNIST dataset
(x_train, _), (_, _) = mnist.load_data()
x_train = x_train.astype('float32') / 255.0
x_train = np.expand_dims(x_train, axis=-1)  # Shape: (60000, 28, 28, 1)
x_train = (x_train - 0.1307) / 0.3081  # Normalize as per PyTorch example
train_dataset = tf.data.Dataset.from_tensor_slices(x_train).shuffle(60000).batch(batch_size)

In [17]:
import tensorflow as tf

def encoder_block(inputs, num_filters):
    x = layers.Conv2D(num_filters, 3, padding='valid')(inputs)
    x = layers.Activation('relu')(x)    
    x = layers.Conv2D(num_filters, 3, padding='valid')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPool2D(pool_size=(2, 2), strides=2)(x)
    return x
    
def decoder_block(inputs, skip_features, num_filters):
    x = layers.Conv2DTranspose(num_filters, (2, 2), strides=2, padding='valid')(inputs)
    x = layers.Conv2D(num_filters, 3, padding='valid')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(num_filters, 3, padding='valid')(x)
    x = layers.Activation('relu')(x)
    return x

class UNetAutoEncoderArchitecture():
    def build_encoder(self, input_shape=(28, 28, 1)):
        inputs= layers.Input(shape=input_shape)
        x = encoder_block(inputs, 64)
        x = encoder_block(x, 128)
        x = encoder_block(x, 256)        
        return models.Model(inputs=inputs, outputs=x, name='encoder')
    
    def build_decoder(self, input_shape=(128,)):
        inputs = layers.Input(shape=input_shape)
        x = decoder_block(inputs, None, 256)
        x = decoder_block(x, None, 128)
        x = decoder_block(x, None, 64)
        return models.Model(inputs=inputs, outputs=x, name='decoder')


In [18]:
class SparseAutoencoder(models.Model):
    def __init__(self, input_shape):
        super(SparseAutoencoder, self).__init__()
        self.encoder = UNetAutoEncoderArchitecture().build_encoder(input_shape)
        self.decoder = UNetAutoEncoderArchitecture().build_decoder()
        self.flatten = layers.Flatten()
        self.dense = layers.Dense(128, activation='relu')
        self.output_layer = layers.Dense(np.prod(input_shape), activation='sigmoid')
    
    def call(self, inputs):
        x = self.encoder(inputs)
        x = self.flatten(x)
        x = self.dense(x)
        x = self.output_layer(x)
        return x

In [None]:
class ContractiveAutoencoder(models.Model):
    def __init__(self, input_shape):
        super(ContractiveAutoencoder, self).__init__()
        self.encoder = UNetAutoEncoderArchitecture().build_encoder()
        self.decoder = UNetAutoEncoderArchitecture().build_decoder()
        self.flatten = layers.Flatten()
        self.dense = layers.Dense(128, activation='relu')
        self.output_layer = layers.Dense(np.prod(input_shape), activation='sigmoid')
    
    def call(self, inputs):
        x = self.encoder(inputs)
        x = self.flatten(x)
        x = self.dense(x)
        x = self.output_layer(x)
        return x
    

In [14]:
def sparse_loss(y_true, y_pred):
    mse_loss = tf.reduce_mean(keras.losses.MeanSquaredError()(y_true, y_pred))
    hidden_layer_output = encoder(y_true)
    mean_activation = tf.reduce_mean(hidden_layer_output, axis=0)

    kl_divergence = tf.reduce_sum(sparsity_level * tf.math.log(sparsity_level / (mean_activation + 1e-10)) +
                                  (1 - sparsity_level) * tf.math.log((1 - sparsity_level) / (1 - mean_activation + 1e-10)))

    return mse_loss + lambda_sparse * kl_divergence

In [20]:
sparse_ae = SparseAutoencoder(input_shape=(28, 28, 1))
sparse_ae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss=sparse_loss)
sparse_ae.fit(train_dataset, epochs=epochs)

ValueError: Input 0 of layer "conv2d_transpose_1" is incompatible with the layer: expected min_ndim=4, found ndim=2. Full shape received: (None, 128)

In [None]:
reconstructed = autoencoder.predict(x_test)

In [None]:
n = 10
plt.figure(figsize=(20, 4))
for i in range(n):
    # Original images
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_test[i].reshape(28, 28), cmap='gray')
    plt.title("Original")
    plt.axis('off')

    # Reconstructed images
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(reconstructed[i].reshape(28, 28), cmap='gray')
    plt.title("Reconstructed")
    plt.axis('off')
plt.show()