In [37]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
import numpy as np

In [38]:
# Seed TensorFlow's global RNG so weight initialisation and dataset shuffling
# are repeatable across runs.
tf.random.set_seed(42)

# Hyperparameters
batch_size = 128
epochs = 20
learning_rate = 0.001
sparse_lambda = 1e-3  # Weight of the KL sparsity penalty
contractive_lambda = 1e-4  # Weight of the contractive penalty
rho = 0.05  # Target mean activation for the sparsity penalty

# Mean / standard deviation used to standardise MNIST pixels (the values used
# in the PyTorch MNIST example). After standardisation the inputs are no
# longer confined to [0, 1].
mnist_mean = 0.1307
mnist_std = 0.3081

# Load and preprocess the MNIST training images (labels are not needed).
(x_train, _), (_, _) = mnist.load_data()
x_train = x_train.astype('float32') / 255.0
x_train = np.expand_dims(x_train, axis=-1)  # Shape: (60000, 28, 28, 1)
x_train = (x_train - mnist_mean) / mnist_std

# Size the shuffle buffer from the data itself so the shuffle stays a full
# shuffle if the training set is ever subsampled or swapped, and prefetch so
# input preparation overlaps with the training step.
train_dataset = (
    tf.data.Dataset.from_tensor_slices(x_train)
    .shuffle(buffer_size=len(x_train))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

In [39]:
# Convolutional encoder (plain conv/pool stack; no U-Net skip connections)
def build_encoder():
    """Build the convolutional encoder mapping 28x28x1 images to a 128-d code.

    Three stages of Conv2D (3x3, 'same', ReLU) followed by 2x2 max-pooling,
    with 64, 128 and 256 filters, shrink the feature map 28 -> 14 -> 7 -> 3.
    The result is flattened and projected by a Dense layer with no activation.

    Returns:
        A Keras ``Model`` named ``'encoder'``.
    """
    image = layers.Input(shape=(28, 28, 1))

    features = image
    for n_filters in (64, 128, 256):
        features = layers.Conv2D(n_filters, 3, padding='same', activation='relu')(features)
        features = layers.MaxPooling2D(2)(features)

    flat = layers.Flatten()(features)  # 3 * 3 * 256 values per image
    latent = layers.Dense(128)(flat)  # Linear latent code
    return models.Model(image, latent, name='encoder')


In [40]:
# Convolutional decoder (transposed-conv stack; no U-Net skip connections)
def build_decoder():
    """Build the decoder mapping a 128-d latent code back to a 28x28x1 image.

    The layout mirrors the encoder's spatial sizes in reverse (3 -> 7 -> 14 ->
    28). The first transposed convolution uses 'valid' padding because a
    stride-2, kernel-3 'valid' transpose turns 3x3 into exactly 7x7; the two
    'same' stride-2 transposes that follow simply double the size.

    The output layer is linear: the training images are standardised with
    (x - 0.1307) / 0.3081, so targets lie roughly in [-0.42, 2.82] and a
    sigmoid output could never reach them. Every output pixel, including the
    border, is produced by trainable weights (no constant zero padding).

    Returns:
        A Keras ``Model`` named ``'decoder'`` with output shape (28, 28, 1).
    """
    inputs = layers.Input(shape=(128,))
    x = layers.Dense(256 * 3 * 3, activation='relu')(inputs)
    x = layers.Reshape((3, 3, 256))(x)
    x = layers.Conv2DTranspose(128, 3, strides=2, padding='valid', activation='relu')(x)  # 7x7x128
    x = layers.Conv2DTranspose(64, 3, strides=2, padding='same', activation='relu')(x)  # 14x14x64
    x = layers.Conv2DTranspose(1, 3, strides=2, padding='same')(x)  # 28x28x1, linear
    return models.Model(inputs, x, name='decoder')

In [41]:
# Sparse Autoencoder
class SparseAutoencoder(models.Model):
    """Autoencoder built from ``build_encoder()`` and ``build_decoder()``.

    The class itself adds no regularisation; it only chains the two
    sub-models and exposes the latent code alongside the reconstruction.
    """

    def __init__(self):
        super().__init__()
        self.encoder = build_encoder()
        self.decoder = build_decoder()

    def call(self, inputs):
        """Encode ``inputs`` and decode the result.

        Returns:
            A ``(reconstruction, latent_code)`` tuple.
        """
        latent = self.encoder(inputs)
        return self.decoder(latent), latent

In [42]:
# Contractive Autoencoder
class ContractiveAutoencoder(models.Model):
    """Autoencoder built from ``build_encoder()`` and ``build_decoder()``.

    The class itself adds no regularisation; it only chains the two
    sub-models and exposes the latent code alongside the reconstruction.
    """

    def __init__(self):
        super().__init__()
        self.encoder = build_encoder()
        self.decoder = build_decoder()

    def call(self, inputs):
        """Encode ``inputs`` and decode the result.

        Returns:
            A ``(reconstruction, latent_code)`` tuple.
        """
        latent = self.encoder(inputs)
        return self.decoder(latent), latent

In [43]:
def sparse_ae_loss(y_true, y_pred, z):
    """Reconstruction MSE plus a KL-divergence sparsity penalty on the code.

    The penalty is KL(rho || rho_hat) summed over latent units, where rho_hat
    is each unit's mean activation over the batch. That formula is only
    defined for rho_hat strictly inside (0, 1), so ``z`` is first squashed
    with a sigmoid and the batch mean is clipped away from the endpoints;
    applying the formula to raw, unbounded ``z`` yields NaN as soon as a unit's
    mean is <= 0 or >= 1.

    Args:
        y_true: Batch of target images.
        y_pred: Batch of reconstructions, same shape as ``y_true``.
        z: Latent codes with the batch on axis 0.

    Returns:
        Scalar tensor: ``mse + sparse_lambda * sum(KL)``. Uses the module-level
        ``rho`` and ``sparse_lambda``.
    """
    eps = 1e-7
    mse_loss = tf.reduce_mean(tf.square(y_true - y_pred))

    # Mean activation per latent unit, guaranteed to lie in [eps, 1 - eps].
    rho_hat = tf.reduce_mean(tf.sigmoid(z), axis=0)
    rho_hat = tf.clip_by_value(rho_hat, eps, 1.0 - eps)

    kl_div = (
        rho * tf.math.log(rho / rho_hat)
        + (1 - rho) * tf.math.log((1 - rho) / (1 - rho_hat))
    )
    kl_loss = sparse_lambda * tf.reduce_sum(kl_div)
    return mse_loss + kl_loss

# Contractive Autoencoder Loss
def contractive_ae_loss(x, recon, z, model):
    """Reconstruction MSE plus a contractive penalty on the encoder Jacobian.

    A contractive autoencoder penalises the squared Frobenius norm of the
    encoder Jacobian dz/dx, not a gradient taken through the decoder. The
    full Jacobian is expensive, so this uses a single-probe estimate: for
    v ~ N(0, I), E[||J^T v||^2] = ||J||_F^2, and J^T v is one
    vector-Jacobian product.

    The estimate assumes each sample's code depends only on that sample's
    input (true for an encoder without cross-batch layers such as batch
    normalisation).

    This must be called inside the caller's ``tf.GradientTape`` context so the
    penalty itself is differentiable with respect to the encoder weights.

    Args:
        x: Batch of input images (batch on axis 0).
        recon: Reconstructions of ``x``.
        z: Latent codes for ``x``. Unused here (the encoder is re-run on a
            tape that watches ``x``); kept so the signature is unchanged.
        model: Object exposing an ``encoder`` callable.

    Returns:
        Scalar tensor: ``mse + contractive_lambda * mean_b ||J_b^T v_b||^2``.
        Uses the module-level ``contractive_lambda``.
    """
    x = tf.convert_to_tensor(x)
    mse_loss = tf.reduce_mean(tf.square(x - recon))

    # Re-run the encoder with the input watched so dz/dx is available.
    with tf.GradientTape() as jac_tape:
        jac_tape.watch(x)
        code = model.encoder(x)

    # One random probe per sample; gradient(..., output_gradients=v) = J^T v.
    probe = tf.random.normal(tf.shape(code), dtype=code.dtype)
    vjp = jac_tape.gradient(code, x, output_gradients=probe)

    per_sample_sq_norm = tf.reduce_sum(
        tf.square(tf.reshape(vjp, [tf.shape(vjp)[0], -1])), axis=1
    )
    j_loss = contractive_lambda * tf.reduce_mean(per_sample_sq_norm)
    return mse_loss + j_loss

In [44]:
# Training function
def train(model, dataset, loss_fn, epochs, model_type='sparse'):
    """Train ``model`` with Adam and print the mean loss after every epoch.

    Args:
        model: Callable returning ``(reconstruction, latent_code)`` for a
            batch and exposing ``trainable_variables``.
        dataset: Iterable of input batches, iterated once per epoch.
        loss_fn: Called as ``loss_fn(batch, recon, z)`` when ``model_type`` is
            ``'sparse'``, otherwise as ``loss_fn(batch, recon, z, model)``.
        epochs: Number of passes over ``dataset``.
        model_type: Selects the ``loss_fn`` call form and labels the log line.

    The optimiser uses the module-level ``learning_rate``.
    """
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    for epoch in range(epochs):
        # A running mean counts the batches actually seen, so this works for
        # datasets without a known length and does not divide by zero when
        # the dataset is empty (unlike total / len(dataset)).
        epoch_loss = tf.keras.metrics.Mean()
        for batch in dataset:
            # The loss is evaluated inside the tape so that penalties which
            # take gradients themselves stay differentiable.
            with tf.GradientTape() as tape:
                recon, z = model(batch)
                if model_type == 'sparse':
                    loss = loss_fn(batch, recon, z)
                else:
                    loss = loss_fn(batch, recon, z, model)
            variables = model.trainable_variables
            gradients = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(gradients, variables))
            epoch_loss.update_state(loss)
        print(f'Epoch {epoch+1}/{epochs}, {model_type.capitalize()} AE Loss: {float(epoch_loss.result()):.6f}')

In [None]:
# Instantiate both autoencoders (they are trained with the custom loop, so
# no Keras compile step is involved).
sparse_ae = SparseAutoencoder()
contractive_ae = ContractiveAutoencoder()

# Train the sparse model first, then the contractive one, on the same data.
for banner, net, criterion, kind in (
    ("Training Sparse Autoencoder...", sparse_ae, sparse_ae_loss, 'sparse'),
    ("\nTraining Contractive Autoencoder...", contractive_ae, contractive_ae_loss, 'contractive'),
):
    print(banner)
    train(net, train_dataset, criterion, epochs, model_type=kind)

Training Sparse Autoencoder...
