In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models, datasets
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np

# =======================
# 1. Load MNIST dataset
# =======================
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

# Normalize & Flatten
x_train = x_train.astype("float32") / 255.0
x_test  = x_test.astype("float32") / 255.0

x_train = x_train.reshape((len(x_train), 784))
x_test  = x_test.reshape((len(x_test), 784))

# =======================
# 2. Build Autoencoder + Classifier
# =======================
input_img = layers.Input(shape=(784,))

# Encoder
encoded = layers.Dense(128, activation='relu')(input_img)
encoded = layers.Dense(64, activation='relu')(encoded)
bottleneck = layers.Dense(32, activation='relu', name="bottleneck")(encoded)

# Decoder (for reconstruction)
decoded = layers.Dense(64, activation='relu')(bottleneck)
decoded = layers.Dense(128, activation='relu')(decoded)
decoded = layers.Dense(784, activation='sigmoid', name="reconstruction")(decoded)

# Classifier head
classifier_output = layers.Dense(10, activation='softmax', name="classification")(bottleneck)

# Combined model
combined = models.Model(input_img, [decoded, classifier_output])

# Compile with two outputs (must match layer names!)
combined.compile(
    optimizer='adam',
    loss={
        "reconstruction": "mse",
        "classification": "sparse_categorical_crossentropy"
    },
    loss_weights={"reconstruction": 0.5, "classification": 1.0},
    metrics={"classification": "accuracy"}
)

combined.summary()

# =======================
# 3. Train the model
# =======================
history = combined.fit(
    x_train,
    {"reconstruction": x_train, "classification": y_train},
    epochs=10,
    batch_size=256,
    validation_split=0.2,
    verbose=2
)

# =======================
# 4. Evaluate on test set
# =======================
test_loss, recon_loss, cls_loss, cls_acc = combined.evaluate(
    x_test,
    {"reconstruction": x_test, "classification": y_test},
    verbose=2
)
print(f"\nTest Classification Accuracy: {cls_acc:.4f}")

# =======================
# 5. Predictions
# =======================
decoded_imgs, predictions = combined.predict(x_test)
predicted_labels = np.argmax(predictions, axis=1)

# Show sample predictions + reconstructions
n = 10
plt.figure(figsize=(15, 3))
for i in range(n):
    # Original
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_test[i].reshape(28, 28), cmap="gray")
    plt.title(f"Pred: {predicted_labels[i]}\nTrue: {y_test[i]}")
    plt.axis("off")

    # Reconstructed
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(28, 28), cmap="gray")
    plt.title("Reconstructed")
    plt.axis("off")
plt.show()

# =======================
# 6. Latent space visualization (t-SNE)
# =======================
# Extract encoder (up to bottleneck)
encoder = models.Model(input_img, bottleneck)

# Get bottleneck features
latent_vectors = encoder.predict(x_test[:2000])  # subset for speed

# Apply t-SNE
tsne = TSNE(n_components=2, random_state=42, perplexity=30)
latent_2d = tsne.fit_transform(latent_vectors)

# Plot clusters
plt.figure(figsize=(8, 6))
scatter = plt.scatter(latent_2d[:, 0], latent_2d[:, 1], c=y_test[:2000], cmap="tab10", alpha=0.7)
plt.colorbar(scatter, ticks=range(10))
plt.title("t-SNE of Latent Space (Bottleneck Layer)")
plt.show()


Epoch 1/10


ValueError: Dimensions must be equal, but are 784 and 10 for '{{node compile_loss/mse/sub}} = Sub[T=DT_FLOAT](data_1, functional_1_1/classification_1/Softmax)' with input shapes: [?,784], [?,10].