# 02. Mini Autoencoder

This notebook implements a minimal autoencoder to learn compressed representations of MNIST images.

## Experiment Overview
- **Goal**: Learn compressed representations of MNIST images using an autoencoder
- **Model**: Encoder-decoder architecture (784 → 128 → 32 → 128 → 784, with a 32-dimensional latent code)
- **Features**: Reconstruction visualization, latent space exploration
- **Learning**: Understanding unsupervised learning and dimensionality reduction

## What You'll Learn
- Building encoder-decoder architectures
- Unsupervised learning with autoencoders
- Latent space visualization
- Reconstruction quality assessment


In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import sys
import os
from sklearn.manifold import TSNE

# Add scripts directory to path
sys.path.append('../scripts')
from utils import load_mnist_data, get_device, set_seed
from train import train_model
from evaluate import ModelEvaluator

# Fix the random seed so repeated runs of the notebook are comparable.
# NOTE(review): set_seed is imported from ../scripts/utils.py; presumably it
# seeds the Python, NumPy and PyTorch RNGs — confirm there.
set_seed(42)

# Select the compute device that the model and every batch are moved to below.
# NOTE(review): get_device is a project helper; presumably it prefers a GPU
# when one is available — confirm in ../scripts/utils.py.
device = get_device()
print(f"Using device: {device}")


In [None]:
# Define the Mini Autoencoder model
class MiniAutoencoder(nn.Module):
    """Fully connected autoencoder.

    The encoder maps ``input_size -> intermediate_size -> hidden_size`` and the
    decoder mirrors it back to ``input_size``. Every layer is followed by ReLU
    except the last decoder layer, which uses a sigmoid.

    Args:
        input_size: Number of features per flattened sample (784 for a
            28x28 image).
        hidden_size: Width of the latent code produced by the encoder.
        intermediate_size: Width of the single layer between the input and
            the latent code (and between the latent code and the output).
    """

    def __init__(self, input_size=784, hidden_size=32, intermediate_size=128):
        super().__init__()

        # Encoder is built before the decoder so that parameter
        # initialisation order (and therefore seeded weights) is stable.
        self.encoder = nn.Sequential(
            nn.Linear(input_size, intermediate_size),
            nn.ReLU(),
            nn.Linear(intermediate_size, hidden_size),
            nn.ReLU(),  # latent codes are therefore non-negative
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(hidden_size, intermediate_size),
            nn.ReLU(),
            nn.Linear(intermediate_size, input_size),
            # Output lies in (0, 1); assumes inputs are scaled to [0, 1]
            # — TODO confirm against load_mnist_data.
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``(decoded, encoded)`` for a batch ``x``.

        ``x`` may have any shape whose first dimension is the batch; the
        remaining dimensions are flattened and must total ``input_size``.
        ``decoded`` has shape ``(batch, input_size)`` and ``encoded`` has
        shape ``(batch, hidden_size)``.
        """
        # Reuse encode/decode so the three entry points cannot drift apart.
        encoded = self.encode(x)
        decoded = self.decode(encoded)
        return decoded, encoded

    def encode(self, x):
        """Encode input to latent space."""
        # reshape (unlike view) also accepts non-contiguous tensors.
        x = x.reshape(x.size(0), -1)
        return self.encoder(x)

    def decode(self, z):
        """Decode from latent space."""
        return self.decoder(z)

# Instantiate the autoencoder with its default sizes and move it to the
# selected device.
model = MiniAutoencoder().to(device)

# Report the architecture and its footprint. The size estimate assumes
# 4 bytes per parameter (i.e. float32 weights).
param_count = sum(p.numel() for p in model.parameters())
print("Model Architecture:")
print(model)
print(f"\nTotal parameters: {param_count:,}")
print(f"Model size: {param_count * 4 / 1024 / 1024:.2f} MB")


In [None]:
# Load MNIST dataset
# NOTE(review): load_mnist_data is a project helper from ../scripts/utils.py;
# the exact meaning of test_split and any normalisation it applies should be
# confirmed there.
print("Loading MNIST dataset...")
train_loader, val_loader, test_loader = load_mnist_data(batch_size=64, test_split=0.2)

print(f"Training samples: {len(train_loader.dataset)}")
print(f"Validation samples: {len(val_loader.dataset)}")
print(f"Test samples: {len(test_loader.dataset)}")

# Visualize some training samples.
# Fetch ONE batch and show its first ten images. Building a fresh iterator
# with next(iter(...)) for every panel would start ten separate passes over
# the loader just to use the first image of each.
data, target = next(iter(train_loader))

fig, axes = plt.subplots(2, 5, figsize=(12, 6))
for ax in axes.flat:
    # Hide every frame up front so panels stay blank if the batch is short.
    ax.axis('off')
for ax, image, label in zip(axes.flat, data, target):
    ax.imshow(image.squeeze(), cmap='gray')
    ax.set_title(f'Label: {label.item()}')
plt.tight_layout()
plt.show()


In [None]:
# Custom training function for autoencoder
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return the mean per-batch loss.

    When ``optimizer`` is given the model is put in train mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. Returns NaN when ``loader`` yields no batches.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    batch_count = 0
    with torch.set_grad_enabled(training):
        for data, _ in loader:
            data = data.to(device)

            # The model returns (reconstruction, latent); only the
            # reconstruction is needed, compared against the flattened input.
            reconstructed, _ = model(data)
            loss = criterion(reconstructed, data.reshape(data.size(0), -1))

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            batch_count += 1

    # Guard against an empty loader instead of raising ZeroDivisionError.
    return loss_sum / batch_count if batch_count else float('nan')


def train_autoencoder(model, train_loader, val_loader, epochs=20, lr=0.001, device='cpu'):
    """Train autoencoder with reconstruction loss.

    Args:
        model: Module whose forward pass returns ``(reconstruction, latent)``
            with the reconstruction flattened to ``(batch, features)``.
        train_loader: Iterable of ``(data, label)`` batches used for updates;
            labels are ignored.
        val_loader: Iterable of ``(data, label)`` batches evaluated without
            gradients after every epoch.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for the Adam optimizer.
        device: Device the model and each batch are moved to.

    Returns:
        ``(train_losses, val_losses)``: two lists with one float per epoch,
        each the mean of the per-batch MSE losses (NaN for an empty loader).
    """
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        train_loss = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss = _run_epoch(model, val_loader, criterion, device)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # Log every fifth epoch, and always the last one so short runs
        # still report their final losses.
        if (epoch + 1) % 5 == 0 or epoch + 1 == epochs:
            print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}')

    return train_losses, val_losses

# Train the autoencoder
print("Starting autoencoder training...")
train_losses, val_losses = train_autoencoder(model, train_loader, val_loader, epochs=20, device=device)

# Plot training history: the same two curves on a linear and a log y-axis.
plt.figure(figsize=(10, 4))
history_panels = (
    ('Autoencoder Training History', 'Reconstruction Loss', 'linear'),
    ('Training History (Log Scale)', 'Reconstruction Loss (log)', 'log'),
)
for panel_idx, (title, ylabel, yscale) in enumerate(history_panels, start=1):
    plt.subplot(1, 2, panel_idx)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.yscale(yscale)
    plt.legend()
    plt.grid(True)

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
training_plot_path = '../results/plots/autoencoder_training.png'
os.makedirs(os.path.dirname(training_plot_path), exist_ok=True)
plt.savefig(training_plot_path, dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Visualize reconstructions
model.eval()
with torch.no_grad():
    # Only the forward pass needs no_grad; plotting happens afterwards.
    data, target = next(iter(test_loader))
    data = data.to(device)
    reconstructed, encoded = model(data)

# Convert to numpy for visualization
original = data.cpu().numpy()
reconstructed = reconstructed.cpu().numpy()
latent = encoded.cpu().numpy()

# Show at most ten samples; squeeze=False keeps `axes` two-dimensional even
# when the batch holds a single sample.
n_show = min(10, len(original))
fig, axes = plt.subplots(4, n_show, figsize=(15, 6), squeeze=False)
for i in range(n_show):
    label = target[i].item()
    image = original[i].squeeze()
    # The model emits flat vectors; restore the image shape of the input.
    recon_image = reconstructed[i].reshape(image.shape)

    # Original images
    axes[0, i].imshow(image, cmap='gray')
    axes[0, i].set_title(f'Original {label}')
    axes[0, i].axis('off')

    # Reconstructed images
    axes[1, i].imshow(recon_image, cmap='gray')
    axes[1, i].set_title('Reconstructed')
    axes[1, i].axis('off')

    # Absolute per-pixel difference
    axes[2, i].imshow(np.abs(image - recon_image), cmap='hot')
    axes[2, i].set_title('Difference')
    axes[2, i].axis('off')

    # Latent space (first 2 dimensions). vmin/vmax pin the colour scale to
    # the digit range; without them a single point is normalised on its own
    # and every label maps to the same colour.
    axes[3, i].scatter(latent[i, 0], latent[i, 1], c=[label], cmap='tab10', vmin=0, vmax=9)
    axes[3, i].set_title('Latent (2D)')
    axes[3, i].axis('off')

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
reconstruction_plot_path = '../results/plots/autoencoder_reconstructions.png'
os.makedirs(os.path.dirname(reconstruction_plot_path), exist_ok=True)
plt.savefig(reconstruction_plot_path, dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Visualize latent space using t-SNE
print("Computing t-SNE visualization of latent space...")

# Collect all encoded representations and labels, and accumulate the squared
# reconstruction error in the same pass over the test set.
all_encoded = []
all_labels = []
squared_error_sum = 0.0
element_count = 0

model.eval()
with torch.no_grad():
    for data, target in test_loader:
        data = data.to(device)
        recon_batch, encoded = model(data)
        flat_input = data.reshape(data.size(0), -1)
        squared_error_sum += F.mse_loss(recon_batch, flat_input, reduction='sum').item()
        element_count += flat_input.numel()
        all_encoded.append(encoded.cpu().numpy())
        all_labels.append(target.numpy())

all_encoded = np.vstack(all_encoded)
all_labels = np.hstack(all_labels)

# Use t-SNE to reduce to 2D
tsne = TSNE(n_components=2, random_state=42, perplexity=30)
encoded_2d = tsne.fit_transform(all_encoded)

# Plot latent space
plt.figure(figsize=(12, 5))

# Left panel: one scatter coloured by label with a colour bar.
plt.subplot(1, 2, 1)
scatter = plt.scatter(encoded_2d[:, 0], encoded_2d[:, 1], c=all_labels, cmap='tab10', alpha=0.6)
plt.colorbar(scatter)
plt.title('Latent Space (t-SNE)')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')

# Right panel: one scatter per digit so each class gets a legend entry.
plt.subplot(1, 2, 2)
for digit in range(10):
    mask = all_labels == digit
    plt.scatter(encoded_2d[mask, 0], encoded_2d[mask, 1],
                label=f'Digit {digit}', alpha=0.6, s=20)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.title('Latent Space by Digit')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
latent_plot_path = '../results/plots/autoencoder_latent_space.png'
os.makedirs(os.path.dirname(latent_plot_path), exist_ok=True)
plt.savefig(latent_plot_path, dpi=300, bbox_inches='tight')
plt.show()

# Mean squared reconstruction error per pixel over the whole test set
# (comparing the latent codes with themselves would always give 0).
mse = squared_error_sum / element_count
print(f"Test reconstruction MSE: {mse:.6f}")
print(f"Latent space dimension: {all_encoded.shape[1]}")
print(f"Compression ratio: {784 / all_encoded.shape[1]:.1f}x")
