In [None]:
!pip install -r requirements.txt



In [None]:
# ===== LIBRARY IMPORTS AND SETUP =====

# PyTorch libraries for deep learning
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# torchvision for datasets and transforms
from torchvision.datasets import FashionMNIST
from torchvision import transforms

# NumPy for numerical operations
import numpy as np

# Scikit-learn for clustering and evaluation metrics
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score, silhouette_score

# Seed NumPy and PyTorch once, up front, so that a fresh "Restart & Run All"
# starts from the same weight initialisation and batch shuffling order.
# (Some GPU kernels are still nondeterministic, so results may vary slightly.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Check for available device, in order of preference: CUDA GPU, Apple Silicon
# MPS, then CPU. `torch.backends.mps` does not exist on older PyTorch builds,
# so it is looked up defensively instead of being accessed directly.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = "cuda"
elif _mps_backend is not None and _mps_backend.is_available():
    device = "mps"
else:
    device = "cpu"
print("Device =", device)

In [None]:
# ===== DATA LOADING AND PREPARATION =====
# Fashion-MNIST Dataset Setup

# Preprocessing pipeline: a single ToTensor step that turns every image into
# a tensor (values scaled into the [0,1] range).
transform = transforms.Compose([transforms.ToTensor()])

# Fashion-MNIST: 70,000 grayscale 28x28 images spread over 10 clothing classes
# (60,000 for training, 10,000 for testing). Files are downloaded into ./data
# on first use.
train_ds = FashionMNIST(
    root="./data",
    train=True,
    download=True,
    transform=transform,
)
test_ds = FashionMNIST(
    root="./data",
    train=False,
    download=True,
    transform=transform,
)

# Mini-batch iterators: the training loader reshuffles on every pass,
# the test loader keeps the dataset order.
train_loader = DataLoader(dataset=train_ds, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_ds, batch_size=128, shuffle=False)

print(f"Training set size: {len(train_ds)} images")
print(f"Test set size: {len(test_ds)} images")

In [None]:
# ===== AUTOENCODER MODEL =====
# Standard Autoencoder for learning latent representations

class AE(nn.Module):
    """
    Fully-connected autoencoder used to learn a compact latent code.

    Layout:
    - Encoder: Flatten → Linear(784, 256) → ReLU → Linear(256, latent_dim)
    - Decoder: Linear(latent_dim, 256) → ReLU → Linear(256, 784) → Sigmoid

    The closing Sigmoid bounds every reconstructed pixel to the (0, 1) range.

    Args:
        latent_dim: Size of the bottleneck vector (default 32).
    """
    def __init__(self, latent_dim=32):
        super().__init__()

        # Encoder stack: image → 784-dim vector → hidden layer → latent code
        encoder_layers = [
            nn.Flatten(),
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, latent_dim),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder stack: latent code → hidden layer → 784 pixel intensities
        decoder_layers = [
            nn.Linear(latent_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def encode(self, x):
        """Map a batch of inputs to their latent codes."""
        latent = self.encoder(x)
        return latent

    def forward(self, x):
        """
        Encode then decode a batch.

        Returns:
            (reconstruction, latent): the reconstruction reshaped to
            (-1, 1, 28, 28) and the latent code produced by the encoder.
        """
        latent = self.encoder(x)
        flat_reconstruction = self.decoder(latent)
        reconstruction = flat_reconstruction.view(-1, 1, 28, 28)
        return reconstruction, latent

In [4]:
class VAE(nn.Module):
    """
    Fully-connected variational autoencoder.

    Layout:
    - Encoder trunk: Flatten → Linear(784, 256) → ReLU → Linear(256, 64) → ReLU
    - Two linear heads on the 64-dim trunk output: `mu` and `logvar`
    - Decoder: Linear(latent_dim, 64) → ReLU → Linear(64, 256) → ReLU
               → Linear(256, 784) → Sigmoid

    Args:
        latent_dim: Size of the latent vector (default 16).
    """
    def __init__(self, latent_dim=16):
        super().__init__()

        # Shared trunk feeding both posterior heads
        trunk_layers = [
            nn.Flatten(),
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
        ]
        self.encoder = nn.Sequential(*trunk_layers)

        # Posterior parameters: mean and log-variance of the latent Gaussian
        self.mu = nn.Linear(64, latent_dim)
        self.logvar = nn.Linear(64, latent_dim)

        # Decoder mirrors the trunk and ends in a Sigmoid
        decoder_layers = [
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def encode(self, x):
        """Return the (mu, logvar) pair predicted for a batch of inputs."""
        hidden = self.encoder(x)
        mean = self.mu(hidden)
        log_variance = self.logvar(hidden)
        return mean, log_variance

    def reparam(self, mu, logvar):
        """Draw z = mu + eps * std with eps ~ N(0, I) (reparameterisation trick)."""
        std = (0.5 * logvar).exp()
        noise = torch.randn_like(std)
        return mu + noise * std

    def forward(self, x):
        """
        Encode, sample a latent vector and decode it.

        Returns:
            (x_hat, mu, logvar, z), where x_hat is reshaped to (-1, 1, 28, 28).
        """
        mu, logvar = self.encode(x)
        z = self.reparam(mu, logvar)
        flat_reconstruction = self.decoder(z)
        x_hat = flat_reconstruction.view(-1, 1, 28, 28)
        return x_hat, mu, logvar, z

def vae_loss(x_hat, x, mu, logvar):
    """
    Per-sample VAE objective: reconstruction error plus KL regulariser.

    Args:
        x_hat: Reconstructed batch.
        x: Original batch; its first dimension is used as the batch size.
        mu: Posterior means.
        logvar: Posterior log-variances.

    Returns:
        Scalar tensor: (summed squared error + summed KL term) / batch size.
    """
    batch_size = x.size(0)

    # Squared error summed over every element of the batch
    reconstruction = nn.functional.mse_loss(x_hat, x, reduction="sum")

    # Closed-form KL(N(mu, exp(logvar)) || N(0, I)), summed over all elements
    kl_terms = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_divergence = -0.5 * kl_terms.sum()

    return (reconstruction + kl_divergence) / batch_size

In [5]:
def train_ae(epochs, latent_dim=32, lr=1e-3, verbose=False):
    """
    Train a fresh AE on the notebook's `train_loader` with Adam and MSE loss.

    Args:
        epochs: Number of full passes over `train_loader`.
        latent_dim: Bottleneck size passed to `AE` (default 32).
        lr: Adam learning rate (default 1e-3).
        verbose: When True, print the mean batch loss after every epoch.
            Off by default, which keeps the function silent.

    Returns:
        The trained AE, living on `device` and still in train mode.
    """
    model_ae = AE(latent_dim=latent_dim).to(device)
    optimizer = optim.Adam(model_ae.parameters(), lr=lr)
    criterion = nn.MSELoss()
    model_ae.train()

    for epoch in range(epochs):
        running_loss = 0.0
        for x, _ in train_loader:
            x = x.to(device)
            x_hat, _ = model_ae(x)

            loss = criterion(x_hat, x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # loss.item() forces a device→host sync, so only pay for it
            # when the value is actually going to be reported.
            if verbose:
                running_loss += loss.item()

        if verbose:
            # max(..., 1) guards against an empty loader
            mean_loss = running_loss / max(len(train_loader), 1)
            print(f"Epoch {epoch+1} - Loss = {mean_loss:.4f}")

    return model_ae


def train_vae(epochs, latent_dim=16, lr=1e-3, verbose=False):
    """
    Train a fresh VAE on the notebook's `train_loader` with Adam and `vae_loss`.

    Args:
        epochs: Number of full passes over `train_loader`.
        latent_dim: Latent size passed to `VAE` (default 16).
        lr: Adam learning rate (default 1e-3).
        verbose: When True, print the mean batch loss after every epoch.
            Off by default, which keeps the function silent.

    Returns:
        The trained VAE, living on `device` and still in train mode.
    """
    model_vae = VAE(latent_dim=latent_dim).to(device)
    opt = optim.Adam(model_vae.parameters(), lr=lr)

    model_vae.train()

    for epoch in range(epochs):
        running_loss = 0.0
        for x, _ in train_loader:
            x = x.to(device)
            x_hat, mu, logvar, _ = model_vae(x)

            loss = vae_loss(x_hat, x, mu, logvar)
            opt.zero_grad()
            loss.backward()
            opt.step()

            # loss.item() forces a device→host sync, so only pay for it
            # when the value is actually going to be reported.
            if verbose:
                running_loss += loss.item()

        if verbose:
            # max(..., 1) guards against an empty loader
            mean_loss = running_loss / max(len(train_loader), 1)
            print(f"Epoch {epoch+1} — Loss = {mean_loss:.4f}")

    return model_vae


In [6]:
def ae_get_latent(model, loader):
    """
    Collect latent codes and labels for every batch yielded by `loader`.

    Only the encoder is run (`model.encode`); the decoder pass is skipped
    because the reconstruction is not needed here.

    Args:
        model: Autoencoder exposing `encode(x)` (e.g. `AE`). It is switched
            to eval mode and left that way.
        loader: Iterable of `(x, y)` batches.

    Returns:
        (Z, Y): latent codes stacked row-wise and labels concatenated,
        both as NumPy arrays.
    """
    model.eval()
    Z, Y = [], []
    with torch.no_grad():
        for x, y in loader:
            z = model.encode(x.to(device))
            Z.append(z.cpu().numpy())
            Y.append(y.numpy())
    return np.vstack(Z), np.hstack(Y)

def vae_get_latent(model, loader):
    """
    Collect posterior means and labels for every batch yielded by `loader`.

    Only `model.encode` is run: the posterior mean is used as the embedding,
    so the sampling step and the decoder pass are skipped (this also avoids
    drawing random noise that would be thrown away).

    Args:
        model: VAE exposing `encode(x) -> (mu, logvar)` (e.g. `VAE`). It is
            switched to eval mode and left that way.
        loader: Iterable of `(x, y)` batches.

    Returns:
        (Z, Y): posterior means stacked row-wise and labels concatenated,
        both as NumPy arrays.
    """
    model.eval()
    Z, Y = [], []
    with torch.no_grad():
        for x, y in loader:
            mu, _ = model.encode(x.to(device))
            Z.append(mu.cpu().numpy())
            Y.append(y.numpy())
    return np.vstack(Z), np.hstack(Y)

In [7]:
def calculate_model_performance(test_cluster, test_y, test_z):
    """
    Print three clustering-quality scores for a set of cluster assignments.

    Args:
        test_cluster: Predicted cluster index per sample.
        test_y: Ground-truth label per sample (used for NMI and ARI).
        test_z: Feature vectors the clustering was run on (used for Silhouette).
    """
    # All scores are computed first, so nothing is printed if one of them fails.
    report = (
        ("NMI:", normalized_mutual_info_score(test_y, test_cluster)),
        ("ARI:", adjusted_rand_score(test_y, test_cluster)),
        ("Silhouette:", silhouette_score(test_z, test_cluster)),
    )
    for label, score in report:
        print(label, score)


In [8]:
def test_ae_kmeans(train_epochs):
    """
    Train an AE, cluster its latent codes with KMeans and print the scores.

    KMeans (10 clusters, random_state=42) is fitted on the training-set
    latents and evaluated on the test-set latents.

    Args:
        train_epochs: Number of epochs used to train the autoencoder.
    """
    autoencoder = train_ae(epochs=train_epochs)

    # Training labels are not used: the clusterer is fitted without supervision.
    train_latents, _ = ae_get_latent(model=autoencoder, loader=train_loader)
    test_latents, test_labels = ae_get_latent(model=autoencoder, loader=test_loader)

    print('Test on AutoEncoder (AE) + KMeans:')
    clusterer = KMeans(n_clusters=10, random_state=42).fit(train_latents)

    predicted = clusterer.predict(test_latents)
    calculate_model_performance(predicted, test_labels, test_latents)

def test_vae_gmm(train_epochs):
    """
    Train a VAE, cluster its posterior means with a GMM and print the scores.

    The Gaussian mixture (10 full-covariance components, random_state=42) is
    fitted on the training-set embeddings and evaluated on the test-set ones.

    Args:
        train_epochs: Number of epochs used to train the VAE.
    """
    vae = train_vae(epochs=train_epochs)

    # Training labels are not used: the mixture is fitted without supervision.
    train_latents, _ = vae_get_latent(model=vae, loader=train_loader)
    test_latents, test_labels = vae_get_latent(model=vae, loader=test_loader)

    print('Test on Variational AutoEncoder (VAE) + Gaussian Mixture Model (GMM):')
    mixture = GaussianMixture(
        n_components=10,
        covariance_type='full',
        random_state=42,
    ).fit(train_latents)

    predicted = mixture.predict(test_latents)
    calculate_model_performance(predicted, test_labels, test_latents)


In [9]:
# Run both pipelines with 20 training epochs each; the blank lines in between
# keep the two printed reports visually apart.
test_ae_kmeans(train_epochs=20)
print('\n\n')
test_vae_gmm(train_epochs=20)

Test on AutoEncoder (AE) + KMeans:
NMI: 0.5185857007930816
ARI: 0.32855891953647626
Silhouette: 0.16779543459415436



Test on Variational AutoEncoder (VAE) + Gaussian Mixture Model (GMM):
NMI: 0.5906259569507099
ARI: 0.43468771386721
Silhouette: 0.15230272710323334
