# Autoencoders

## Data

### Import and load datasets

In [43]:
from torchvision.datasets import MNIST, FashionMNIST, KMNIST

mnist = MNIST(
    root="data/mnist",
    download=True,
)

mnist_test = MNIST(root="data/mnist", download=True, train=False)

fashion = FashionMNIST(
    root="data/fmnist",
    download=True,
)
fashion_test = FashionMNIST(root="data/fmnist", download=True, train=False)

kuzushiji = KMNIST(
    root="data/kmnist",
    download=True,
)
kuzushiji_test = KMNIST(root="data/kmnist", download=True, train=False)

In [44]:
mnist_train_data = mnist.data
mnist_train_labels = mnist.train_labels
mnist_test_data = mnist_test.data
mnist_test_labels = mnist_test.targets

fashion_train_data = fashion.train_data
fashion_train_labels = fashion.train_labels
fashion_test_data = fashion_test.data
fashion_test_labels = fashion_test.targets

kuzushiji_train_data = kuzushiji.train_data
kuzushiji_train_labels = kuzushiji.train_labels
kuzushiji_test_data = kuzushiji_test.data
kuzushiji_test_labels = kuzushiji_test.targets



### Normalize the image histogram

In [45]:
import numpy as np


def normalize(images):
    histograms = np.apply_along_axis(
        np.bincount, 1, images.reshape(images.shape[0], -1), minlength=256
    )

    cdf = histograms.cumsum(axis=1)
    cdf_min = cdf[:, 0][
        :, None
    ]  # Minimum of the CDF (first non-zero element in each row)

    # Normalize the CDF for each image
    cdf_m = ((cdf - cdf_min) * 255) / (cdf.max(axis=1)[:, None] - cdf_min)

    cdf_m = cdf_m.astype(np.uint8)

    # Apply normalized CDF to each image
    normalized_images = cdf_m[
        np.arange(images.shape[0])[:, None, None], images
    ]  #  Broadcasting across images and index

    return normalized_images


mnist_train_data = normalize(mnist_train_data)/265
mnist_test_data = normalize(mnist_test_data)/265

fashion_train_data = normalize(fashion_train_data)/265
fashion_test_data = normalize(fashion_test_data)/265

kuzushiji_train_data = normalize(kuzushiji_train_data)/265
kuzushiji_test_data = normalize(kuzushiji_test_data)/265

### Convert to torch

In [46]:
import torch

mnist_train_data = torch.tensor(mnist_train_data,dtype=torch.float32)
mnist_test_data = torch.tensor(mnist_test_data,dtype=torch.float32)

fashion_train_data = torch.tensor(fashion_train_data,dtype=torch.float32)
fashion_test_data = torch.tensor(fashion_test_data,dtype=torch.float32)

kuzushiji_train_data = torch.tensor(kuzushiji_train_data,dtype=torch.float32)
kuzushiji_test_data = torch.tensor(kuzushiji_test_data,dtype=torch.float32)

---

## Model

### Architecture

In [47]:
import torch.nn as nn

# Define the autoencoder class
class Autoencoder(nn.Module):
    def __init__(self, input_dim=784, embedding_dim=196):
        super(Autoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, embedding_dim),
        )
        
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(embedding_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, input_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [None]:
# Define the autoencoder class
class ConvAutoencoder(nn.Module):
    def __init__(self):
        super(ConvAutoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(32 * 7 * 7, 196),
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(196, 32 * 7 * 7),
            nn.ReLU(),
            nn.Unflatten(1, (32, 7, 7)),
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

### Hyperparameters

In [48]:
input_dim = 784
embedding_dim = 196
batch_size = 64
epochs = 50
learning_rate = 1e-3

### Datasets

In [49]:
from torch.utils.data import DataLoader, TensorDataset

train_dataset = TensorDataset(mnist_train_data)
test_dataset = TensorDataset(mnist_test_data)

### Training setup

### Training loop function

In [50]:
def training_loop(model, optimizer, train_dataset, test_dataset, dataset_name):

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Training loop
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for batch in train_loader:
            images = batch[0]
            images = images.view(-1, 784).to(DEVICE)
    
            outputs = model(images)
            loss = criterion(outputs, images)
    
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    
            train_loss += loss.item()
    
        train_loss /= len(train_dataset)
    
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for batch in test_loader:
                images = batch[0]
                images = images.view(-1, 784).to(DEVICE)
                outputs = model(images)
                loss = criterion(outputs, images)
                test_loss += loss.item()
    
        test_loss /= len(test_dataset)
    
        if epoch % 5 == 0:
            print(
                f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Test Loss: {test_loss:.6f}"
            )
    
    # Save the trained model
    torch.save(model.state_dict(), f"autoencoder_{dataset_name}.pth")

In [51]:
import torch.optim as optim

# Initialize the model, loss function, and optimizer
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.BCELoss()

model = Autoencoder(input_dim=input_dim, embedding_dim=embedding_dim).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

training_loop(model, optimizer, train_dataset, test_dataset, "mnist")

Epoch 1/50, Train Loss: 0.002625, Test Loss: 0.001980
Epoch 6/50, Train Loss: 0.001605, Test Loss: 0.001611
Epoch 11/50, Train Loss: 0.001547, Test Loss: 0.001562
Epoch 16/50, Train Loss: 0.001518, Test Loss: 0.001542
Epoch 21/50, Train Loss: 0.001498, Test Loss: 0.001521
Epoch 26/50, Train Loss: 0.001483, Test Loss: 0.001502
Epoch 31/50, Train Loss: 0.001472, Test Loss: 0.001493
Epoch 36/50, Train Loss: 0.001462, Test Loss: 0.001479
Epoch 41/50, Train Loss: 0.001453, Test Loss: 0.001472
Epoch 46/50, Train Loss: 0.001446, Test Loss: 0.001462
