# Autoencoders

## Data

### Import and load datasets

In [1]:
from torchvision.datasets import MNIST, FashionMNIST, KMNIST

mnist = MNIST(
    root="data/mnist",
    download=True,
)

mnist_test = MNIST(root="data/mnist", download=True, train=False)

fashion = FashionMNIST(
    root="data/fmnist",
    download=True,
)
fashion_test = FashionMNIST(root="data/fmnist", download=True, train=False)

kuzushiji = KMNIST(
    root="data/kmnist",
    download=True,
)
kuzushiji_test = KMNIST(root="data/kmnist", download=True, train=False)

In [2]:
mnist_train_data = mnist.data
mnist_train_labels = mnist.targets.numpy()
mnist_test_data = mnist_test.data
mnist_test_labels = mnist_test.targets.numpy()

fashion_train_data = fashion.data
fashion_train_labels = fashion.targets.numpy()
fashion_test_data = fashion_test.data
fashion_test_labels = fashion_test.targets.numpy()

kuzushiji_train_data = kuzushiji.data
kuzushiji_train_labels = kuzushiji.targets.numpy()
kuzushiji_test_data = kuzushiji_test.data
kuzushiji_test_labels = kuzushiji_test.targets.numpy()

### Normalize the image histogram

In [3]:
import numpy as np


def normalize(images):
    histograms = np.apply_along_axis(
        np.bincount, 1, images.reshape(images.shape[0], -1), minlength=256
    )

    cdf = histograms.cumsum(axis=1)
    cdf_min = cdf[:, 0][
        :, None
    ]  # Minimum of the CDF (first non-zero element in each row)

    # Normalize the CDF for each image
    cdf_m = ((cdf - cdf_min) * 255) / (cdf.max(axis=1)[:, None] - cdf_min)

    cdf_m = cdf_m.astype(np.uint8)

    # Apply normalized CDF to each image
    normalized_images = cdf_m[
        np.arange(images.shape[0])[:, None, None], images
    ]  #  Broadcasting across images and index

    return normalized_images


mnist_train_data = normalize(mnist_train_data) / 265
mnist_test_data = normalize(mnist_test_data) / 265

fashion_train_data = normalize(fashion_train_data) / 265
fashion_test_data = normalize(fashion_test_data) / 265

kuzushiji_train_data = normalize(kuzushiji_train_data) / 265
kuzushiji_test_data = normalize(kuzushiji_test_data) / 265

### Convert to torch

In [4]:
import torch

mnist_train_data = torch.tensor(mnist_train_data, dtype=torch.float32)
mnist_test_data = torch.tensor(mnist_test_data, dtype=torch.float32)

fashion_train_data = torch.tensor(fashion_train_data, dtype=torch.float32)
fashion_test_data = torch.tensor(fashion_test_data, dtype=torch.float32)

kuzushiji_train_data = torch.tensor(kuzushiji_train_data, dtype=torch.float32)
kuzushiji_test_data = torch.tensor(kuzushiji_test_data, dtype=torch.float32)

---

## Model

### Architecture

In [5]:
import torch.nn as nn


# Define the autoencoder class
class Autoencoder(nn.Module):
    def __init__(self, input_dim=784, embedding_dim=196):
        super(Autoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, embedding_dim),
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(embedding_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, input_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [6]:
# Define the autoencoder class
class ConvAutoencoder(nn.Module):
    def __init__(self, embedding_dim=196):
        super(ConvAutoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(32 * 7 * 7, embedding_dim),
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(embedding_dim, 32 * 7 * 7),
            nn.ReLU(),
            nn.Unflatten(1, (32, 7, 7)),
            nn.ConvTranspose2d(
                32, 16, kernel_size=3, stride=2, padding=1, output_padding=1
            ),
            nn.ReLU(),
            nn.ConvTranspose2d(
                16, 1, kernel_size=3, stride=2, padding=1, output_padding=1
            ),
            nn.Sigmoid(),
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

### Hyperparameters

In [7]:
input_dim = 784
embedding_dim = 196
batch_size = 64
epochs = 50
learning_rate = 1e-3

### Training setup

In [8]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.BCELoss()

### Training loop function

In [9]:
from torch.utils.data import DataLoader


def training_loop(
    model, optimizer, train_dataset, test_dataset, dataset_name, flatten=True
):

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Training loop
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for batch in train_loader:
            images = batch[0].squeeze()
            if flatten:
                images = images.view(-1, 784)
            else:
                images = images[:, None, :, :]
            images = images.to(DEVICE)

            outputs = model(images)
            loss = criterion(outputs, images)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= len(train_dataset)

        model.eval()
        test_loss = 0
        with torch.no_grad():
            for batch in test_loader:
                images = batch[0].squeeze()
                if flatten:
                    images = images.view(-1, 784)
                else:
                    images = images[:, None, :, :]
                images = images.to(DEVICE)

                outputs = model(images)
                loss = criterion(outputs, images)
                test_loss += loss.item()

        test_loss /= len(test_dataset)

        if epoch % 5 == 4:
            print(
                f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Test Loss: {test_loss:.6f}"
            )

    # Save the trained model
    torch.save(model.state_dict(), f"autoencoder_{dataset_name}.pth")

# Score metrics for a model - 

In [10]:
import pandas as pd
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score


def get_score_df(clf, X_test, y_test):
    y_pred = clf.predict(X_test)
    y_prob = clf.predict_proba(X_test)

    result_df = pd.DataFrame(columns=["label", "auroc", "f1", "prec", "rec"])
    classes = np.unique(y_test)  # Assuming y_train contains all classes
    for cls in classes:
        binary_y_test = (y_test == cls).astype(int)
        binary_y_pred = (y_pred == cls).astype(int)

        auroc = roc_auc_score(binary_y_test, y_prob[:, cls])
        f1 = f1_score(binary_y_test, binary_y_pred)
        precision = precision_score(binary_y_test, binary_y_pred)
        recall = recall_score(binary_y_test, binary_y_pred)

        result_df.loc[result_df.shape[0]] = [cls, auroc, f1, precision, recall]

    auroc = roc_auc_score(y_test, y_prob, multi_class="ovr", average="weighted")
    f1 = f1_score(y_test, y_pred, average="weighted")
    precision = precision_score(y_test, y_pred, average="weighted")
    recall = recall_score(y_test, y_pred, average="weighted")
    result_df.loc[result_df.shape[0]] = ["all", auroc, f1, precision, recall]

    return result_df

---

# Train mnist

In [11]:
from torch.utils.data import TensorDataset

dataset_name = "mnist"

train_data = mnist_train_data
test_data = mnist_test_data

train_labels = mnist_train_labels
test_labels = mnist_test_labels

train_dataset = TensorDataset(train_data)
test_dataset = TensorDataset(test_data)

### MLP AE

In [12]:
import torch.optim as optim

model = Autoencoder(input_dim=input_dim, embedding_dim=embedding_dim).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
training_loop(model, optimizer, train_dataset, test_dataset, dataset_name)

Epoch 5/50, Train Loss: 0.001631, Test Loss: 0.001631
Epoch 10/50, Train Loss: 0.001557, Test Loss: 0.001568
Epoch 15/50, Train Loss: 0.001524, Test Loss: 0.001545
Epoch 20/50, Train Loss: 0.001503, Test Loss: 0.001520
Epoch 25/50, Train Loss: 0.001487, Test Loss: 0.001504
Epoch 30/50, Train Loss: 0.001475, Test Loss: 0.001492
Epoch 35/50, Train Loss: 0.001464, Test Loss: 0.001485
Epoch 40/50, Train Loss: 0.001455, Test Loss: 0.001472
Epoch 45/50, Train Loss: 0.001448, Test Loss: 0.001465
Epoch 50/50, Train Loss: 0.001441, Test Loss: 0.001457


In [13]:
model.eval()

encodings_train = model.encoder(train_data.view(-1, 784).to(DEVICE))
encodings_test = model.encoder(test_data.view(-1, 784).to(DEVICE))

encodings_train = encodings_train.cpu().detach().numpy()
encodings_test = encodings_test.cpu().detach().numpy()

Logistic regression

In [14]:
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.996338,0.93452,0.934535,0.9346


Random forest

In [15]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.997693,0.953408,0.953475,0.9535


Using Random Forest Classifier allowed us to improve the results slightly

### Convolution AE

In [16]:
model_conv = ConvAutoencoder(embedding_dim=embedding_dim).to(DEVICE)
optimizer = optim.Adam(model_conv.parameters(), lr=learning_rate)

training_loop(
    model_conv, optimizer, train_dataset, test_dataset, dataset_name, flatten=False
)

Epoch 5/50, Train Loss: 0.001443, Test Loss: 0.001443
Epoch 10/50, Train Loss: 0.001408, Test Loss: 0.001414
Epoch 15/50, Train Loss: 0.001394, Test Loss: 0.001401
Epoch 20/50, Train Loss: 0.001384, Test Loss: 0.001394
Epoch 25/50, Train Loss: 0.001378, Test Loss: 0.001385
Epoch 30/50, Train Loss: 0.001373, Test Loss: 0.001380
Epoch 35/50, Train Loss: 0.001370, Test Loss: 0.001377
Epoch 40/50, Train Loss: 0.001367, Test Loss: 0.001376
Epoch 45/50, Train Loss: 0.001365, Test Loss: 0.001374
Epoch 50/50, Train Loss: 0.001364, Test Loss: 0.001373


In [17]:
model_conv.eval()

encodings_train = model_conv.encoder(train_data[:, None, :, :].to(DEVICE))
encodings_test = model_conv.encoder(test_data[:, None, :, :].to(DEVICE))

encodings_train = encodings_train.cpu().detach().numpy()
encodings_test = encodings_test.cpu().detach().numpy()

In [18]:
clf = LogisticRegression(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.994577,0.92533,0.92537,0.9255


In [19]:
clf = RandomForestClassifier(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.99762,0.950748,0.95084,0.9508


Using embeddings made with convolution model yielded better value for reconstruction loss but the classification results were slightly worse.

We still maintain better scores for random forest.


Best results for mnist were achieved for the RBM used as the first layer of DBN training in the previous part of laboratory = above 0.998 auroc

---

# Fashion mnist



In [20]:
dataset_name = "fashion_mnist"

train_data = fashion_train_data
test_data = fashion_test_data

train_labels = fashion_train_labels
test_labels = fashion_test_labels

train_dataset = TensorDataset(train_data)
test_dataset = TensorDataset(test_data)

### MLP AE

In [21]:
model = Autoencoder(input_dim=input_dim, embedding_dim=embedding_dim).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
training_loop(model, optimizer, train_dataset, test_dataset, dataset_name)

Epoch 5/50, Train Loss: 0.004730, Test Loss: 0.004756
Epoch 10/50, Train Loss: 0.004591, Test Loss: 0.004634
Epoch 15/50, Train Loss: 0.004513, Test Loss: 0.004557
Epoch 20/50, Train Loss: 0.004460, Test Loss: 0.004513
Epoch 25/50, Train Loss: 0.004424, Test Loss: 0.004474
Epoch 30/50, Train Loss: 0.004399, Test Loss: 0.004454
Epoch 35/50, Train Loss: 0.004380, Test Loss: 0.004435
Epoch 40/50, Train Loss: 0.004366, Test Loss: 0.004424
Epoch 45/50, Train Loss: 0.004355, Test Loss: 0.004411
Epoch 50/50, Train Loss: 0.004346, Test Loss: 0.004404


In [22]:
model.eval()

encodings_train = model.encoder(train_data.view(-1, 784).to(DEVICE))
encodings_test = model.encoder(test_data.view(-1, 784).to(DEVICE))

encodings_train = encodings_train.cpu().detach().numpy()
encodings_test = encodings_test.cpu().detach().numpy()

Logistic regression

In [23]:
clf = LogisticRegression(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.986044,0.855532,0.855222,0.8569


Random forest

In [24]:
clf = RandomForestClassifier(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.985463,0.845515,0.846765,0.8484


In case of fashion mnist, using random forest didn't improve the classification result for mlp-based autoencoder

### Convolution AE

In [25]:
model_conv = ConvAutoencoder(embedding_dim=embedding_dim).to(DEVICE)
optimizer = optim.Adam(model_conv.parameters(), lr=learning_rate)

training_loop(
    model_conv, optimizer, train_dataset, test_dataset, dataset_name, flatten=False
)

Epoch 5/50, Train Loss: 0.004402, Test Loss: 0.004432
Epoch 10/50, Train Loss: 0.004323, Test Loss: 0.004360
Epoch 15/50, Train Loss: 0.004303, Test Loss: 0.004343
Epoch 20/50, Train Loss: 0.004291, Test Loss: 0.004333
Epoch 25/50, Train Loss: 0.004285, Test Loss: 0.004328
Epoch 30/50, Train Loss: 0.004280, Test Loss: 0.004324
Epoch 35/50, Train Loss: 0.004277, Test Loss: 0.004320
Epoch 40/50, Train Loss: 0.004274, Test Loss: 0.004318
Epoch 45/50, Train Loss: 0.004271, Test Loss: 0.004315
Epoch 50/50, Train Loss: 0.004269, Test Loss: 0.004315


In [26]:
model_conv.eval()

encodings_train = model_conv.encoder(train_data[:, None, :, :].to(DEVICE))
encodings_test = model_conv.encoder(test_data[:, None, :, :].to(DEVICE))

encodings_train = encodings_train.cpu().detach().numpy()
encodings_test = encodings_test.cpu().detach().numpy()

Logistic regression

In [27]:
clf = LogisticRegression(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.984834,0.848218,0.847563,0.8501


Random forest

In [28]:
clf = RandomForestClassifier(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.985479,0.847246,0.848644,0.8501


The results didn't improve over mlp autoencoder despite better reconstruction loss

Best score achieved for logistic regression and mlp autoencoder - above 0.986 auroc

---

# Kuzushiji mnist



In [29]:
dataset_name = "kuzushiji_mnist"

train_data = kuzushiji_train_data
test_data = kuzushiji_test_data

train_labels = kuzushiji_train_labels
test_labels = kuzushiji_test_labels

train_dataset = TensorDataset(train_data)
test_dataset = TensorDataset(test_data)

### MLP AE

In [30]:
model = Autoencoder(input_dim=input_dim, embedding_dim=embedding_dim).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
training_loop(model, optimizer, train_dataset, test_dataset, dataset_name)

Epoch 5/50, Train Loss: 0.003274, Test Loss: 0.003357
Epoch 10/50, Train Loss: 0.003036, Test Loss: 0.003114
Epoch 15/50, Train Loss: 0.002935, Test Loss: 0.002996
Epoch 20/50, Train Loss: 0.002880, Test Loss: 0.002941
Epoch 25/50, Train Loss: 0.002844, Test Loss: 0.002883
Epoch 30/50, Train Loss: 0.002821, Test Loss: 0.002860
Epoch 35/50, Train Loss: 0.002802, Test Loss: 0.002840
Epoch 40/50, Train Loss: 0.002790, Test Loss: 0.002823
Epoch 45/50, Train Loss: 0.002780, Test Loss: 0.002817
Epoch 50/50, Train Loss: 0.002771, Test Loss: 0.002799


In [31]:
model.eval()

encodings_train = model.encoder(train_data.view(-1, 784).to(DEVICE))
encodings_test = model.encoder(test_data.view(-1, 784).to(DEVICE))

encodings_train = encodings_train.cpu().detach().numpy()
encodings_test = encodings_test.cpu().detach().numpy()

Logistic regression

In [32]:
clf = LogisticRegression(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.95718,0.740726,0.746162,0.7392


Random forest

In [33]:
clf = RandomForestClassifier(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.973758,0.811195,0.81639,0.8116


### Convolution AE

In [34]:
model_conv = ConvAutoencoder(embedding_dim=embedding_dim).to(DEVICE)
optimizer = optim.Adam(model_conv.parameters(), lr=learning_rate)

training_loop(
    model_conv, optimizer, train_dataset, test_dataset, dataset_name, flatten=False
)

Epoch 5/50, Train Loss: 0.002701, Test Loss: 0.002686
Epoch 10/50, Train Loss: 0.002652, Test Loss: 0.002638
Epoch 15/50, Train Loss: 0.002635, Test Loss: 0.002624
Epoch 20/50, Train Loss: 0.002627, Test Loss: 0.002614
Epoch 25/50, Train Loss: 0.002622, Test Loss: 0.002610
Epoch 30/50, Train Loss: 0.002619, Test Loss: 0.002611
Epoch 35/50, Train Loss: 0.002616, Test Loss: 0.002608
Epoch 40/50, Train Loss: 0.002615, Test Loss: 0.002604
Epoch 45/50, Train Loss: 0.002613, Test Loss: 0.002603
Epoch 50/50, Train Loss: 0.002612, Test Loss: 0.002601


In [35]:
model_conv.eval()

encodings_train = model_conv.encoder(train_data[:, None, :, :].to(DEVICE))
encodings_test = model_conv.encoder(test_data[:, None, :, :].to(DEVICE))

encodings_train = encodings_train.cpu().detach().numpy()
encodings_test = encodings_test.cpu().detach().numpy()

Logistic regression

In [36]:
clf = LogisticRegression(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.945362,0.706561,0.713068,0.705


Random Forest

In [37]:
clf = RandomForestClassifier(n_jobs=-1, random_state=42)
clf.fit(encodings_train, train_labels)
result_df = get_score_df(clf, encodings_test, test_labels)
result_df.tail(1)

Unnamed: 0,label,auroc,f1,prec,rec
10,all,0.969898,0.79183,0.797107,0.7919


Convolution based autoencoder achieved better reconstruction error but still didn't give us better representation for classification

Best score for kuzushiji mnist - Features from first layer of DBN - above 0.979 auroc