In [None]:
import sys
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST

import matplotlib.pyplot as plt

np.set_printoptions(threshold=sys.maxsize)

In [None]:
!pip install pytorch_lightning

In [None]:
import pytorch_lightning as pl

from pytorch_lightning.loggers import TensorBoardLogger

In [None]:
# Training configuration shared by every experiment in this notebook.
epochs = 5
# NOTE(review): `batch_size` and `lr` are still `(...)` placeholders and must
# be given concrete values before the cells below can run.
batch_size = (...)
lr = (...)
# Lightning accelerator name: use the GPU when CUDA is available, otherwise
# fall back to the CPU so the Trainer can still be constructed.
device = "gpu" if torch.cuda.is_available() else "cpu"

In [None]:
def add_noise(tensor, idx, mean=0.5, std=0.5, salt=275991):
    """Return a noisy copy of ``tensor`` with values clipped to [0, 1].

    The noise is drawn from a dedicated generator seeded with ``salt + idx``,
    so calling the function again with the same ``idx`` (and the same input
    size) reproduces the same noise.

    Args:
        tensor: Tensor to corrupt; it is not modified in place.
        idx: Integer added to ``salt`` to form the generator seed.
        mean: Mean of the Gaussian noise.
        std: Standard deviation of the Gaussian noise.
        salt: Base value of the generator seed.

    Returns:
        A new tensor of the same size as ``tensor``, limited to [0, 1].
    """
    generator = torch.Generator()
    generator.manual_seed(salt + idx)
    noise = torch.normal(mean=mean, std=std, size=tensor.size(), generator=generator)
    return torch.clip(tensor + noise, 0, 1)


class MnistForAuto(torch.utils.data.Dataset):
    """Turn a labelled image dataset into (input, target) pairs.

    The label of the wrapped dataset is discarded. The target is always the
    image as stored in the wrapped dataset; the input is that same image, or
    the output of ``add_noise`` for it when ``noisy`` is truthy.
    """

    def __init__(self, dataset, noisy):
        self.dataset = dataset
        self.noisy = noisy

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        clean, _label = self.dataset[idx]
        if not self.noisy:
            return clean, clean
        # The sample index is passed along so add_noise can seed from it.
        return add_noise(clean, idx), clean



def get_mnist_dataloader(train, noisy: bool):
    """Build a DataLoader of (input, target) MNIST image pairs.

    Args:
        train: Passed to ``MNIST`` as its ``train`` flag and reused as the
            loader's ``shuffle`` flag.
        noisy: Forwarded to ``MnistForAuto``.

    Returns:
        A ``DataLoader`` over the wrapped dataset, using the module-level
        ``batch_size`` and ``num_workers=4``.
    """
    to_tensor = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
    base = MNIST('files/', train=train, download=True, transform=to_tensor)
    return DataLoader(
        MnistForAuto(base, noisy),
        batch_size=batch_size,
        shuffle=train,
        num_workers=4,
    )


# Noise-free loaders: with noisy=False each sample's input and target are the
# same image.
dataloader_tr = get_mnist_dataloader(train=True, noisy=False)
dataloader_test = get_mnist_dataloader(train=False, noisy=False)

In [None]:
imgs, targets = next(iter(dataloader_tr))


def show(imgs):
    """Display up to the first eight images of a batch as one grid."""
    preview = torchvision.utils.make_grid(imgs[:8])
    # Move axis 0 to the end before handing the array to imshow.
    plt.imshow(preview.permute(1, 2, 0).numpy())
    plt.show()


show(imgs)
show(targets)

# Convolutional autoencoder
Network:
- 3x3 Conv2d, filters=16, stride=2, relu, padding=
- 3x3 Conv2d, filters=8, stride=2, relu, padding=
- 3x3 Conv2d, filters=8, stride=2, relu, padding=
- 3x3 Conv2d, filters=8, relu, padding=
- 2x2 UpsamplingBilinear2d
- 3x3 Conv2d, filters=8, relu, padding=
- 2x2 UpsamplingBilinear2d
- 3x3 Conv2d, filters=16, relu, padding=
- 2x2 UpsamplingBilinear2d
- 3x3 Conv2d, filters=1, sigmoid, padding=

and train it with Adam and a binary cross-entropy loss (`F.binary_cross_entropy` in PyTorch).

Question: What are the sizes of the input, the output and the compressed representation?

In [None]:
class AutoencoderNet1(nn.Module):
    """Autoencoder assembled from an ``encoder`` and a ``decoder`` sub-module.

    NOTE(review): ``__init__`` currently contains only a ``(...)`` placeholder,
    so ``self.encoder`` and ``self.decoder`` have to be defined there before
    ``forward`` can run.
    """

    def __init__(self):
        super().__init__()
        (...)

    def forward(self, x):
        """Encode ``x`` and return the decoder's output for that encoding."""
        return self.decoder(self.encoder(x))

In [None]:
class EncoderDecoderModel(pl.LightningModule):
    """Lightning wrapper that trains and evaluates an autoencoder network.

    Every step unpacks a batch into ``(input, target)``, reconstructs the
    input with the wrapped network and scores the reconstruction against the
    target through ``compute_loss``.
    """

    def __init__(self, net):
        """Store the network to optimise.

        Args:
            net: Module that maps an input batch to its reconstruction.
        """
        super().__init__()
        self._net = net

    def forward(self, x):
        """Return the wrapped network's output for ``x``."""
        return self._net(x)

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the loss for one batch."""
        x, y = batch
        # Go through forward() like the validation and test steps, instead of
        # reaching into the wrapped network's encoder/decoder attributes.
        x_hat = self(x)
        loss = self.compute_loss(x_hat, y)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the loss for one validation batch and log it as ``val_loss``."""
        x, y = batch
        x_hat = self(x)
        loss = self.compute_loss(x_hat, y)
        self.log('val_loss', loss)

    def configure_optimizers(self):
        """Return the optimizer used for training.

        NOTE(review): the body is still a ``(...)`` placeholder; the notebook
        text asks for Adam.
        """
        (...)

    def compute_loss(self, x_hat, x):
        """Score the reconstruction ``x_hat`` against the target ``x``.

        NOTE(review): the body is still a ``(...)`` placeholder; the notebook
        text asks for binary cross-entropy.
        """
        (...)

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` and image grids of inputs, outputs and targets."""
        x, y = batch
        x_hat = self(x)
        loss = self.compute_loss(x_hat, y)
        self.log('test_loss', loss)
        # Only the first few samples of the batch go into the image grids.
        num_imgs = 6
        grid_in = torchvision.utils.make_grid(x[:num_imgs])
        grid_out = torchvision.utils.make_grid(x_hat[:num_imgs])
        grid_target = torchvision.utils.make_grid(y[:num_imgs])
        self.logger.experiment.add_image('input', grid_in)
        self.logger.experiment.add_image('output', grid_out)
        self.logger.experiment.add_image('target', grid_target)

In [None]:
def run_experiment(model, dataloader_tr, dataloader_test, name="my_model"):
    """Fit ``model`` and then test it, logging to TensorBoard under ``runs``.

    Args:
        model: Lightning module to train and test.
        dataloader_tr: Loader used for training.
        dataloader_test: Loader used both for validation during fitting and
            for the final test pass.
        name: Experiment name given to the TensorBoard logger; the default
            keeps the name this function always used.
    """
    logger = TensorBoardLogger("runs", name=name)
    trainer = pl.Trainer(
        max_epochs=epochs,
        # Validation and test passes are capped via the limit_* options.
        limit_val_batches=10,
        limit_test_batches=1,
        logger=logger,
        accelerator=device,
        devices=1,
    )
    trainer.fit(model=model, train_dataloaders=dataloader_tr, val_dataloaders=dataloader_test)
    trainer.test(model=model, dataloaders=dataloader_test)

In [None]:
# Train and test the first autoencoder on the noise-free loaders.
model = EncoderDecoderModel(AutoencoderNet1())
run_experiment(model, dataloader_tr, dataloader_test)

# Denoising Autoencoder

Let's generate synthetic noisy digits by adding Gaussian noise to each image and clipping the result to the range [0, 1].

In [None]:
# Same MNIST splits, but with noisy=True: inputs are passed through add_noise
# while targets stay the original images.
dataloader_tr_noisy = get_mnist_dataloader(train=True, noisy=True)
dataloader_test_noisy = get_mnist_dataloader(train=False, noisy=True)

In [None]:
imgs, targets = next(iter(dataloader_tr_noisy))

# First grid: the noisy inputs; second grid: the corresponding clean targets.
show(imgs)
show(targets)

Let's define a network:
- 3x3 Conv2d, filters=32, stride=2, relu, padding=
- 3x3 Conv2d, filters=32, stride=2, relu, padding=
- 3x3 Conv2d, filters=32, relu, padding=
- 2x2 UpsamplingBilinear2d
- 3x3 Conv2d, filters=32, relu, padding=
- 2x2 UpsamplingBilinear2d
- 3x3 Conv2d, filters=1, sigmoid, padding=

and train it with Adam and a binary cross-entropy loss (`F.binary_cross_entropy` in PyTorch).

Question: What are the sizes of the input, the output and the compressed representation?

In [None]:
class AutoencoderNet2(nn.Module):
    """Autoencoder assembled from an ``encoder`` and a ``decoder`` sub-module.

    NOTE(review): ``__init__`` currently contains only a ``(...)`` placeholder,
    so ``self.encoder`` and ``self.decoder`` have to be defined there before
    ``forward`` can run.
    """

    def __init__(self):
        super().__init__()
        (...)

    def forward(self, x):
        """Encode ``x`` and return the decoder's output for that encoding."""
        return self.decoder(self.encoder(x))

In [None]:
# Train and test the second autoencoder on the noisy loaders.
model = EncoderDecoderModel(AutoencoderNet2())
run_experiment(model, dataloader_tr_noisy, dataloader_test_noisy)

In [None]:
!ls runs

In [None]:
# !rm -r runs

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir runs