___To dos:___

- Program a function to visualize data, and to plot comparisons between the input data and the network's output

_Imports_

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to the helper modules imported below are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from data_utils import *
from structure_utils import *
from train_utils import *
import torch
import torchvision
import torch.optim as optim
import torch.nn.functional as F
from torch import nn

# Number of GPUs requested; any value <= 0 forces CPU execution.
ngpu = 1

# Use the first CUDA device when CUDA is available and a GPU was requested,
# otherwise fall back to the CPU.
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

__General Structure__

In [3]:
class Autoencoder(nn.Module):
    """
    Abstract base class for the autoencoders in this notebook.

    Subclasses provide the two halves of the network by overriding
    `build_encoder` and `build_decoder`; both are called (without arguments)
    from the constructor and their results are stored as `self.encoder` and
    `self.decoder`.
    """

    def __init__(self):
        super().__init__()
        # The encoder is built first, then the decoder.
        self.encoder = self.build_encoder()
        self.decoder = self.build_decoder()

    def forward(self, X):
        """Encode `X`, then decode the resulting code and return the reconstruction."""
        code = self.encoder(X)
        reconstruction = self.decoder(code)
        return reconstruction

    def build_encoder(self, *args, **kwargs):
        """Return the encoder module. Must be overridden by subclasses."""
        raise NotImplementedError()

    def build_decoder(self, *args, **kwargs):
        """Return the decoder module. Must be overridden by subclasses."""
        raise NotImplementedError()

__Basic Autoencoder__

In [4]:
class BasicAutoencoder(Autoencoder):
    """
    Fully connected autoencoder with layer widths
    784 -> 128 -> 64 -> 32 (code) -> 64 -> 128 -> 784.

    Every layer is a `LinearBlock` (defined outside this notebook; presumably a
    linear layer followed by the named activation - TODO confirm).
    """

    def build_encoder(self, *args, **kwargs):
        """Return the encoder: three 'relu' LinearBlocks narrowing from 784 to 32 units."""
        widths = (784, 128, 64, 32)
        blocks = [LinearBlock(n_in, n_out, 'relu')
                  for n_in, n_out in zip(widths[:-1], widths[1:])]
        return nn.Sequential(*blocks)

    def build_decoder(self, *args, **kwargs):
        """Return the decoder: two 'relu' LinearBlocks followed by a 'tanh' output block."""
        hidden = [LinearBlock(n_in, n_out, 'relu')
                  for n_in, n_out in ((32, 64), (64, 128))]
        output = LinearBlock(128, 784, 'tanh')
        return nn.Sequential(*hidden, output)

In [8]:
# Objects consumed by train_autoencoder in the next cell (reminder of the argument list):
# model, dataloaders, dataset_size, criterion, optimizer, scheduler = None, num_epochs = 10
# NOTE: statement order is kept as-is; the model is instantiated before the data is loaded.
model = BasicAutoencoder().to(device)
# `size = 0.3` is presumably the fraction of the dataset to load - TODO confirm in data_utils.
dataloaders, dataset_size = load_FashionMNIST(BATCH_SIZE = 64, size = 0.3)
# Alternative dataset (disabled):
# dataloaders, dataset_size = load_cifar(BATCH_SIZE = 64, size = 0.3)
# Reconstruction loss: mean squared error.
criterion = nn.MSELoss()
# Adam with a small weight decay (L2 penalty on the parameters).
optimizer = optim.Adam(model.parameters(), lr = 1e-3, weight_decay = 1e-5)
# Multiply the learning rate by 0.5 every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 5, gamma = 0.5)

num_epochs = 100

Train Data Shape: 18048
Val Data Shape: 3008


In [9]:
# Run training; the call returns two values, rebound here as `model` and `stats`.
model, stats = train_autoencoder(model, dataloaders, dataset_size, criterion, optimizer, scheduler, num_epochs)
# TODO: verify why the validation error is smaller than the training error
#       (see epoch 1 in the log below).
# TODO: test again - the LinearBlock class was changed.
# TODO: automate the process of plotting the network's training measures.

Epoch    Stage       Loss  

1/100    Training    0.17  
         Validation  0.10  
2/100    Training    0.09  
         Validation  0.09  
3/100    Training    0.08  
         Validation  0.08  
4/100    Training    0.08  
         Validation  0.08  
5/100    Training    0.07  
         Validation  0.07  
6/100    Training    0.07  
         Validation  0.07  
7/100    Training    0.07  
         Validation  0.07  
8/100    Training    0.07  
         Validation  0.07  
9/100    Training    0.06  
         Validation  0.07  
10/100   Training    0.06  
         Validation  0.06  
11/100   Training    0.06  
         Validation  0.06  
12/100   Training    0.06  
         Validation  0.06  
13/100   Training    0.06  
         Validation  0.06  
14/100   Training    0.06  
         Validation  0.06  
15/100   Training    0.06  
         Validation  0.06  
16/100   Training    0.06  
         Validation  0.06  
17/100   Training    0.06  
         Validation  0.06  
18/100   Training  

__Convolutional Autoencoder__

In [4]:
class ConvolutionalAutoencoder(Autoencoder):
    """
    Convolutional autoencoder for single-channel images.

    Channel progression: 1 -> 16 -> 8 (code) -> 8 -> 16 -> 1. The blocks
    (`ConvPoolBlock`, `ConvUpsampleBlock`, `ConvBlock`) are defined outside this
    notebook; presumably each pooling block downsamples by `pool_kernel` and
    each upsample block upsamples by `scale_factor` - TODO confirm.
    """

    def build_encoder(self, *args, **kwargs):
        """Return the encoder: two 'relu' conv+pool blocks (1 -> 16 -> 8 channels)."""
        shared = dict(kernel = 3, pad_size = 1, pool_kernel = 2, act_function = 'relu')
        channels = ((1, 16), (16, 8))
        stages = [ConvPoolBlock(in_size = c_in, out_size = c_out, **shared)
                  for c_in, c_out in channels]
        return nn.Sequential(*stages)

    def build_decoder(self, *args, **kwargs):
        """Return the decoder: two 'relu' conv+upsample blocks, then a 'tanh' conv output block."""
        shared = dict(kernel = 3, pad_size = 1, scale_factor = 2, act_function = 'relu')
        channels = ((8, 8), (8, 16))
        stages = [ConvUpsampleBlock(in_size = c_in, out_size = c_out, **shared)
                  for c_in, c_out in channels]
        # Final convolution maps back to a single channel with a tanh activation.
        output = ConvBlock(in_size = 16, out_size = 1, kernel = 3, pad_size = 1, act_function = 'tanh')
        return nn.Sequential(*stages, output)

In [14]:
# Training setup for the convolutional autoencoder; same hyperparameters as the basic one.
# NOTE: statement order is kept as-is; the model is instantiated before the data is loaded.
model = ConvolutionalAutoencoder().to(device)
# `size = 0.3` is presumably the fraction of the dataset to load - TODO confirm in data_utils.
dataloaders, dataset_size = load_FashionMNIST(BATCH_SIZE = 64, size = 0.3)
# Alternative dataset (disabled):
# dataloaders, dataset_size = load_cifar(BATCH_SIZE = 64, size = 0.3)
# Reconstruction loss: mean squared error.
criterion = nn.MSELoss()
# Adam with a small weight decay (L2 penalty on the parameters).
optimizer = optim.Adam(model.parameters(), lr = 1e-3, weight_decay = 1e-5)
# Multiply the learning rate by 0.5 every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 5, gamma = 0.5)

num_epochs = 100

Train Data Shape: 18048
Val Data Shape: 3008


In [15]:
# Run training; the call returns two values, rebound here as `model` and `stats`.
# `view = False` presumably disables visual output during training - TODO confirm in train_utils.
model, stats = train_autoencoder(model, dataloaders, dataset_size, criterion, optimizer, scheduler, num_epochs, view = False)

Epoch    Stage       Loss  

1/100    Training    0.16  
         Validation  0.11  
2/100    Training    0.09  
         Validation  0.08  
3/100    Training    0.08  
         Validation  0.07  
4/100    Training    0.07  
         Validation  0.07  
5/100    Training    0.06  
         Validation  0.06  
6/100    Training    0.06  
         Validation  0.06  
7/100    Training    0.06  
         Validation  0.05  
8/100    Training    0.05  
         Validation  0.05  
9/100    Training    0.05  
         Validation  0.05  
10/100   Training    0.05  
         Validation  0.05  
11/100   Training    0.05  
         Validation  0.05  
12/100   Training    0.05  
         Validation  0.05  
13/100   Training    0.05  
         Validation  0.05  
14/100   Training    0.05  
         Validation  0.05  
15/100   Training    0.05  
         Validation  0.05  
16/100   Training    0.05  
         Validation  0.05  
17/100   Training    0.05  
         Validation  0.05  
18/100   Training  

#### __Sparse Autoencoders__

__Sparse Autoencoder: L1 Regularization__

In [13]:
class L1SparseAutoencoder(Autoencoder):
    """
    Fully connected autoencoder (784 -> 128 -> 64 -> 32 -> 64 -> 128 -> 784)
    that exposes an L1 activation penalty through `penalty`.

    Every layer is a `LinearBlock` (defined outside this notebook; presumably a
    linear layer followed by the named activation - TODO confirm).
    """

    def build_encoder(self, *args, **kwargs):
        """Return the encoder: three 'relu' LinearBlocks narrowing from 784 to 32 units."""
        return nn.Sequential(LinearBlock(784, 128, 'relu'),
                             LinearBlock(128, 64, 'relu'),
                             LinearBlock(64, 32, 'relu'))

    def build_decoder(self, *args, **kwargs):
        """Return the decoder: two 'relu' LinearBlocks followed by a 'tanh' output block."""
        return nn.Sequential(LinearBlock(32, 64, 'relu'),
                             LinearBlock(64, 128, 'relu'),
                             LinearBlock(128, 784, 'tanh'))

    def L1Loss(self, X: torch.Tensor) -> torch.Tensor:
        """
        Mean absolute activation of the code plus mean absolute value of the
        reconstruction, for the batch `X`.

        The computation is deliberately NOT wrapped in `torch.no_grad()`: the
        result must stay attached to the autograd graph, otherwise the penalty
        is a constant with respect to the weights and adding it to the loss
        cannot encourage sparsity.

        An alternative (not implemented) is to accumulate the mean absolute
        activation after every encoder layer instead of only on the code.
        """
        code = self.encoder(X)
        reconstruction = self.decoder(code)
        # NOTE(review): the second term penalizes the reconstruction itself, not
        # a hidden activation - kept to preserve the original penalty value;
        # confirm this is intended.
        return torch.mean(torch.abs(code)) + torch.mean(torch.abs(reconstruction))

    # This can be generalized
    def penalty(self, X: torch.Tensor, coefficient: float = 1e-3) -> torch.Tensor:
        '''
        Inputs
        X: (batch_size, 1, input_dim)
        coefficient: weight applied to the L1 term.

        Outputs
        loss: a single - tensor, autograd enabled - value.
        '''
        return coefficient*self.L1Loss(X)

In [16]:
# Training setup for the L1 sparse autoencoder; same hyperparameters as the basic one.
# NOTE: statement order is kept as-is; the model is instantiated before the data is loaded.
model = L1SparseAutoencoder().to(device)
# `size = 0.3` is presumably the fraction of the dataset to load - TODO confirm in data_utils.
dataloaders, dataset_size = load_FashionMNIST(BATCH_SIZE = 64, size = 0.3)
# Reconstruction loss: mean squared error.
criterion = nn.MSELoss()
# Adam with a small weight decay (L2 penalty on the parameters).
optimizer = optim.Adam(model.parameters(), lr = 1e-3, weight_decay = 1e-5)
# Multiply the learning rate by 0.5 every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 5, gamma = 0.5)

num_epochs = 100

Train Data Shape: 18048
Val Data Shape: 3008


In [17]:
# Run training; the call returns two values, rebound here as `model` and `stats`.
# `view = True` presumably enables visual output during training - TODO confirm in train_utils.
model, stats = train_autoencoder(model, dataloaders, dataset_size, criterion, optimizer, scheduler, num_epochs, view = True)

Epoch    Stage       Loss  

1/100    Training    0.17  
         Validation  0.10  
2/100    Training    0.09  
         Validation  0.09  
3/100    Training    0.08  
         Validation  0.08  
4/100    Training    0.08  
         Validation  0.08  
5/100    Training    0.07  
         Validation  0.07  
6/100    Training    0.07  
         Validation  0.07  
7/100    Training    0.07  
         Validation  0.07  
8/100    Training    0.07  
         Validation  0.07  
9/100    Training    0.07  
         Validation  0.07  
10/100   Training    0.07  
         Validation  0.07  
11/100   Training    0.06  
         Validation  0.07  
12/100   Training    0.06  
         Validation  0.06  
13/100   Training    0.06  
         Validation  0.06  
14/100   Training    0.06  
         Validation  0.06  
15/100   Training    0.06  
         Validation  0.06  
16/100   Training    0.06  
         Validation  0.06  
17/100   Training    0.06  
         Validation  0.06  
18/100   Training  

### Relational Autoencoder
***

Relational Loss

In [7]:
class product(Relation):
    """
    Relation that maps every sample to its self inner product (squared L2
    norm) and standardizes those values across the batch.
    """

    def function(self, inputs: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        """
        Inputs
        inputs: (batch_size, 1, dim) - the batched matmul with the transpose
                must yield one scalar per sample for the reshape to succeed.

        Outputs
        A (batch_size,) tensor of per-sample inner products with the batch mean
        subtracted and divided by the batch standard deviation.
        """
        value = torch.matmul(inputs, inputs.transpose(1, 2)).reshape(inputs.shape[0])
        centered = value - torch.mean(value)
        # The standard deviation of fewer than two samples is undefined (NaN);
        # return the centered values (all zeros) instead of propagating NaN.
        if value.numel() < 2:
            return centered
        # Guard against a zero standard deviation (all samples equal), which
        # would otherwise yield NaN/inf. Non-degenerate batches are unaffected.
        std = torch.std(value).clamp_min(torch.finfo(value.dtype).eps)
        return centered / std
    
class productConv(Relation):
    """
    Relation for inputs of arbitrary per-sample shape (e.g. image tensors):
    each sample is flattened, mapped to its self inner product (squared L2
    norm), and the values are standardized across the batch.
    """

    def function(self, inputs: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        """
        Inputs
        inputs: (batch_size, ...) - every sample is flattened to a row vector.

        Outputs
        A (batch_size,) tensor of per-sample inner products with the batch mean
        subtracted and divided by the batch standard deviation.
        """
        inputs = inputs.view(inputs.shape[0], 1, -1)
        value = torch.matmul(inputs, inputs.transpose(1, 2)).reshape(inputs.shape[0])
        centered = value - torch.mean(value)
        # The standard deviation of fewer than two samples is undefined (NaN);
        # return the centered values (all zeros) instead of propagating NaN.
        if value.numel() < 2:
            return centered
        # Guard against a zero standard deviation (all samples equal), which
        # would otherwise yield NaN/inf. Non-degenerate batches are unaffected.
        std = torch.std(value).clamp_min(torch.finfo(value.dtype).eps)
        return centered / std

In [121]:
# Training setup for the relational autoencoder: the convolutional architecture trained
# with a RelationalLoss criterion, a larger batch size and a higher learning rate.
# NOTE: statement order is kept as-is; the model is instantiated before the data is loaded.
model = ConvolutionalAutoencoder().to(device)
# `size = 0.3` is presumably the fraction of the dataset to load - TODO confirm in data_utils.
dataloaders, dataset_size = load_FashionMNIST(BATCH_SIZE = 256, size = 0.3)
# Alternative dataset (disabled):
# dataloaders, dataset_size = load_cifar(BATCH_SIZE = 256, size = 0.3)
# RelationalLoss wraps an MSE loss and the productConv relation; 0.15 is presumably the
# weight balancing the two terms - TODO confirm against the RelationalLoss definition.
criterion = RelationalLoss(nn.MSELoss(), productConv(), 0.15)
# Adam with a small weight decay (L2 penalty on the parameters).
optimizer = optim.Adam(model.parameters(), lr = 1e-2, weight_decay = 1e-5)
# Multiply the learning rate by 0.5 every 15 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 15, gamma = 0.5)

num_epochs = 50

Train Data Shape: 18176
Val Data Shape: 3072


In [122]:
# Run training; the call returns two values, rebound here as `model` and `stats`.
# `view = False` presumably disables visual output during training - TODO confirm in train_utils.
model, stats = train_autoencoder(model, dataloaders, dataset_size, criterion, optimizer, scheduler, num_epochs, view = False)

Epoch    Stage       Loss  

1/50     Training    0.19  
         Validation  0.13  
2/50     Training    0.10  
         Validation  0.09  
3/50     Training    0.08  
         Validation  0.08  
4/50     Training    0.07  
         Validation  0.07  
5/50     Training    0.07  
         Validation  0.06  
6/50     Training    0.06  
         Validation  0.06  
7/50     Training    0.06  
         Validation  0.06  
8/50     Training    0.06  
         Validation  0.06  
9/50     Training    0.06  
         Validation  0.06  
10/50    Training    0.05  
         Validation  0.05  
11/50    Training    0.05  
         Validation  0.06  
12/50    Training    0.05  
         Validation  0.05  
13/50    Training    0.05  
         Validation  0.05  
14/50    Training    0.05  
         Validation  0.05  
15/50    Training    0.05  
         Validation  0.05  
16/50    Training    0.05  
         Validation  0.05  
17/50    Training    0.05  
         Validation  0.05  
18/50    Training  