## Autoencoder
Refs: [DL book](https://www.deeplearningbook.org/contents/autoencoders.html); [inspired by](https://gist.github.com/AFAgarap/4f8a8d8edf352271fa06d85ba0361f26).


In [1]:
import torch       
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

from sklearn.decomposition import PCA
import numpy as np

np.set_printoptions(precision=3, suppress=True)
torch.set_printoptions(precision=3, sci_mode=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# architecture

class shallow_AE(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.encoder_layer = nn.Linear(
            in_features=kwargs["input_width"], out_features=kwargs["hidden_width"], bias=kwargs["bias"]
        )
        self.decoder_layer = nn.Linear(
            in_features=kwargs["hidden_width"], out_features=kwargs["input_width"], bias=kwargs["bias"]
        )
        
        if 'linear' in kwargs:
            self.linear = kwargs['linear']
        else:
            self.linear = False

    def forward(self, features):
        activation = self.encoder_layer(features)
        if not self.linear:
            activation = F.relu(activation)
        reconstructed = self.decoder_layer(activation)
        return reconstructed
    
def train(model, epochs, train_loader, optimizer, criterion, verbose=True):
    for epoch in range(epochs):
        loss = 0
        for batch_features in train_loader:
            batch_features = batch_features.to(device)
            optimizer.zero_grad()

            outputs = model(batch_features)
            train_loss = criterion(outputs, batch_features)
            train_loss.backward()
            optimizer.step()
            loss += train_loss.item()

        loss = loss / len(train_loader)
        if verbose:
            print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))
        
    return model, loss

In [3]:
def gen_data(**kwargs):
    data0 = torch.randn(kwargs['samples'], kwargs['input_width'])
    
    if ('true_dim' in kwargs) and (kwargs['true_dim']<kwargs['input_width']):
        data0[:,kwargs['true_dim']:] = 0
    
    train_loader = torch.utils.data.DataLoader(
        data0, batch_size=kwargs['batch_size']
    )
    
    return data0, train_loader

def PCA_compare(model, model_loss, data0, **kwargs):
    U, s, V = np.linalg.svd(data0, full_matrices=False)

    b=np.append(s[:kwargs['hidden_width']], np.zeros(len(s)-kwargs['hidden_width']))
    L_opt = np.sum(s**2-b**2)*kwargs['batch_size']/kwargs['samples']

    print("Model loss = ", model_loss)
    print("PCA loss = ", L_opt)

#     print("\nModel params:")
#     for n, p in model.named_parameters():
#         print(n, p)

    with torch.no_grad():
        print("model estimate:\n", model(data0.to(device)))
        print("\nPCA estimate:\n", U  @ np.diag(b) @ V)

## Linear activation implies that AutoEncoder = PCA

In [7]:
# hyperparams
params = {
    # data    
    "input_width": 6, 
    "samples": 1000,
    
    # model
    "hidden_width":3,
    "linear":True,
    "bias": False,
    
    # training
    "lr": 1e-3,
    "batch_size": 100,
    "epochs": 1000
}

data0, train_loader = gen_data(**params)

model = shallow_AE(**params).to(device)
opt = optim.Adam(model.parameters(), lr=params['lr'])
crit = nn.MSELoss(reduction='sum')

model_trained, model_loss = train(model, params["epochs"], train_loader, opt, crit, verbose=False)

PCA_compare(model_trained, model_loss, data0, **params)

Model loss =  281.99938049316404
PCA loss =  281.73795430352766
model estimate:
 tensor([[ 0.667, -0.504,  0.036,  0.241,  1.405, -0.488],
        [ 0.459, -0.625, -0.455,  0.484,  0.280,  1.241],
        [-0.457, -0.297,  0.127,  0.056, -0.661,  0.484],
        ...,
        [-0.768,  0.290,  0.106, -0.209, -1.365,  0.413],
        [-0.073,  1.101, -0.041, -0.471, -0.330, -0.773],
        [ 1.154,  0.391, -1.056,  0.317,  0.611,  1.133]], device='cuda:0')

PCA estimate:
 [[ 0.676 -0.621  0.124  0.376  1.439 -0.271]
 [ 0.49  -0.261 -0.137  0.549  0.343  1.187]
 [-0.446 -0.254  0.296  0.151 -0.628  0.606]
 ...
 [-0.773  0.572 -0.03  -0.456 -1.409 -0.023]
 [-0.094  0.489 -0.237 -0.324 -0.35  -0.335]
 [ 1.166  0.374 -0.829  0.501  0.659  1.385]]


What if the real input dimension is smaller than the width of the network?

In [6]:
params['true_dim'] = params['hidden_width']-1

data0, train_loader = gen_data(**params)

model = shallow_AE(**params).to(device)
opt = optim.Adam(model.parameters(), lr=params['lr'])
crit = nn.MSELoss(reduction='sum')

model_trained, model_loss = train(model, params["epochs"], train_loader, opt, crit, verbose=False)

model_loss
PCA_compare(model_trained, model_loss, data0, **params)

Model loss =  3.122950270779967e-12
PCA loss =  -4.867875395575538e-06
model estimate:
 tensor([[     0.752,      1.050,     -0.000,      0.000,      0.000,      0.000],
        [    -0.059,     -1.274,     -0.000,     -0.000,     -0.000,     -0.000],
        [     0.431,      1.165,      0.000,     -0.000,      0.000,      0.000],
        ...,
        [     1.770,      0.934,     -0.000,      0.000,     -0.000,      0.000],
        [    -0.848,     -0.899,     -0.000,      0.000,     -0.000,     -0.000],
        [    -0.102,     -0.249,     -0.000,      0.000,     -0.000,     -0.000]],
       device='cuda:0')

PCA estimate:
 [[ 0.752  1.05   0.     0.     0.     0.   ]
 [-0.059 -1.274  0.     0.     0.     0.   ]
 [ 0.431  1.165  0.     0.     0.     0.   ]
 ...
 [ 1.77   0.934  0.     0.     0.     0.   ]
 [-0.848 -0.899  0.     0.     0.     0.   ]
 [-0.102 -0.249  0.     0.     0.     0.   ]]
