## Autoencoder
Refs: [DL book](https://www.deeplearningbook.org/contents/autoencoders.html); [inspired by](https://gist.github.com/AFAgarap/4f8a8d8edf352271fa06d85ba0361f26).


In [52]:
import torch       
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

from sklearn.decomposition import PCA
from scipy.stats import special_ortho_group
import numpy as np

np.set_printoptions(precision=3, suppress=True)
torch.set_printoptions(precision=3, sci_mode=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [53]:
# architecture
class shallow_AE(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.encoder_layer = nn.Linear(
            in_features=kwargs["input_width"], out_features=kwargs["hidden_width"], bias=kwargs["bias"]
        )
        self.decoder_layer = nn.Linear(
            in_features=kwargs["hidden_width"], out_features=kwargs["input_width"], bias=kwargs["bias"]
        )
        
        if 'linear' in kwargs:
            self.linear = kwargs['linear']
        else:
            self.linear = False

    def forward(self, features):
        activation = self.encoder_layer(features)
        if not self.linear:
            activation = F.relu(activation)
        reconstructed = self.decoder_layer(activation)
        return reconstructed
    
def train(model, epochs, train_loader, optimizer, criterion, verbose=True):
    for epoch in range(epochs):
        loss = 0
        for batch_features in train_loader:
            batch_features = batch_features.to(device)
            optimizer.zero_grad()

            outputs = model(batch_features)
            train_loss = criterion(outputs, batch_features)
            train_loss.backward()
            optimizer.step()
            loss += train_loss.item()

        loss = loss / len(train_loader)
        if verbose:
            print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))
        
    return model, loss

In [54]:
def gen_data(**kwargs):
    data0 = torch.randn(kwargs['samples'], kwargs['input_width'])
    
    if ('true_dim' in kwargs) and (kwargs['true_dim']<kwargs['input_width']):
        data0[:,kwargs['true_dim']:] = 0
        data0 = data0 @ special_ortho_group.rvs(kwargs['input_width'])
        data0 = data0.float()
        
    train_loader = torch.utils.data.DataLoader(
        data0, batch_size=kwargs['batch_size']
    )
    
    return data0, train_loader

def PCA_compare(model, model_loss, data0, **kwargs):
    U, s, V = np.linalg.svd(data0, full_matrices=False)

    b=np.append(s[:kwargs['hidden_width']], np.zeros(len(s)-kwargs['hidden_width']))
    L_opt = np.sum(s**2-b**2)*kwargs['batch_size']/kwargs['samples']

    print("Model loss = ", model_loss)
    print("PCA loss = ", L_opt)

#     print("\nModel params:")
#     for n, p in model.named_parameters():
#         print(n, p)

    with torch.no_grad():
        print("model estimate:\n", model(data0.to(device)).float())
        print("\nPCA estimate:\n", U  @ np.diag(b) @ V)

## Linear activation implies that AutoEncoder = PCA

In [55]:
# hyperparams
params = {
    # data    
    "input_width": 6, 
    "samples": 1000,
    
    # model
    "hidden_width":3,
    "linear":True,
    "bias": False,
    
    # training
    "lr": 1e-3,
    "batch_size": 100,
    "epochs": 1000
}

data0, train_loader = gen_data(**params)

model = shallow_AE(**params).to(device)
opt = optim.SGD(model.parameters(), lr=params['lr'])
crit = nn.MSELoss(reduction='sum')

model_trained, model_loss = train(model, params["epochs"], train_loader, opt, crit, verbose=False)

PCA_compare(model_trained, model_loss, data0, **params)

Model loss =  285.1575469970703
PCA loss =  284.8278944986494
model estimate:
 tensor([[-1.858, -0.321, -0.645,  0.577, -1.535,  0.040],
        [ 0.572,  0.076,  0.465, -0.811, -2.136, -1.342],
        [ 0.344,  0.013,  0.112, -0.022,  0.661,  0.223],
        ...,
        [-0.531,  0.494, -0.481,  0.087, -1.099, -0.776],
        [ 0.431,  1.107, -0.412, -0.173, -0.398, -1.186],
        [ 0.276,  0.454, -0.212,  0.116,  0.833, -0.003]], device='cuda:0')

PCA estimate:
 [[-1.856 -0.327 -0.656  0.574 -1.538  0.029]
 [ 0.577  0.062  0.474 -0.816 -2.139 -1.349]
 [ 0.339  0.018  0.109 -0.018  0.661  0.225]
 ...
 [-0.542  0.488 -0.477  0.083 -1.094 -0.775]
 [ 0.414  1.115 -0.402 -0.181 -0.398 -1.192]
 [ 0.271  0.46  -0.206  0.113  0.834 -0.001]]


### What if the true input dimension is smaller than the width of the network?

In [56]:
params['true_dim'] = params['hidden_width']-1

data0, train_loader = gen_data(**params)

model = shallow_AE(**params).to(device)
opt = optim.SGD(model.parameters(), lr=params['lr'])
crit = nn.MSELoss(reduction='sum')

model_trained, model_loss = train(model, params["epochs"], train_loader, opt, crit, verbose=False)

model_loss
PCA_compare(model_trained, model_loss, data0, **params)

Model loss =  2.676261122436241e-12
PCA loss =  -1.0023780874967085e-06
model estimate:
 tensor([[ 0.635,  0.139, -0.340,  0.093,  1.516,  0.880],
        [ 0.009, -0.783, -0.952, -0.064, -0.761, -1.064],
        [-0.379,  0.127,  0.457, -0.038, -0.695, -0.237],
        ...,
        [-0.407,  0.010,  0.338, -0.051, -0.872, -0.428],
        [-0.474,  0.603,  1.107, -0.011, -0.428,  0.313],
        [ 0.373, -0.814, -1.281, -0.020, -0.002, -0.712]], device='cuda:0')

PCA estimate:
 [[ 0.635  0.139 -0.34   0.093  1.516  0.88 ]
 [ 0.009 -0.783 -0.952 -0.064 -0.761 -1.064]
 [-0.379  0.127  0.457 -0.038 -0.695 -0.237]
 ...
 [-0.407  0.01   0.338 -0.051 -0.872 -0.428]
 [-0.474  0.603  1.107 -0.011 -0.428  0.313]
 [ 0.373 -0.814 -1.281 -0.02  -0.002 -0.712]]
