## Autoencoder
Refs: [DL book](https://www.deeplearningbook.org/contents/autoencoders.html); [inspired by](https://gist.github.com/AFAgarap/4f8a8d8edf352271fa06d85ba0361f26).


In [66]:
import torch       
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

from sklearn.decomposition import PCA
import numpy as np

np.set_printoptions(precision=3, suppress=True)
torch.set_printoptions(precision=3, sci_mode=False)

In [134]:
# architecture

class shallow_AE(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.encoder_layer = nn.Linear(
            in_features=kwargs["input_width"], out_features=kwargs["hidden_width"], bias=kwargs["bias"]
        )
        self.decoder_layer = nn.Linear(
            in_features=kwargs["hidden_width"], out_features=kwargs["input_width"], bias=kwargs["bias"]
        )
        
        if 'linear' in kwargs:
            self.linear = kwargs['linear']
        else:
            self.linear = False

    def forward(self, features):
        activation = self.encoder_layer(features)
        if not self.linear:
            activation = F.relu(activation)
        reconstructed = self.decoder_layer(activation)
        return reconstructed
    
def train(model, epochs, train_loader, optimizer, criterion, verbose=True):
    for epoch in range(epochs):
        loss = 0
        for batch_features in train_loader:
            optimizer.zero_grad()

            outputs = model(batch_features)
            train_loss = criterion(outputs, batch_features)
            train_loss.backward()
            optimizer.step()
            loss += train_loss.item()

        loss = loss / len(train_loader)
        if verbose:
            print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))
        
    return model, loss

In [172]:
def gen_data(**kwargs):
    data0 = torch.randn(kwargs['samples'], kwargs['input_width'])
    
    if ('true_dim' in kwargs) and (kwargs['true_dim']<kwargs['input_width']):
        data0[:,kwargs['true_dim']:] = 0
    
    train_loader = torch.utils.data.DataLoader(
        data0, batch_size=kwargs['batch_size']
    )
    
    return data0, train_loader

def PCA_compare(model, model_loss, data0, **kwargs):
    U, s, V = np.linalg.svd(data0, full_matrices=False)

    b=np.append(s[:kwargs['hidden_width']], np.zeros(len(s)-kwargs['hidden_width']))
    L_opt = np.sum(s**2-b**2)*kwargs['batch_size']/kwargs['samples']

    print("Model loss = ", model_loss)
    print("Optimal loss = ", L_opt)

    print("\nModel params:")
    for n, p in model.named_parameters():
        print(n, p)

    with torch.no_grad():
        print("\nPCA estimate:\n", U  @ np.diag(b) @ V)
        print("model estimate:\n", model(data0))

## Linear activation implies that AutoEncoder = PCA

In [173]:
# hyperparams
params = {
    # data    
    "input_width": 6, 
    "samples": 1000,
    
    # model
    "hidden_width":3,
    "linear":True,
    "bias": False,
    
    # training
    "lr": 1e-3,
    "batch_size": 100,
    "epochs": 1000
}

data0, train_loader = gen_data(**params)

model = shallow_AE(**params)
opt = optim.Adam(model.parameters(), lr=params['lr'])
crit = nn.MSELoss(reduction='sum')

model_trained, model_loss = train(model, params["epochs"], train_loader, opt, crit, verbose=False)

PCA_compare(model_trained, model_loss, data0, **params)

Model loss =  283.50923614501954
Optimal loss =  283.49227618437436

Model params:
encoder_layer.weight Parameter containing:
tensor([[-0.342,  0.295, -0.481,  0.071, -0.573, -0.297],
        [-0.139, -0.853, -0.300,  0.260,  0.437,  0.280],
        [ 0.089, -0.203, -0.231, -0.459,  0.053, -0.352]], requires_grad=True)
decoder_layer.weight Parameter containing:
tensor([[-0.555, -0.305,  0.306],
        [ 0.072, -0.691, -0.416],
        [-0.744, -0.482, -0.380],
        [ 0.321,  0.357, -1.119],
        [-0.608,  0.172,  0.193],
        [-0.203,  0.197, -0.788]], requires_grad=True)

PCA estimate:
 [[ 0.299 -1.01  -0.098 -0.488  0.785 -0.047]
 [ 0.143  0.419  0.478  0.142 -0.017  0.208]
 [-0.408 -0.633 -0.9    0.012 -0.124 -0.262]
 ...
 [ 0.554 -0.704  0.477 -0.444  0.936  0.184]
 [ 0.393  1.013  0.68  -0.424 -0.281 -0.238]
 [ 0.062 -0.156  0.825  1.042  0.575  1.15 ]]
model estimate:
 tensor([[ 0.300, -1.010, -0.098, -0.488,  0.786, -0.047],
        [ 0.143,  0.419,  0.478,  0.142, -0.

What if the real input dimension is smaller than the width of the network?

In [175]:
params['true_dim'] = params['hidden_width']-1

data0, train_loader = gen_data(**params)

model = shallow_AE(**params)
opt = optim.Adam(model.parameters(), lr=params['lr'])
crit = nn.MSELoss(reduction='sum')

model_trained, model_loss = train(model, params["epochs"], train_loader, opt, crit, verbose=False)

PCA_compare(model_trained, model_loss, data0, **params)

Model loss =  4.842910922980836e-10
Optimal loss =  -6.294295963016338e-06

Model params:
encoder_layer.weight Parameter containing:
tensor([[-0.572, -0.288, -0.190, -0.391,  0.058,  0.131],
        [ 0.262, -0.697, -0.215, -0.190, -0.282,  0.402],
        [-0.625,  0.285, -0.233, -0.345,  0.034,  0.320]], requires_grad=True)
decoder_layer.weight Parameter containing:
tensor([[-1.099,  0.255, -0.488],
        [-0.867, -0.908,  0.414],
        [ 0.027, -0.025, -0.035],
        [-0.207,  0.196,  0.272],
        [-0.078,  0.074,  0.102],
        [ 0.216, -0.205, -0.283]], requires_grad=True)

PCA estimate:
 [[-0.79   1.679  0.     0.     0.     0.   ]
 [-1.279 -1.373  0.     0.     0.     0.   ]
 [ 0.49  -1.433  0.     0.     0.     0.   ]
 ...
 [ 1.078  0.092  0.     0.     0.     0.   ]
 [ 1.672 -1.244  0.     0.     0.     0.   ]
 [ 0.141  0.698  0.     0.     0.     0.   ]]
model estimate:
 tensor([[    -0.790,      1.679,      0.000,      0.000,      0.000,      0.000],
        [    