In [None]:
# default_exp models.autoencoders

# models.autoencoders

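> Autoencoder and variational autoencoder wrappers for tabular and time-series models, plus a forecast model that operates on the learned latent space.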

In [None]:
# export
import numpy as np
import torch
from torch import nn
from fastrenewables.tabular.model import *
from fastrenewables.timeseries.model import *
from fastai.tabular.all import *
from torch.autograd import Variable
from sklearn.datasets import make_regression
from fastai.learner import *
from fastrenewables.utils_pytorch import *
from fastrenewables.losses import VAEReconstructionLoss

## Autoencoder

In [None]:
# export
class Autoencoder(nn.Module):
    """Couples an encoder and a decoder into a single module.

    Both sub-models are called as ``model(categorical_data, continuous_data)``.
    The categorical input is handed unchanged to the encoder and the decoder;
    only the continuous input is compressed and reconstructed.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder, self.decoder = encoder, decoder

    def encode(self, categorical_data, continuous_data, as_np=False):
        """Run the encoder and return the latent representation.

        With ``as_np=True`` the result is passed through ``to_np`` first.
        """
        latent = self.encoder(categorical_data, continuous_data)
        return to_np(latent) if as_np else latent

    def decode(self, categorical_data, continuous_data, as_np=False):
        """Run the decoder on ``continuous_data`` (typically a latent code).

        With ``as_np=True`` the result is passed through ``to_np`` first.
        """
        reconstruction = self.decoder(categorical_data, continuous_data)
        return to_np(reconstruction) if as_np else reconstruction

    def forward(self, categorical_data, continuous_data):
        """Encode the continuous input and decode it again."""
        latent = self.encode(categorical_data, continuous_data)
        return self.decode(categorical_data, latent)

### AE Training

Let's create some data that we want to compress with an autoencoder.

In [None]:
N = 2000
# Synthetic regression problem with 20 features, 15 of which are informative.
X, y = make_regression(n_samples=N, n_features=20, n_informative=15)
df = pd.DataFrame(X)
# Use string column names; they are reused below as cont_names / y_names.
x_names = [str(c) for c in df.columns]
df.columns = x_names
# Min-max scale the regression target into [0, 1]; it is used later for the forecast task.
df["y"] = (y - y.min())/(y.max()-y.min())
# Autoencoder setup: the continuous inputs are also the targets (y_names == cont_names).
# The batch size N//10 gives 200 samples per batch.
dls = TabularDataLoaders.from_df(df, cont_names=x_names, y_names=x_names, device="cpu", procs=Normalize, bs=N//10)

In [None]:
# Sanity check of the min-max scaled target: min should be 0 and max should be 1.
df["y"].describe()

count    2000.000000
mean        0.518202
std         0.160907
min         0.000000
25%         0.408547
50%         0.520172
75%         0.626163
max         1.000000
Name: y, dtype: float64

In [None]:
# Compare the shapes of element 1 and element 2 of a batch (two batches are drawn here).
# NOTE(review): presumably index 1 holds the continuous inputs and index 2 the targets — confirm.
dls.one_batch()[1].shape, dls.one_batch()[2].shape

(torch.Size([200, 20]), torch.Size([200, 20]))

In [None]:
# NOTE(review): get_c(dls) is presumably the number of targets; since y_names == x_names
# above, it would also equal the number of input features — confirm.
num_features = get_c(dls)
# Layer sizes of the encoder: input width -> 5x wider hidden layer -> 5-unit latent space.
ann_structure = [num_features, num_features*5, 5]
# The decoder mirrors the encoder by using the reversed layer sizes.
ae = Autoencoder(MultiLayerPerceptron(ann_structure), MultiLayerPerceptron(ann_structure[::-1]))

In [None]:
# Wrap the data loaders and the autoencoder in a Learner; rmse is tracked as metric.
learn = Learner(dls, ae, metrics=rmse)

In [None]:
# Train the autoencoder for 10 epochs with a learning rate of 0.05.
learn.fit(10, lr=5e-2)

epoch,train_loss,valid_loss,_rmse,time
0,1.417418,1.028195,1.013999,00:00
1,1.142217,0.976013,0.987934,00:00
2,1.022015,0.859396,0.927036,00:00
3,0.950629,0.816512,0.90361,00:00
4,0.901302,0.787643,0.887493,00:00
5,0.865102,0.772275,0.878792,00:00
6,0.838625,0.774557,0.880089,00:00
7,0.817726,0.76504,0.874666,00:00
8,0.800567,0.75833,0.870822,00:00
9,0.786278,0.760223,0.871907,00:00


### Forecast based on latent space

Now we create a model that wraps an autoencoder and forecasts a regression or classification target from the autoencoder's latent space.

In [None]:
class AutoencoderForecast(nn.Module):
    """Forecast model that predicts from an autoencoder's latent space.

    The continuous input is first compressed with ``autoencoder.encode``; the
    resulting latent code replaces the continuous input of ``forecast_model``.
    The categorical input is forwarded unchanged to both models.
    """

    def __init__(self, autoencoder, forecast_model):
        super().__init__()
        self.autoencoder, self.forecast_model = autoencoder, forecast_model

    def forward(self, categorical_data, continuous_data):
        """Encode the continuous input and return the forecast for the latent code."""
        encoded = self.autoencoder.encode(categorical_data, continuous_data)
        return self.forecast_model(categorical_data, encoded)
        

Now we freeze the trained autoencoder and then create a data loader whose output is the target feature `y`.

In [None]:
# Freeze the trained autoencoder so that its parameters are not updated when the forecast model is fitted.
freeze(learn.model)

In [None]:
# Verify the freeze: the parameterised layers should report `Requires grad: False`.
print_requires_grad(learn.model)

 (encoder): (
  (Identity())
  (ModuleList())
  (Dropout(p=0.0, inplace=False))
  (BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)) Requires grad: False
   (layers): (
    Sequential (0): (
      (Linear(in_features=20, out_features=100, bias=False)) Requires grad: False
      (ReLU(inplace=True))
      (BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)) Requires grad: False
    )
    Sequential (1): (
      (Linear(in_features=100, out_features=5, bias=True)) Requires grad: False
    )
  )
)
 (decoder): (
  (Identity())
  (ModuleList())
  (Dropout(p=0.0, inplace=False))
  (BatchNorm1d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)) Requires grad: False
   (layers): (
    Sequential (0): (
      (Linear(in_features=5, out_features=100, bias=False)) Requires grad: False
      (ReLU(inplace=True))
      (BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)) Requires grad: False


In [None]:
# Rebuild the loaders with the scalar target "y" instead of the input columns as targets.
dls = TabularDataLoaders.from_df(df, cont_names=x_names, y_names="y", device="cpu", procs=Normalize, bs=N//10)
# Forecast head: maps the latent width (ann_structure[-1]) to get_c(dls) outputs.
mlp_regression = MultiLayerPerceptron([ann_structure[-1], get_c(dls)])
# Put the forecast head on top of the encoder of the previously trained autoencoder.
model = AutoencoderForecast(learn.model, mlp_regression)

In [None]:
# New Learner for the forecast task; this rebinds `learn`, which held the autoencoder Learner before.
learn = Learner(dls, model, metrics=rmse)

In [None]:
# Inspect the combined model; the `Trainable` column shows which layers are still updated.
learn.summary()

AutoencoderForecast (Input shape: 200 x torch.Size([200, 20]))
Layer (type)         Output Shape         Param #    Trainable 
                     []                  
Identity                                                       
BatchNorm1d                               40         False     
____________________________________________________________________________
                     200 x 100           
Linear                                    2000       False     
ReLU                                                           
BatchNorm1d                               200        False     
____________________________________________________________________________
                     200 x 5             
Linear                                    505        False     
ReLU                                                           
Identity                                                       
BatchNorm1d                               10         True      
_________________

In [None]:
# Train the forecast model for 5 epochs with a learning rate of 0.01.
learn.fit(5, lr=1e-2)

epoch,train_loss,valid_loss,_rmse,time
0,0.326834,0.217491,0.46636,00:00
1,0.235854,0.11183,0.33441,00:00
2,0.181198,0.065184,0.255312,00:00
3,0.142332,0.039793,0.199481,00:00
4,0.114708,0.032569,0.180468,00:00


## Variational Autoencoder

### VAE Training

In [None]:
class UnFlatten(nn.Module):
    """Reshapes a tensor to a shape supplied at call time."""

    def forward(self, input, dims):
        """Return ``input`` viewed with the sizes given by the iterable ``dims``."""
        target_shape = tuple(dims)
        return input.view(*target_shape)

In [None]:
class VariationalAutoencoder(Autoencoder):
    """Autoencoder whose latent code is sampled from a learned Gaussian.

    The output of ``encoder`` is flattened and projected by two linear layers
    to the mean (``mu``) and the log-variance (``logvar``) of the latent
    distribution. During training the latent code is sampled with the
    reparameterisation trick; in evaluation mode the mean is used.

    Args:
        encoder: model called as ``encoder(categorical_data, continuous_data)``.
        decoder: model called as ``decoder(categorical_data, latent)``.
        h_dim: size of the flattened encoder output per sample.
        z_dim: size of the latent code per sample.

    Notes:
        ``decode`` reshapes the latent code back to the shape of the encoder
        output, so the latent code must hold as many elements as that output
        (all examples in this notebook use ``h_dim == z_dim``).
        ``_mu`` and ``_logvar`` hold the posterior parameters of the most
        recent encoding; they are required for the VAE loss.
    """

    def __init__(self, encoder, decoder, h_dim, z_dim):
        super().__init__(encoder, decoder)
        self.h_dim = h_dim
        self.z_dim = z_dim
        self.flatten = Flatten()
        self.unflatten = UnFlatten()

        self.hidden2mu = nn.Linear(h_dim, z_dim)
        self.hidden2logvar = nn.Linear(h_dim, z_dim)
        # Shape of the last encoder output (including the batch dimension);
        # used by `decode` to undo the flattening.
        self.latent_dimensions = None
        self._mu, self._logvar = None, None

    def _posterior_params(self, categorical_data, continuous_data):
        """Compute, cache and return ``(mu, logvar)`` for a batch."""
        x_hidden = self.encoder(categorical_data, continuous_data)

        self.latent_dimensions = x_hidden.shape

        x_hidden = self.flatten(x_hidden)

        mu, logvar = self.hidden2mu(x_hidden), self.hidden2logvar(x_hidden)

        # required for vae loss
        self._mu, self._logvar = mu, logvar

        return mu, logvar

    def encode(self, categorical_data, continuous_data, as_np=False):
        """Encode a batch into latent codes ``z``.

        Returns the sampled code in training mode and the mean in evaluation
        mode (see `reparam`). With ``as_np=True`` the result is passed
        through ``to_np``.
        """
        mu, logvar = self._posterior_params(categorical_data, continuous_data)

        z = self.reparam(mu, logvar)

        return to_np(z) if as_np else z

    def decode(self, categorical_data, continuous_data, as_np=False, latent_dimensions=None):
        """Decode latent codes back to the input space.

        Args:
            categorical_data: forwarded unchanged to the decoder.
            continuous_data: latent codes as returned by `encode`.
            as_np: pass the result through ``to_np`` before returning.
            latent_dimensions: shape to which the latent codes are reshaped
                before decoding; defaults to the shape cached by the last
                call to `encode` (which includes that call's batch size).

        Raises:
            ValueError: if no shape is given and none has been cached yet.
        """
        # Prefer an explicitly passed shape, otherwise fall back to the cached one.
        dims = latent_dimensions or self.latent_dimensions
        if not dims:
            raise ValueError("latent_dimensions are not set to unflatten data.")

        x = self.unflatten(continuous_data, dims)

        x = self.decoder(categorical_data, x)

        return to_np(x) if as_np else x

    def get_posteriors(self, categorical_data, continuous_data):
        """Return the posterior parameters ``(mu, logvar)`` for a batch."""
        return self._posterior_params(categorical_data, continuous_data)

    def get_z(self, categorical_data, continuous_data):
        """Encode a batch of data points, x, into their z representations."""
        mu, logvar = self._posterior_params(categorical_data, continuous_data)

        return self.reparam(mu, logvar)

    def reparam(self, mu, logvar):
        """Reparameterisation trick to sample z values.
        This is stochastic during training, and returns the mode during evaluation."""

        if not self.training:
            return mu

        # convert logarithmic variance to standard deviation representation
        std = torch.exp(logvar / 2)

        # standard-normal noise with the same shape as the standard deviation
        eps = torch.randn_like(std)

        # shift and scale the noise by the learned mean and standard deviation
        return mu + eps*std



In [None]:
# Autoencoder setup again: the continuous inputs are also the targets (y_names == cont_names).
dls = TabularDataLoaders.from_df(df, cont_names=x_names, y_names=x_names, device="cpu", procs=Normalize, bs=N//10)

In [None]:
num_features = get_c(dls)
# Same layer sizes as for the plain autoencoder: input width -> 5x wider hidden layer -> 5 units.
ann_structure = [num_features, num_features*5, 5]
# h_dim and z_dim are both set to the encoder's output width (ann_structure[-1]).
ae = VariationalAutoencoder(MultiLayerPerceptron(ann_structure), 
                            MultiLayerPerceptron(ann_structure[::-1]), 
                            ann_structure[-1], ann_structure[-1])

In [None]:
# The loss is constructed with the model itself.
# NOTE(review): presumably so it can read the `_mu` / `_logvar` cached by `encode` — confirm in fastrenewables.losses.
learn = Learner(dls, ae, loss_func=VAEReconstructionLoss(ae), metrics=rmse)

In [None]:
# Train the variational autoencoder for 10 epochs with a learning rate of 0.05.
learn.fit(10, lr=5e-2)

epoch,train_loss,valid_loss,_rmse,time
0,1.603892,1.039223,0.997177,00:00
1,1.352651,1.030828,0.993362,00:00
2,1.233508,1.002076,0.987172,00:00
3,1.165651,0.984741,0.977986,00:00
4,1.121756,0.970083,0.971174,00:00
5,1.090632,0.965951,0.966367,00:00
6,1.068473,0.957949,0.961562,00:00
7,1.051438,0.95573,0.95674,00:00
8,1.037688,0.954759,0.954648,00:00
9,1.027215,0.944164,0.947959,00:00


### Forecast based on latent space

In [None]:
# Freeze the trained variational autoencoder before fitting the forecast model on its latent space.
freeze(learn.model)

In [None]:
# Rebuild the loaders with the scalar target "y" instead of the input columns as targets.
dls = TabularDataLoaders.from_df(df, cont_names=x_names, y_names="y", device="cpu", procs=Normalize, bs=N//10)
# Forecast head: maps the latent width (ann_structure[-1]) to get_c(dls) outputs.
mlp_regression = MultiLayerPerceptron([ann_structure[-1], get_c(dls)])
# Put the forecast head on top of the encoder of the previously trained variational autoencoder.
model = AutoencoderForecast(learn.model, mlp_regression)

In [None]:
# New Learner for the forecast task on top of the variational autoencoder.
learn = Learner(dls, model, metrics=rmse)
# Inspect the combined model; the `Trainable` column shows which layers are still updated.
learn.summary()

AutoencoderForecast (Input shape: 200 x torch.Size([200, 20]))
Layer (type)         Output Shape         Param #    Trainable 
                     []                  
Identity                                                       
BatchNorm1d                               40         False     
____________________________________________________________________________
                     200 x 100           
Linear                                    2000       False     
ReLU                                                           
BatchNorm1d                               200        False     
____________________________________________________________________________
                     200 x 5             
Linear                                    505        False     
ReLU                                                           
Flatten                                                        
Linear                                    30         False     
Linear           

In [None]:
# Train the forecast model for 5 epochs with a learning rate of 0.01.
learn.fit(5, lr=1e-2)

epoch,train_loss,valid_loss,_rmse,time
0,0.24367,0.117833,0.343267,00:00
1,0.176788,0.073938,0.271916,00:00
2,0.134227,0.046644,0.215971,00:00
3,0.10605,0.029965,0.173103,00:00
4,0.085935,0.022111,0.148699,00:00


## Check that the autoencoders also work with temporal data

In [None]:
ts_length = 24
n_features = 10
n_samples = 3
latent_dim = 2
# Layer sizes of the temporal encoder, derived from the settings above so that
# they stay consistent with the shape of `x`.
ann_structure = [n_features, latent_dim]

# Random input of shape (n_samples, n_features, ts_length) that tracks gradients.
x = torch.randn((n_samples, n_features, ts_length), requires_grad=True)

In [None]:
# Plain autoencoder built from two temporal CNNs; the decoder uses the reversed layer sizes.
ae_tcn = Autoencoder(TemporalCNN(ann_structure), TemporalCNN(ann_structure[::-1]))

In [None]:
# Forward pass without categorical input (None).
yhat = ae_tcn(None, x)

# The output must be part of the autograd graph ...
test_eq(True, yhat.requires_grad)
# ... and the reconstruction must have the same shape as the input `x`.
test_eq([n_samples, n_features, ts_length], list(yhat.shape))

In [None]:
# h_dim and z_dim are both ann_structure[-1] * ts_length.
# NOTE(review): this assumes the encoder output has ann_structure[-1] channels and keeps
# the sequence length, so that its flattened size per sample matches h_dim — confirm.
vae_tcn = VariationalAutoencoder(TemporalCNN(ann_structure), 
                                TemporalCNN(ann_structure[::-1]),
                               ann_structure[-1]*ts_length, ann_structure[-1]*ts_length)

In [None]:
# Forward pass without categorical input (None).
yhat = vae_tcn(None, x)

# The output must be part of the autograd graph and have the same shape as the input `x`.
test_eq(True, yhat.requires_grad)
test_eq([n_samples, n_features, ts_length], list(yhat.shape))
# The cached posterior parameters are flat per sample: (n_samples, latent_dim * ts_length).
test_eq([n_samples, latent_dim*ts_length], list(vae_tcn._mu.shape))
test_eq([n_samples, latent_dim*ts_length], list(vae_tcn._logvar.shape))