<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->

In [None]:
# default_exp models.autoencoders

In [None]:
# export
import numpy as np
import torch
from torch import nn
from fastrenewables.tabular.model import *
from fastrenewables.timeseries.model import *
from fastai.tabular.all import *
from torch.autograd import Variable
from sklearn.datasets import make_regression
from fastai.learner import *
from fastrenewables.utils_pytorch import *
from fastrenewables.losses import VAEReconstructionLoss
from blitz.utils import variational_estimator
from fastrenewables.utils_blitz import set_train_mode

## Autoencoder

In [None]:
# export
@variational_estimator
class Autoencoder(nn.Module):
    """Autoencoder that chains an encoder module and a decoder module.

    Both sub-modules are called as ``module(categorical_data, continuous_data)``.
    In ``forward`` the encoder output takes the place of the continuous input
    of the decoder, while the categorical input is handed to both unchanged.

    Args:
        encoder: Module mapping ``(categorical_data, continuous_data)`` to the
            latent representation.
        decoder: Module mapping ``(categorical_data, latent)`` to the
            reconstruction.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def encode(self, categorical_data, continuous_data, as_np=False):
        """Return the encoder output; converted with ``to_np`` if ``as_np`` is set."""
        z = self.encoder(categorical_data, continuous_data)
        return to_np(z) if as_np else z

    def decode(self, categorical_data, continuous_data, as_np=False):
        """Return the decoder output; converted with ``to_np`` if ``as_np`` is set.

        ``continuous_data`` is whatever should be fed to the decoder as its
        second argument (the latent representation when called from ``forward``).
        """
        x = self.decoder(categorical_data, continuous_data)
        return to_np(x) if as_np else x

    def forward(self, categorical_data, continuous_data):
        """Encode the input and decode the resulting latent representation."""
        latent = self.encode(categorical_data, continuous_data)
        return self.decode(categorical_data, latent)

    def train(self, mode: bool = True):
        """Switch training mode and forward the mode to ``set_train_mode``.

        Returns:
            ``self``, as ``nn.Module.train`` does. Without the return value
            ``model.train()`` and ``model.eval()`` would evaluate to ``None``
            and break chained calls such as ``model = model.eval()``.
        """
        super().train(mode)
        set_train_mode(self, mode)
        return self

### AE Training

Let's create some data that we want to compress with an autoencoder.

In [None]:
# Synthetic regression problem: N samples with 20 features, 15 of them informative.
N = 2000
X, y = make_regression(n_samples=N, n_features=20, n_informative=15)
df = pd.DataFrame(X)
# Use string column names so they can be passed as cont_names / y_names.
x_names = [str(c) for c in df.columns]
df.columns = x_names
# Min-max scale the regression target into [0, 1].
df["y"] = (y - y.min())/(y.max()-y.min())
# Autoencoder setup: the continuous inputs are also the targets (y_names=x_names).
# The keyword must be spelled `device`; a misspelled keyword is silently ignored
# and the requested device is never applied.
dls = TabularDataLoaders.from_df(df, cont_names=x_names, y_names=x_names, device="cpu", procs=Normalize, bs=N//10)

In [None]:
df["y"].describe()

count    2000.000000
mean        0.536016
std         0.141274
min         0.000000
25%         0.440804
50%         0.538036
75%         0.632638
max         1.000000
Name: y, dtype: float64

In [None]:
dls.one_batch()[1].shape, dls.one_batch()[2].shape

(torch.Size([200, 20]), torch.Size([200, 20]))

In [None]:
# Number of outputs reported for the data loaders; the targets here are the
# input features themselves, so this is also the input width of the encoder.
num_features = get_c(dls)
# Encoder layer sizes: input -> 5x wider hidden layer -> 5-dimensional latent space.
ann_structure = [num_features, num_features*5, 5]
# The decoder mirrors the encoder by using the reversed layer sizes.
ae = Autoencoder(MultiLayerPerceptron(ann_structure), MultiLayerPerceptron(ann_structure[::-1]))

In [None]:
learn = Learner(dls, ae, metrics=rmse)

In [None]:
learn.fit(10, lr=5e-2)

epoch,train_loss,valid_loss,_rmse,time
0,1.428278,1.092438,1.045198,00:00
1,1.152587,1.000705,1.000353,00:00
2,1.028257,0.876347,0.936134,00:00
3,0.952321,0.812868,0.901592,00:00
4,0.902199,0.810575,0.90032,00:00
5,0.868325,0.785216,0.886124,00:00
6,0.841133,0.774011,0.879779,00:00
7,0.820236,0.762218,0.873051,00:00
8,0.803485,0.765871,0.875141,00:00
9,0.789689,0.759147,0.871291,00:00


### Forecast based on latent space

Now we create a model that wraps an autoencoder and makes a regression or classification forecast based on the autoencoder's latent space.

In [None]:
# export
@variational_estimator
class AutoencoderForecast(nn.Module):
    """Forecast model that operates on the latent space of an autoencoder.

    The input is first encoded with ``autoencoder.encode``; the resulting
    latent representation is then passed, together with the categorical data,
    to ``forecast_model``.

    Args:
        autoencoder: Object providing ``encode(categorical_data, continuous_data)``.
        forecast_model: Module called as ``forecast_model(categorical_data, latent)``.
    """

    def __init__(self, autoencoder, forecast_model):
        super().__init__()
        self.autoencoder = autoencoder
        self.forecast_model = forecast_model

    def forward(self, categorical_data, continuous_data):
        """Return the forecast computed from the encoded continuous data."""
        latent_space = self.autoencoder.encode(categorical_data, continuous_data)
        return self.forecast_model(categorical_data, latent_space)

    def train(self, mode: bool = True):
        """Switch training mode and forward the mode to ``set_train_mode``.

        Returns:
            ``self``, as ``nn.Module.train`` does. Without the return value
            ``model.train()`` and ``model.eval()`` would evaluate to ``None``.
        """
        super().train(mode)
        set_train_mode(self, mode)
        return self

Now we create a data loader that provides the target feature as its output.

In [None]:
# Freeze the trained autoencoder before it is reused as a feature extractor.
freeze(learn.model)

In [None]:
print_requires_grad(learn.model)

 (encoder): (
  (Identity())
  (ModuleList())
  (Dropout(p=0.0, inplace=False))
  (BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)) Requires grad: False
   (layers): (
    Sequential (0): (
      (Linear(in_features=20, out_features=100, bias=False)) Requires grad: False
      (ReLU(inplace=True))
      (BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)) Requires grad: False
    )
    Sequential (1): (
      (Linear(in_features=100, out_features=5, bias=True)) Requires grad: False
    )
  )
)
 (decoder): (
  (Identity())
  (ModuleList())
  (Dropout(p=0.0, inplace=False))
  (BatchNorm1d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)) Requires grad: False
   (layers): (
    Sequential (0): (
      (Linear(in_features=5, out_features=100, bias=False)) Requires grad: False
      (ReLU(inplace=True))
      (BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)) Requires grad: False


In [None]:
# Same input features as before, but now the scalar column "y" is the target.
# The keyword must be spelled `device`; a misspelled keyword is silently ignored.
dls = TabularDataLoaders.from_df(df, cont_names=x_names, y_names="y", device="cpu", procs=Normalize, bs=N//10)
# Forecast head mapping the latent size (ann_structure[-1]) to get_c(dls) outputs.
mlp_regression = MultiLayerPerceptron([ann_structure[-1], get_c(dls)])
# Combine the previously trained autoencoder with the new forecast head.
model = AutoencoderForecast(learn.model, mlp_regression)

In [None]:
learn = Learner(dls, model, metrics=rmse)

In [None]:
learn.summary()

AutoencoderForecast (Input shape: 200 x torch.Size([200, 20]))
Layer (type)         Output Shape         Param #    Trainable 
                     []                  
Identity                                                       
BatchNorm1d                               40         False     
____________________________________________________________________________
                     200 x 100           
Linear                                    2000       False     
ReLU                                                           
BatchNorm1d                               200        False     
____________________________________________________________________________
                     200 x 5             
Linear                                    505        False     
ReLU                                                           
Identity                                                       
BatchNorm1d                               10         True      
_________________

In [None]:
learn.fit(5, lr=1e-2)

epoch,train_loss,valid_loss,_rmse,time
0,0.349329,0.271204,0.520773,00:00
1,0.257536,0.129253,0.359518,00:00
2,0.196753,0.070072,0.264711,00:00
3,0.153731,0.04325,0.207967,00:00
4,0.122593,0.029275,0.171098,00:00


## Variational Autoencoder

### VAE Training

In [None]:
# export
class UnFlatten(nn.Module):
    """Module that views a tensor with a shape supplied at call time."""

    def forward(self, input, dims):
        """Return ``input`` viewed with the sizes in ``dims``.

        ``dims`` is unpacked into ``Tensor.view``, so it has to be an iterable
        of sizes.
        """
        target_shape = dims
        reshaped = input.view(*target_shape)
        return reshaped

In [None]:
# export
@variational_estimator
class VariationalAutoencoder(Autoencoder):
    """Variational autoencoder built on top of ``Autoencoder``.

    The encoder output (``h_dim`` features/channels) is mapped by two separate
    heads to the mean and the log-variance of the latent distribution
    (``z_dim`` features/channels). The latent code handed to the decoder is
    drawn with the reparameterisation trick during training and is the mean
    during evaluation.

    Args:
        encoder: Module called as ``encoder(categorical_data, continuous_data)``.
        decoder: Module called as ``decoder(categorical_data, latent)``.
        h_dim: Size of the encoder output that is fed into the two heads.
        z_dim: Size of the latent code produced by the two heads.
        is_ts: If true, the heads are ``nn.Conv1d`` layers, otherwise
            ``nn.Linear`` layers.
        kernel_size: Kernel size of the ``nn.Conv1d`` heads (only used when
            ``is_ts`` is true).
    """

    def __init__(self, encoder, decoder, h_dim, z_dim, is_ts=False, kernel_size=3):
        super().__init__(encoder, decoder)
        self.h_dim = h_dim
        self.z_dim = z_dim
        # Not used by encode/decode at the moment; kept so existing attribute
        # access keeps working.
        self.flatten = Flatten()
        self.unflatten = UnFlatten()

        if is_ts:
            # padding=kernel_size//2 preserves the temporal length for odd kernel sizes.
            self.hidden2mu = nn.Conv1d(h_dim, z_dim, kernel_size=kernel_size, padding=kernel_size//2)
            self.hidden2logvar = nn.Conv1d(h_dim, z_dim, kernel_size=kernel_size, padding=kernel_size//2)
        else:
            self.hidden2mu = nn.Linear(h_dim, z_dim)
            self.hidden2logvar = nn.Linear(h_dim, z_dim)

        self.latent_dimensions = None
        # Posterior parameters of the most recent encode() call.
        self._mu, self._logvar = None, None

    def encode(self, categorical_data, continuous_data, as_np=False):
        """Encode the input and return the latent code ``z``.

        As a side effect the posterior mean and log-variance of this call are
        stored in ``self._mu`` and ``self._logvar``.
        """
        x_hidden = self.encoder(categorical_data, continuous_data)

        mu, logvar = self.hidden2mu(x_hidden), self.hidden2logvar(x_hidden)

        # required for vae loss
        self._mu, self._logvar = mu, logvar

        z = self.reparam(mu, logvar)

        return to_np(z) if as_np else z

    def decode(self, categorical_data, continuous_data, as_np=False, latent_dimensions=None):
        """Decode a latent code; converted with ``to_np`` if ``as_np`` is set.

        ``latent_dimensions`` is accepted for backward compatibility and is
        currently ignored.
        """
        x = self.decoder(categorical_data, continuous_data)

        return to_np(x) if as_np else x

    def get_posteriors(self, categorical_data, continuous_data):
        """Encode a batch and return the result of ``encode``.

        The posterior parameters of the batch are available afterwards as
        ``self._mu`` and ``self._logvar``.
        """
        # Arguments must be forwarded in the order encode() expects them
        # (categorical first, continuous second).
        return self.encode(categorical_data, continuous_data)

    def get_z(self, categorical_data, continuous_data):
        """Encode a batch of data points, x, into their z representations."""
        # encode() already applies the reparameterisation and returns a single
        # tensor, so it must neither be unpacked nor reparameterised again.
        return self.encode(categorical_data, continuous_data)

    def reparam(self, mu, logvar):
        """Reparameterisation trick to sample z values.
        This is stochastic during training, and returns the mode during evaluation."""

        if self.training:
            # convert logarithmic variance to standard deviation representation
            std = torch.exp(logvar / 2)

            # create normal distribution as large as the data
            eps = torch.randn_like(std)
            # scale by learned mean and standard deviation
            return mu + eps*std
        else:
            return mu

    def train(self, mode: bool = True):
        """Switch training mode and forward the mode to ``set_train_mode``.

        Returns:
            ``self``, as ``nn.Module.train`` does. Without the return value
            ``model.train()`` and ``model.eval()`` would evaluate to ``None``.
        """
        super().train(mode)
        set_train_mode(self, mode)
        return self

In [None]:
# Autoencoder setup again: the continuous inputs are also the targets.
# The keyword must be spelled `device`; a misspelled keyword is silently ignored.
dls = TabularDataLoaders.from_df(df, cont_names=x_names, y_names=x_names, device="cpu", procs=Normalize, bs=N//10)

In [None]:
# Number of outputs reported for the data loaders; the targets here are the
# input features themselves, so this is also the input width of the encoder.
num_features = get_c(dls)
# Encoder layer sizes: input -> 5x wider hidden layer -> 5-dimensional output.
ann_structure = [num_features, num_features*5, 5]
# h_dim and z_dim are both set to the encoder output size (ann_structure[-1]),
# which is also the input size of the mirrored decoder.
ae = VariationalAutoencoder(MultiLayerPerceptron(ann_structure), 
                            MultiLayerPerceptron(ann_structure[::-1]), 
                            ann_structure[-1], ann_structure[-1])

In [None]:
learn = Learner(dls, ae, loss_func=VAEReconstructionLoss(ae), metrics=rmse)

In [None]:
learn.fit(10, lr=5e-2)

epoch,train_loss,valid_loss,_rmse,time
0,1.596399,1.065887,1.013617,00:00
1,1.332493,1.035455,0.998344,00:00
2,1.21852,1.016526,0.98919,00:00
3,1.15191,1.000161,0.976294,00:00
4,1.110286,0.987929,0.968629,00:00
5,1.081265,0.984784,0.961178,00:00
6,1.060152,0.977901,0.958488,00:00
7,1.044141,0.971379,0.955548,00:00
8,1.030627,0.96601,0.945443,00:00
9,1.0194,0.96397,0.943432,00:00


### Forecast based on latent space

In [None]:
# Freeze the trained variational autoencoder before it is reused as a feature extractor.
freeze(learn.model)

In [None]:
# Same input features as before, but now the scalar column "y" is the target.
# The keyword must be spelled `device`; a misspelled keyword is silently ignored.
dls = TabularDataLoaders.from_df(df, cont_names=x_names, y_names="y", device="cpu", procs=Normalize, bs=N//10)
# Forecast head mapping the latent size (ann_structure[-1]) to get_c(dls) outputs.
mlp_regression = MultiLayerPerceptron([ann_structure[-1], get_c(dls)])
# Combine the previously trained variational autoencoder with the new forecast head.
model = AutoencoderForecast(learn.model, mlp_regression)

In [None]:
learn = Learner(dls, model, metrics=rmse)
learn.summary()

AutoencoderForecast (Input shape: 200 x torch.Size([200, 20]))
Layer (type)         Output Shape         Param #    Trainable 
                     []                  
Identity                                                       
BatchNorm1d                               40         False     
____________________________________________________________________________
                     200 x 100           
Linear                                    2000       False     
ReLU                                                           
BatchNorm1d                               200        False     
____________________________________________________________________________
                     200 x 5             
Linear                                    505        False     
ReLU                                                           
Linear                                    30         False     
Linear                                    30         False     
Identity         

In [None]:
learn.fit(5, lr=1e-2)

epoch,train_loss,valid_loss,_rmse,time
0,0.292249,0.062326,0.249652,00:00
1,0.209268,0.033901,0.184123,00:00
2,0.152156,0.022407,0.149691,00:00
3,0.113911,0.019748,0.140529,00:00
4,0.089203,0.019103,0.138215,00:00


## Checks that the autoencoders also work with temporal data

In [None]:
# Dimensions of a small random time-series batch used for the shape checks below.
ts_length = 24
n_features = 10
n_samples = 3
latent_dim  = 2 
# Channel sizes of the temporal encoder: 10 input features -> latent_dim channels.
ann_structure = [10, latent_dim]

# Random input laid out as (n_samples, n_features, ts_length).
x = torch.randn(( n_samples, n_features,ts_length), requires_grad=True)

In [None]:
ae_tcn = Autoencoder(TemporalCNN(ann_structure), TemporalCNN(ann_structure[::-1]))

In [None]:
# No categorical input is used for this check, hence None.
yhat = ae_tcn(None, x)

# The reconstruction must take part in autograd and have the same shape as the input.
test_eq(True, yhat.requires_grad)
test_eq([n_samples, n_features, ts_length], list(yhat.shape))

In [None]:
# is_ts=True selects the Conv1d heads for mu/logvar; h_dim and z_dim are both
# the latent channel count (ann_structure[-1]), not multiplied by ts_length.
vae_tcn = VariationalAutoencoder(TemporalCNN(ann_structure), 
                                TemporalCNN(ann_structure[::-1]),
#                                ann_structure[-1]*ts_length, ann_structure[-1]*ts_length)
                                 ann_structure[-1], ann_structure[-1], is_ts=True)

In [None]:
# No categorical input is used for this check, hence None.
yhat = vae_tcn(None, x)

# The reconstruction must take part in autograd and have the same shape as the input.
test_eq(True, yhat.requires_grad)
test_eq([n_samples, n_features, ts_length], list(yhat.shape))
# The cached posterior parameters keep the time axis and have latent_dim channels.
test_eq([n_samples, latent_dim,ts_length], list(vae_tcn._mu.shape))
test_eq([n_samples, latent_dim,ts_length], list(vae_tcn._logvar.shape))