In [None]:
# default_exp models.autoencoders

# models.autoencoders

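> Autoencoder and variational autoencoder wrappers that pair an encoder and a decoder model (MLP or temporal CNN).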

In [None]:
# export
import numpy as np
import torch
from torch import nn
from fastrenewables.tabular.model import *
from fastrenewables.timeseries.model import *
from fastai.tabular.all import *
from torch.autograd import Variable


In [None]:
# Layer sizes used by the demo models below: 10 input features, 2 latent features.
# The decoders use the same list in reverse order.
ann_structure = [10, 2]

In [None]:
# export
class Autoencoder(nn.Module):
    """Combine an encoder and a decoder model into one autoencoder module.

    Both sub-models are invoked as ``model(categorical_data, continuous_data)``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder, self.decoder = encoder, decoder

    def encode(self, categorical_data, continuous_data, as_np=False):
        """Return the encoder output; passed through `to_np` when `as_np` is true."""
        latent = self.encoder(categorical_data, continuous_data)
        return to_np(latent) if as_np else latent

    def decode(self, categorical_data, continuous_data, as_np=False):
        """Return the decoder output; passed through `to_np` when `as_np` is true.

        Here `continuous_data` is the (latent) input handed to the decoder.
        """
        reconstruction = self.decoder(categorical_data, continuous_data)
        return to_np(reconstruction) if as_np else reconstruction

    def forward(self, categorical_data, continuous_data):
        """Encode the continuous data, then decode the result again."""
        latent = self.encode(categorical_data, continuous_data)
        return self.decode(categorical_data, latent)

In [None]:
# MLP autoencoder: the decoder mirrors the encoder by using the reversed layer sizes.
ae = Autoencoder(
    MultiLayerPerceptron(ann_structure),
    MultiLayerPerceptron(list(reversed(ann_structure))),
)

In [None]:
# Display the encoder half of the MLP autoencoder.
ae.encoder

MultiLayerPerceptron(
  (final_activation): Identity()
  (embeds): ModuleList()
  (emb_drop): Dropout(p=0.0, inplace=False)
  (bn_cont): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): LinBnDrop(
      (0): Linear(in_features=10, out_features=2, bias=True)
    )
  )
)

In [None]:
# Random batch of shape (3, 10) pushed through the MLP autoencoder;
# None is passed as the categorical input.
x = torch.randn(3, 10, requires_grad=True)
yhat = ae(None, x)
# Show whether the reconstruction is part of the autograd graph.
yhat.requires_grad

True

In [None]:
# Display the reconstruction computed by the MLP autoencoder above.
yhat

tensor([[ 0.9132, -0.1884, -0.2194, -0.2023,  1.1451, -0.1748,  1.5637, -1.6007,
         -0.0047, -1.0678],
        [ 1.1966, -0.2939,  1.4584,  0.7007,  0.5463,  0.9522, -0.0652,  0.3513,
         -0.1976, -1.0404],
        [-0.5589,  1.6906,  0.7701,  1.5558,  0.2138, -0.0460,  0.2347, -0.7715,
         -0.7836,  0.4259]], grad_fn=<AddmmBackward>)

In [None]:
# Same autoencoder wrapper, now with temporal CNNs as encoder and decoder.
ae_tcn = Autoencoder(
    TemporalCNN(ann_structure),
    TemporalCNN(list(reversed(ann_structure))),
)
ae_tcn

Autoencoder(
  (encoder): TemporalCNN(
    (bn_cont): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (layers): TemporalConvNet(
      (temporal_blocks): Sequential(
        (0): ResidualBlock(
          (conv1): Conv1d(10, 2, kernel_size=(3,), stride=(1,), padding=(2,))
          (chomp1): Chomp1d()
          (act_func1): Identity()
          (dropout1): Dropout2d(p=0.0, inplace=False)
          (conv2): Conv1d(2, 2, kernel_size=(3,), stride=(1,), padding=(2,))
          (chomp2): Chomp1d()
          (act_func2): Identity()
          (dropout2): Dropout2d(p=0.0, inplace=False)
          (net): Sequential(
            (0): Conv1d(10, 2, kernel_size=(3,), stride=(1,), padding=(2,))
            (1): Chomp1d()
            (2): Identity()
            (3): Dropout2d(p=0.0, inplace=False)
            (4): Conv1d(2, 2, kernel_size=(3,), stride=(1,), padding=(2,))
            (5): Chomp1d()
            (6): Identity()
            (7): Dropout2d(p=0.0, inplac

In [None]:
# Random batch of shape (3, 10, 2) pushed through the TCN autoencoder;
# None is passed as the categorical input.
x = torch.randn(3, 10, 2, requires_grad=True)
yhat = ae_tcn(None, x)
# Show the autograd flag and the shape of the reconstruction.
(yhat.requires_grad, yhat.shape)

(True, torch.Size([3, 10, 2]))

In [None]:
# Display the reconstruction of the first sample in the batch.
yhat[0]

tensor([[-0.1177, -0.6364],
        [ 0.3726,  0.0438],
        [-0.1339,  0.5839],
        [ 0.1305,  0.5801],
        [-0.4334,  0.1272],
        [-0.1354, -0.2221],
        [-0.4284,  0.4048],
        [-0.3801,  0.1147],
        [ 0.5937, -0.0437],
        [ 0.2090, -0.1368]], grad_fn=<SelectBackward>)

In [None]:
class UnFlatten(nn.Module):
    """Reshape a tensor to an explicitly given target shape."""

    def forward(self, input, dims):
        """Return `input` reshaped to `dims`.

        Args:
            input: Tensor to reshape.
            dims: Iterable of target dimension sizes (e.g. a `torch.Size` or tuple).

        Returns:
            A tensor with shape `dims` holding the same elements as `input`.
        """
        # `reshape` returns a view whenever `view` would succeed, but also
        # handles non-contiguous inputs, for which `view` raises a RuntimeError.
        return input.reshape(*dims)

In [None]:
class VariationalAutoencoder(Autoencoder):
    """Variational autoencoder on top of `Autoencoder`.

    The encoder output is flattened and mapped by two linear layers to the mean
    (`mu`) and log-variance (`logvar`) of the latent distribution. A latent
    sample is drawn with the reparameterisation trick, reshaped back to the
    encoder's output shape and handed to the decoder.

    Because the latent sample is reshaped to the encoder's output shape before
    decoding, `z_dim` has to equal the flattened per-sample size of the encoder
    output (`h_dim`) for `decode` to succeed.

    Args:
        encoder: Model called as ``encoder(categorical_data, continuous_data)``.
        decoder: Model called as ``decoder(categorical_data, latent)``.
        h_dim: Flattened per-sample size of the encoder output.
        z_dim: Size of the latent vector.
    """

    def __init__(self, encoder, decoder, h_dim, z_dim):
        super().__init__(encoder, decoder)
        self.h_dim = h_dim
        self.z_dim = z_dim
        self.flatten = Flatten()
        self.unflatten = UnFlatten()

        self.hidden2mu = nn.Linear(h_dim, z_dim)
        self.hidden2logvar = nn.Linear(h_dim, z_dim)
        # Shape of the most recent encoder output; set by the encoding methods
        # and used by `decode` to undo the flattening.
        self.latent_dimensions = None

    def _posterior_params(self, categorical_data, continuous_data):
        """Run the encoder and return `(mu, logvar)`; records the encoder output shape."""
        x_hidden = self.encoder(categorical_data, continuous_data)
        self.latent_dimensions = x_hidden.shape
        x_hidden = self.flatten(x_hidden)
        return self.hidden2mu(x_hidden), self.hidden2logvar(x_hidden)

    def encode(self, categorical_data, continuous_data, as_np=False):
        """Encode a batch into latent samples `z`.

        Returns the reparameterised sample (see `reparam`), passed through
        `to_np` when `as_np` is true.
        """
        mu, logvar = self._posterior_params(categorical_data, continuous_data)
        z = self.reparam(mu, logvar)
        return to_np(z) if as_np else z

    def decode(self, categorical_data, continuous_data, as_np=False, latent_dimensions=None):
        """Decode latent samples back to the data space.

        Args:
            categorical_data: Forwarded unchanged to the decoder.
            continuous_data: Flat latent samples; the first dimension is the batch.
            as_np: Pass the result through `to_np` when true.
            latent_dimensions: Full target shape for un-flattening. When omitted,
                the shape recorded by the last encoding call is used, with its
                batch size replaced by the batch size of `continuous_data`.

        Raises:
            ValueError: If no `latent_dimensions` are given and nothing has been
                encoded yet.
        """
        if latent_dimensions is None:
            if self.latent_dimensions is None:
                raise ValueError("latent_dimensions are not set to unflatten data.")
            # Keep the recorded per-sample shape but take the batch size from the
            # actual input, so batches of a different size than the last encoded
            # one can be decoded as well.
            latent_dimensions = (continuous_data.shape[0], *self.latent_dimensions[1:])

        x = self.unflatten(continuous_data, latent_dimensions)
        x = self.decoder(categorical_data, x)
        return to_np(x) if as_np else x

    def get_posteriors(self, categorical_data, continuous_data):
        """Return the posterior parameters `(mu, logvar)` for a batch of data points."""
        return self._posterior_params(categorical_data, continuous_data)

    def get_z(self, categorical_data, continuous_data):
        """Encode a batch of data points, x, into their z representations."""
        mu, logvar = self._posterior_params(categorical_data, continuous_data)
        return self.reparam(mu, logvar)

    def reparam(self, mu, logvar):
        """Reparameterisation trick to sample z values.

        This is stochastic during training, and returns the mode during evaluation.
        """
        if not self.training:
            return mu
        # Convert the log-variance into a standard deviation.
        std = torch.exp(0.5 * logvar)
        # Standard-normal noise with the same shape as the latent batch.
        eps = torch.randn_like(std)
        # Shift and scale the noise by the learned mean and standard deviation.
        return mu + eps * std



In [None]:
# Fresh random batch of shape (3, 10) for the variational MLP autoencoder.
x = torch.randn(3, 10, requires_grad=True)
x.shape

torch.Size([3, 10])

In [None]:
# Encoder and mirrored decoder for the variational autoencoder.
enc = MultiLayerPerceptron(ann_structure)
dec = MultiLayerPerceptron(list(reversed(ann_structure)))

# Hidden and latent size both equal the encoder's output width.
vae = VariationalAutoencoder(
    enc,
    dec,
    h_dim=ann_structure[-1],
    z_dim=ann_structure[-1],
)

In [None]:
# `train()` puts the whole module tree (including the encoder and decoder
# sub-modules) into training mode; assigning `vae.training` directly would only
# flip the flag on the outermost module.
vae.train()
vae(None, x)

tensor([[-0.5253,  0.3163,  0.7156, -0.6541, -0.1933, -0.1231,  0.3033,  0.6065,
          0.2843,  0.4410],
        [-0.2915,  0.2880, -1.1070,  0.0281,  1.3649, -0.6017,  0.3194, -0.6601,
         -0.3117, -1.2946],
        [ 0.0179,  0.4681,  0.3269,  0.1630,  0.0256, -0.2277,  1.0325,  0.3727,
         -0.2203,  0.4150]], grad_fn=<AddmmBackward>)

In [None]:
# Size of the last axis of the random input used for the TCN-based variational
# autoencoder below; also used to compute its flattened hidden/latent size.
ts_length = 2

In [None]:
# Variational autoencoder with temporal CNNs. Hidden and latent size are both
# (encoder output width) * ts_length, i.e. the flattened encoder output
# for inputs whose last axis has length `ts_length`.
ae_tcn = VariationalAutoencoder(
    TemporalCNN(ann_structure),
    TemporalCNN(list(reversed(ann_structure))),
    h_dim=ann_structure[-1] * ts_length,
    z_dim=ann_structure[-1] * ts_length,
)

In [None]:
# Random batch of shape (3, 10, ts_length) pushed through the TCN-based
# variational autoencoder; None is passed as the categorical input.
x = torch.randn(3, 10, ts_length, requires_grad=True)
yhat = ae_tcn(None, x)
# Show the autograd flag and the shape of the reconstruction.
(yhat.requires_grad, yhat.shape)

(True, torch.Size([3, 10, 2]))

In [None]:
# Display the reconstruction computed by the TCN-based variational autoencoder above.
yhat

tensor([[[ 0.3432,  0.2241],
         [-0.7341, -0.0931],
         [-1.1590, -0.2387],
         [ 0.4893, -0.0063],
         [-0.1362,  0.0840],
         [ 0.3904,  0.0279],
         [-0.0910, -0.2486],
         [ 0.1789,  0.2125],
         [-0.0817,  0.3451],
         [ 0.3341, -0.1267]],

        [[ 0.3429, -1.0165],
         [ 0.2852,  0.6506],
         [ 0.6184,  1.8768],
         [-0.8920, -0.6352],
         [ 1.3292, -0.6065],
         [-0.4793, -0.6716],
         [-1.3048,  0.7625],
         [ 0.6786, -0.9692],
         [ 1.2184, -0.3211],
         [-1.3480,  0.3064]],

        [[ 0.2466, -0.4678],
         [-0.3391, -0.1414],
         [-0.4986, -0.2558],
         [ 0.0832,  0.6815],
         [ 0.1803, -1.3973],
         [ 0.1181,  0.3336],
         [-0.2996,  1.3271],
         [ 0.2207, -0.7973],
         [ 0.1890, -1.0958],
         [-0.0813,  1.3002]]], grad_fn=<AddBackward0>)

TODO:

**should work for tcn and mlp**
- aleatoric uncertainty layer/wrapper
    wrapper.forward(x)
        mu = model(x)
        std = softmax(x)
        return mu, std
        
- aleatoric uncertainty loss