In [None]:
# default_exp models.autoencoders

# models.autoencoders

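> Autoencoder and variational autoencoder wrappers that combine an encoder and a decoder model (MLP or temporal CNN).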

In [None]:
# export
import numpy as np
import torch
from torch import nn
from fastrenewables.tabular.model import *
from fastrenewables.timeseries.model import *
from fastai.tabular.all import *
from torch.autograd import Variable


In [None]:
# Layer sizes shared by the examples below: 10 input features -> 2 encoder outputs.
# The decoders use the reversed list.
ann_structure = [10,2]

In [None]:
# export
class Autoencoder(nn.Module):
    """Chain an encoder module and a decoder module into one autoencoder.

    Both sub-modules are invoked as ``module(categorical_data, continuous_data)``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def encode(self, categorical_data, continuous_data, as_np=False):
        """Run the encoder and return its output (passed through ``to_np`` if ``as_np``)."""
        latent = self.encoder(categorical_data, continuous_data)
        return to_np(latent) if as_np else latent

    def decode(self, categorical_data, continuous_data, as_np=False):
        """Run the decoder and return its output (passed through ``to_np`` if ``as_np``)."""
        decoded = self.decoder(categorical_data, continuous_data)
        return to_np(decoded) if as_np else decoded

    def forward(self, categorical_data, continuous_data):
        """Encode the continuous input, then decode the resulting code.

        The same ``categorical_data`` is handed to both the encoder and the decoder.
        """
        code = self.encode(categorical_data, continuous_data)
        return self.decode(categorical_data, code)

In [None]:
# MLP autoencoder: the decoder uses the encoder's layer sizes in reverse order.
ae = Autoencoder(MultiLayerPerceptron(ann_structure), MultiLayerPerceptron(ann_structure[::-1]))

In [None]:
# Display the module structure of the encoder half.
ae.encoder

MultiLayerPerceptron(
  (final_activation): Identity()
  (embeds): ModuleList()
  (emb_drop): Dropout(p=0.0, inplace=False)
  (bn_cont): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): LinBnDrop(
      (0): Linear(in_features=10, out_features=2, bias=True)
    )
  )
)

In [None]:
# Smoke test: random input of shape (3, 10), no categorical input (None).
# The trailing expression shows whether the output is attached to the autograd graph.
x = torch.randn((3,10), requires_grad=True)
yhat = ae(None, x)
yhat.requires_grad

True

In [None]:
# Display the autoencoder output for the random batch.
yhat

tensor([[-0.2488,  0.7525, -0.8032,  0.1610, -0.7692, -1.1662, -0.3459, -0.8292,
         -0.9420,  0.3740],
        [ 0.2194, -0.1126, -0.9293,  0.4079, -0.3220,  1.0201,  0.1839,  0.1158,
          0.0490, -0.7282],
        [ 0.7240,  0.1649, -0.1558,  1.1118, -0.8921, -0.0425, -0.0862, -0.7931,
         -0.4248,  0.2214]], grad_fn=<AddmmBackward>)

In [None]:
# Same construction with TemporalCNN models as encoder and decoder;
# the trailing expression displays the resulting module tree.
ae_tcn = Autoencoder(TemporalCNN(ann_structure), TemporalCNN(ann_structure[::-1]))
ae_tcn

Autoencoder(
  (encoder): TemporalCNN(
    (bn_cont): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (layers): TemporalConvNet(
      (temporal_blocks): Sequential(
        (0): ResidualBlock(
          (conv1): Conv1d(10, 2, kernel_size=(3,), stride=(1,), padding=(2,))
          (chomp1): Chomp1d()
          (act_func1): Identity()
          (dropout1): Dropout2d(p=0.0, inplace=False)
          (conv2): Conv1d(2, 2, kernel_size=(3,), stride=(1,), padding=(2,))
          (chomp2): Chomp1d()
          (act_func2): Identity()
          (dropout2): Dropout2d(p=0.0, inplace=False)
          (net): Sequential(
            (0): Conv1d(10, 2, kernel_size=(3,), stride=(1,), padding=(2,))
            (1): Chomp1d()
            (2): Identity()
            (3): Dropout2d(p=0.0, inplace=False)
            (4): Conv1d(2, 2, kernel_size=(3,), stride=(1,), padding=(2,))
            (5): Chomp1d()
            (6): Identity()
            (7): Dropout2d(p=0.0, inplac

In [None]:
# Random input of shape (3, 10, 2) for the TCN autoencoder
# (presumably batch x channels x time steps -- confirm against TemporalCNN).
# The trailing expression shows the autograd flag and the output shape.
x = torch.randn((3,10,2), requires_grad=True)
yhat = ae_tcn(None, x)
yhat.requires_grad, yhat.shape

(True, torch.Size([3, 10, 2]))

In [None]:
# Display the output for the first element of the batch.
yhat[0]

tensor([[-0.0477,  0.0091],
        [ 0.1478,  0.0880],
        [-0.2532, -0.1563],
        [-0.2404, -0.1595],
        [ 0.4216,  0.1389],
        [ 0.4037,  0.1737],
        [-0.3920, -0.1086],
        [ 0.1082, -0.0079],
        [-0.3735, -0.1348],
        [-0.0016, -0.0938]], grad_fn=<SelectBackward>)

In [None]:
class UnFlatten(nn.Module):
    """Module that views a tensor with a shape supplied at call time.

    The target shape is passed to ``forward`` rather than stored on the module.
    """
        
    def forward(self, input, dims):
        """Return ``input.view(*dims)``, i.e. ``input`` viewed with the sizes listed in ``dims``."""
        return input.view(*dims)

In [None]:
class VariationalAutoencoder(Autoencoder):
    """Variational autoencoder built around an encoder/decoder pair.

    The encoder output is flattened and mapped by two linear layers to the mean
    (``mu``) and log-variance (``logvar``) of the latent distribution. A latent
    code ``z`` is obtained with the reparameterisation trick, viewed with the
    encoder's output shape again and passed to the decoder.

    Args:
        encoder: module called as ``encoder(categorical_data, continuous_data)``.
        decoder: module called as ``decoder(categorical_data, latent)``.
        h_dim: number of input features of the ``mu``/``logvar`` layers, i.e. the
            per-sample size of the flattened encoder output.
        z_dim: number of output features of the ``mu``/``logvar`` layers.

    Note:
        ``decode`` views ``z`` with the shape of the encoder output unless an
        explicit ``latent_dimensions`` is given, which only succeeds when
        ``z_dim`` equals the per-sample size of the encoder output.
    """

    def __init__(self, encoder, decoder, h_dim, z_dim):
        super().__init__(encoder, decoder)
        self.h_dim = h_dim
        self.z_dim = z_dim
        self.flatten = Flatten()
        self.unflatten = UnFlatten()

        self.hidden2mu = nn.Linear(h_dim, z_dim)
        self.hidden2logvar = nn.Linear(h_dim, z_dim)
        # shape of the most recent encoder output; set by the encoding methods
        self.latent_dimensions = None

    def _posterior_params(self, categorical_data, continuous_data):
        """Run the encoder and return ``(mu, logvar)``; remembers the encoder output shape."""
        x_hidden = self.encoder(categorical_data, continuous_data)

        # remembered so that decode() can restore the decoder's expected input shape
        self.latent_dimensions = x_hidden.shape

        x_hidden = self.flatten(x_hidden)
        return self.hidden2mu(x_hidden), self.hidden2logvar(x_hidden)

    def encode(self, categorical_data, continuous_data, as_np=False):
        """Encode the input into a latent code ``z``.

        Returns a sample while training and ``mu`` otherwise (see ``reparam``);
        the result is passed through ``to_np`` when ``as_np`` is true.
        """
        mu, logvar = self._posterior_params(categorical_data, continuous_data)
        z = self.reparam(mu, logvar)

        return to_np(z) if as_np else z

    def decode(self, categorical_data, continuous_data, as_np=False, latent_dimensions=None):
        """Decode a latent code.

        Args:
            categorical_data: forwarded unchanged to the decoder.
            continuous_data: the latent code ``z``.
            as_np: pass the result through ``to_np`` when true.
            latent_dimensions: explicit shape to view ``z`` with before decoding.
                When omitted (``None`` or empty), the shape of the last encoder
                output is used, with its first dimension replaced by the first
                dimension of ``continuous_data`` so that batches of a different
                size than the last encoded one can be decoded.

        Raises:
            ValueError: if no shape is given and nothing has been encoded yet.
        """
        if latent_dimensions is None or len(latent_dimensions) == 0:
            if self.latent_dimensions is None:
                raise ValueError("latent_dimensions are not set to unflatten data.")
            # Keep the per-sample shape, but take the leading (batch) size from the
            # input itself; a per-sample size mismatch still raises inside view().
            latent_dimensions = (continuous_data.shape[0], *self.latent_dimensions[1:])

        x = self.unflatten(continuous_data, latent_dimensions)
        x = self.decoder(categorical_data, x)

        return to_np(x) if as_np else x

    def get_posteriors(self, categorical_data, continuous_data):
        """Return the posterior parameters ``(mu, logvar)`` for a batch of data points."""
        return self._posterior_params(categorical_data, continuous_data)

    def get_z(self, categorical_data, continuous_data):
        """Encode a batch of data points, x, into their z representations."""
        mu, logvar = self.get_posteriors(categorical_data, continuous_data)
        return self.reparam(mu, logvar)

    def reparam(self, mu, logvar):
        """Reparameterisation trick to sample z values.

        This is stochastic during training, and returns ``mu`` during evaluation.
        """
        if not self.training:
            return mu

        # convert logarithmic variance to standard deviation representation
        std = torch.exp(0.5 * logvar)
        # standard-normal noise with the same shape, dtype and device as std
        eps = torch.randn_like(std)
        # shift and scale the noise by the learned mean and standard deviation
        return mu + eps * std



In [None]:
# Fresh random (3, 10) input for the VAE example; the trailing expression displays its shape.
x = torch.randn((3,10), requires_grad=True)
x.shape

torch.Size([3, 10])

In [None]:
# MLP encoder/decoder pair; the decoder uses the reversed layer sizes.
enc = MultiLayerPerceptron(ann_structure)
dec = MultiLayerPerceptron(ann_structure[::-1])

# h_dim and z_dim are both set to the encoder's output width (ann_structure[-1]).
vae = VariationalAutoencoder(enc, dec, ann_structure[-1], ann_structure[-1])

In [None]:
# Use Module.train() rather than assigning `.training` directly: train() also
# switches every sub-module to training mode. In training mode reparam() samples z.
vae.train()
vae(None, x)

tensor([[ 0.6188,  0.3713,  1.0308, -2.3945,  1.4508,  0.5034,  0.6243, -0.4527,
          0.3466,  1.2480],
        [-0.5371, -0.0545, -0.1983,  0.4750, -0.2552, -0.8372,  0.4712, -0.5721,
          0.2953, -1.6316],
        [-0.7446,  0.3116,  0.0760,  0.2557, -0.3510, -0.3959,  0.5713, -0.6517,
          0.4867, -1.4144]], grad_fn=<AddmmBackward>)

In [None]:
# Size of the last axis of the random TCN input below; also used to size h_dim/z_dim of the TCN VAE.
ts_length = 2

In [None]:
# TCN-based VAE. NOTE: this rebinds `ae_tcn`, replacing the plain Autoencoder defined earlier.
# h_dim and z_dim are ann_structure[-1]*ts_length because the VAE flattens the encoder output
# before its linear layers (presumably ann_structure[-1] channels x ts_length steps -- confirm
# against TemporalCNN).
ae_tcn = VariationalAutoencoder(TemporalCNN(ann_structure), 
                                TemporalCNN(ann_structure[::-1]),
                               ann_structure[-1]*ts_length, ann_structure[-1]*ts_length)
# ae_tcn

In [None]:
# Smoke test for the TCN VAE: random (3, 10, ts_length) input, no categorical input (None).
# The trailing expression shows the autograd flag and the output shape.
x = torch.randn((3,10,ts_length), requires_grad=True)
yhat = ae_tcn(None, x)
yhat.requires_grad, yhat.shape

(True, torch.Size([3, 10, 2]))

In [None]:
# Display the TCN VAE output for the random batch.
yhat

tensor([[[-0.6324,  0.4716],
         [-0.1380,  0.1203],
         [-0.0890,  0.6074],
         [ 0.3778, -0.5755],
         [-0.1370,  0.2466],
         [ 0.0231,  0.0706],
         [ 0.4997, -0.6765],
         [-0.3776,  0.7519],
         [-0.3856,  0.3849],
         [ 0.5312, -0.6880]],

        [[-0.6352, -0.9405],
         [ 0.9391, -0.8131],
         [ 1.7122, -0.7292],
         [-0.6994,  0.7353],
         [-0.8056,  0.0767],
         [ 0.1449,  0.0605],
         [-0.8067,  0.9382],
         [ 1.1164, -0.6610],
         [-0.5468, -0.5982],
         [-0.4496,  1.0455]],

        [[ 0.4085,  1.0145],
         [ 0.0333, -0.5072],
         [-0.0504, -1.4090],
         [-0.2253,  0.2217],
         [ 0.2016,  0.7672],
         [-0.0300, -0.1834],
         [-0.2160,  0.3513],
         [ 0.1610, -0.7039],
         [ 0.1965,  0.5474],
         [-0.3160, -0.0773]]], grad_fn=<AddBackward0>)

TODO:

**should work for tcn and mlp**
- aleatoric uncertainty layer/wrapper 
    wrapper.forward(x)
        mu = model(x)
        std = softmax(x)
        return mu, std
        
- aleatoric uncertainty loss