In [1]:
%load_ext autoreload
%autoreload 2

from Scripts.Components import *
from typing import Tuple
import torch
from torch import nn

# Random batch used for quick experiments: 16 samples of shape (1, 64, 500),
# matching the (1, 64, 500) input size passed to `summary` in this notebook.
sample_input = torch.rand(16, 1, 64, 500)

In [2]:
from torchsummary import summary

***

***

Advantages of depthwise convolutions: fewer parameters

In [33]:
class SimpleSepDepthAutoencoder(Autoencoder):
    """Autoencoder made of one separable stage and one strided depthwise stage.

    The encoder finishes by swapping dimensions 1 and 2 (``Transpose_1_2``); the
    decoder begins by swapping them back before applying the transposed layers.
    ``Autoencoder``, ``SeparableConv2D``, ``DepthwiseConv2D`` and
    ``SeparableDeconv2D`` come from ``Scripts.Components`` (not shown here).
    """

    def build_encoder(self, n_filters: int, kernel_size: int, depth: int):
        """Assemble the encoder.

        Args:
            n_filters: output channels of the separable convolution.
            kernel_size: kernel width of the separable convolution; must be odd.
            depth: multiplier; the depthwise stage outputs ``depth * n_filters`` channels.

        Returns:
            ``nn.Sequential`` of the separable convolution, the depthwise
            convolution and ``Transpose_1_2``.
        """
        assert kernel_size % 2 != 0, 'kernel_size must be an odd number in order to auto padding to work'
        half_kernel = kernel_size // 2

        stages = [
            # (64, kernel_size) kernel with no padding on dimension 2.
            SeparableConv2D(1, n_filters, kernel_size=(64, kernel_size), padding=(0, half_kernel)),
            # Fixed (1, 5) kernel, stride 2 along the last dimension.
            DepthwiseConv2D(n_filters, depth * n_filters, kernel_size=(1, 5), padding=(0, 2), stride=(1, 2)),
            Transpose_1_2(),
        ]
        return nn.Sequential(*stages)

    def build_decoder(self, n_filters: int, kernel_size: int, depth: int):
        """Assemble the decoder, mirroring :meth:`build_encoder`.

        Args:
            n_filters: channels between the two transposed stages.
            kernel_size: kernel width of the separable deconvolution.
            depth: multiplier; the first transposed stage takes ``depth * n_filters`` channels.

        Returns:
            ``nn.Sequential`` of ``Transpose_1_2``, an ``nn.ConvTranspose2d`` and a
            ``SeparableDeconv2D``.
        """
        half_kernel = kernel_size // 2

        stages = [
            Transpose_1_2(),
            # Plain nn.ConvTranspose2d (no `groups` argument); output_padding of 1
            # on the last dimension accompanies the stride of 2.
            nn.ConvTranspose2d(depth * n_filters, n_filters, kernel_size=(1, 5), padding=(0, 2),
                               stride=(1, 2), output_padding=(0, 1)),
            SeparableDeconv2D(n_filters, 1, kernel_size=(64, kernel_size), padding=(0, half_kernel)),
        ]
        return nn.Sequential(*stages)
    
    
class Transpose_1_2(nn.Module):
    """Parameter-free layer that swaps dimensions 1 and 2 of its input."""

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` with dimensions 1 and 2 exchanged."""
        return torch.transpose(X, 1, 2)

In [38]:
# One separable filter (width 25) expanded to 250 channels by the depthwise stage.
autoencoder = SimpleSepDepthAutoencoder(n_filters=1, kernel_size=25, depth=250)

In [39]:
# Per-layer output shapes and parameter counts for a (1, 64, 500) input, on CPU.
summary(autoencoder, (1, 64, 500), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 1, 64, 476]              25
            Conv2d-2            [-1, 1, 1, 500]              64
   SeparableConv2D-3            [-1, 1, 1, 500]               0
            Conv2d-4          [-1, 250, 1, 250]           1,250
   DepthwiseConv2D-5          [-1, 250, 1, 250]               0
     Transpose_1_2-6          [-1, 1, 250, 250]               0
     Transpose_1_2-7          [-1, 250, 1, 250]               0
   ConvTranspose2d-8            [-1, 1, 1, 500]           1,251
   ConvTranspose2d-9           [-1, 1, 64, 476]              65
  ConvTranspose2d-10           [-1, 1, 64, 500]              26
SeparableDeconv2D-11           [-1, 1, 64, 500]               0
Total params: 2,681
Trainable params: 2,681
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.12
Forward/back

In [6]:
class PicSepDepthAutoencoder(Autoencoder):
    """Autoencoder: separable convolution, two stride-2 convolutions, then a transpose.

    The encoder's last step swaps dimensions 1 and 2 (``Transpose_1_2``); the
    decoder undoes each step in reverse order. ``Autoencoder``,
    ``SeparableConv2D`` and ``SeparableDeconv2D`` come from
    ``Scripts.Components`` (not shown here).
    """

    def build_encoder(self, n_filters: int, kernel_size: int):
        """Assemble the encoder.

        Args:
            n_filters: channel count used by every encoder layer.
            kernel_size: kernel width of the separable convolution; must be odd.

        Returns:
            ``nn.Sequential`` of the separable convolution, a nested
            ``nn.Sequential`` with two stride-2 ``nn.Conv2d`` layers, and
            ``Transpose_1_2``.
        """
        assert kernel_size % 2 != 0, 'kernel_size must be an odd number in order to auto padding to work'

        half_kernel = kernel_size // 2
        sep_conv = SeparableConv2D(1, n_filters, kernel_size=(64, kernel_size),
                                   padding=(0, half_kernel), stride=(1, 1))

        # Two identical (1, 5) convolutions, each with stride 2 on the last dimension.
        downsample = nn.Sequential(
            nn.Conv2d(n_filters, n_filters, kernel_size=(1, 5), padding=(0, 2), stride=(1, 2)),
            nn.Conv2d(n_filters, n_filters, kernel_size=(1, 5), padding=(0, 2), stride=(1, 2)),
        )

        to_picture = Transpose_1_2()
        return nn.Sequential(sep_conv, downsample, to_picture)

    def build_decoder(self, n_filters: int, kernel_size: int):
        """Assemble the decoder, mirroring :meth:`build_encoder`.

        Args:
            n_filters: channel count used by the transposed convolutions.
            kernel_size: kernel width of the separable deconvolution.

        Returns:
            ``nn.Sequential`` of ``Transpose_1_2``, a nested ``nn.Sequential``
            with two stride-2 ``nn.ConvTranspose2d`` layers, and a
            ``SeparableDeconv2D`` with a single output channel.
        """
        from_picture = Transpose_1_2()

        # Each transposed convolution uses stride 2 with output_padding 1 on the last dimension.
        upsample = nn.Sequential(
            nn.ConvTranspose2d(n_filters, n_filters, kernel_size=(1, 5), padding=(0, 2),
                               stride=(1, 2), output_padding=(0, 1)),
            nn.ConvTranspose2d(n_filters, n_filters, kernel_size=(1, 5), padding=(0, 2),
                               stride=(1, 2), output_padding=(0, 1)),
        )

        half_kernel = kernel_size // 2
        sep_deconv = SeparableDeconv2D(n_filters, 1, kernel_size=(64, kernel_size),
                                       padding=(0, half_kernel), stride=(1, 1))

        return nn.Sequential(from_picture, upsample, sep_deconv)
    
class Transpose_1_2(nn.Module):
    """Layer without parameters that exchanges dimensions 1 and 2 of a tensor."""

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` with dimensions 1 and 2 swapped."""
        return X.transpose(2, 1)

In [76]:
# Build the 125-filter model and print its per-layer summary for a (1, 64, 500) input on CPU.
autoencoder = PicSepDepthAutoencoder(n_filters=125, kernel_size=25)
summary(autoencoder, (1, 64, 500), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 125, 64, 476]           3,125
            Conv2d-2          [-1, 125, 1, 500]       1,000,000
   SeparableConv2D-3          [-1, 125, 1, 500]               0
            Conv2d-4          [-1, 125, 1, 250]          78,250
            Conv2d-5          [-1, 125, 1, 125]          78,250
     Transpose_1_2-6          [-1, 1, 125, 125]               0
     Transpose_1_2-7          [-1, 125, 1, 125]               0
   ConvTranspose2d-8          [-1, 125, 1, 250]          78,250
   ConvTranspose2d-9          [-1, 125, 1, 500]          78,250
  ConvTranspose2d-10         [-1, 125, 64, 476]       1,000,125
  ConvTranspose2d-11           [-1, 1, 64, 500]           3,126
SeparableDeconv2D-12           [-1, 1, 64, 500]               0
Total params: 2,319,376
Trainable params: 2,319,376
Non-trainable params: 0
---------------------------

In [91]:
class SimpleAutoencoder(Autoencoder):
    """Plain convolutional autoencoder that only convolves along the last dimension.

    All kernels have height 1, so dimension 2 of the input is left untouched.
    ``Autoencoder`` comes from ``Scripts.Components`` (not shown here).
    """

    def build_encoder(self, n_filters: int, kernel_size: int, stride: int):
        """Assemble the encoder: Conv2d -> BatchNorm2d -> ReLU -> Conv2d.

        Args:
            n_filters: number of channels produced by both convolutions.
            kernel_size: kernel width of the first convolution; must be odd.
            stride: stride of the first convolution along the last dimension.

        Returns:
            The encoder as an ``nn.Sequential``.
        """
        assert kernel_size % 2 != 0, 'kernel_size must be an odd number in order to auto padding to work'

        layers = [
            nn.Conv2d(1, n_filters, kernel_size=(1, kernel_size), padding=(0, kernel_size // 2),
                      stride=(1, stride)),
            nn.BatchNorm2d(n_filters),
            nn.ReLU(inplace=True),
            # Second convolution is fixed: width-3 kernel, stride 2.
            nn.Conv2d(n_filters, n_filters, kernel_size=(1, 3), padding=(0, 1), stride=(1, 2)),
        ]
        return nn.Sequential(*layers)

    def build_decoder(self, n_filters: int, kernel_size: int, stride: int):
        """Assemble the decoder: ConvTranspose2d -> ReLU -> BatchNorm2d -> ConvTranspose2d.

        Args:
            n_filters: number of channels entering both transposed convolutions.
            kernel_size: kernel width of the last transposed convolution; must be odd.
            stride: stride of the last transposed convolution along the last dimension.

        Returns:
            The decoder as an ``nn.Sequential``.
        """
        assert kernel_size % 2 != 0, 'kernel_size must be an odd number in order to auto padding to work'

        layers = [
            # Mirrors the encoder's fixed width-3, stride-2 convolution.
            nn.ConvTranspose2d(n_filters, n_filters, kernel_size=(1, 3), padding=(0, 1),
                               stride=(1, 2), output_padding=(0, 1)),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(n_filters),
            # output_padding of stride - 1 accompanies the configurable stride.
            nn.ConvTranspose2d(n_filters, 1, kernel_size=(1, kernel_size), padding=(0, kernel_size // 2),
                               stride=(1, stride), output_padding=(0, stride - 1)),
        ]
        return nn.Sequential(*layers)

In [93]:
# Smallest configuration (a single filter) and its per-layer summary for a (1, 64, 500) input on CPU.
autoencoder = SimpleAutoencoder(n_filters=1, kernel_size=5, stride=2)
summary(autoencoder, (1, 64, 500), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 1, 64, 250]               6
       BatchNorm2d-2           [-1, 1, 64, 250]               2
              ReLU-3           [-1, 1, 64, 250]               0
            Conv2d-4           [-1, 1, 64, 125]               4
   ConvTranspose2d-5           [-1, 1, 64, 250]               4
              ReLU-6           [-1, 1, 64, 250]               0
       BatchNorm2d-7           [-1, 1, 64, 250]               2
   ConvTranspose2d-8           [-1, 1, 64, 500]               6
Total params: 24
Trainable params: 24
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.12
Forward/backward pass size (MB): 1.04
Params size (MB): 0.00
Estimated Total Size (MB): 1.16
----------------------------------------------------------------


In [3]:
class SepConvAutoencoder(Autoencoder):
    """Autoencoder: a (1, k) convolution followed by a strided separable convolution.

    The decoder applies the transposed counterparts in reverse order.
    ``Autoencoder``, ``SeparableConv2D`` and ``SeparableDeconv2D`` come from
    ``Scripts.Components`` (not shown here).
    """

    def build_encoder(self, n_filters, kernel_size):
        """Assemble the encoder.

        Args:
            n_filters: number of channels produced by both layers.
            kernel_size: kernel width shared by both layers.

        Returns:
            ``nn.Sequential`` of an ``nn.Conv2d`` and a ``SeparableConv2D``.
        """
        pad = (0, kernel_size // 2)
        layers = [
            nn.Conv2d(1, n_filters, kernel_size=(1, kernel_size), padding=pad),
            # (64, kernel_size) kernel, stride 2 along the last dimension.
            SeparableConv2D(n_filters, n_filters, kernel_size=(64, kernel_size), padding=pad, stride=(1, 2)),
        ]
        return nn.Sequential(*layers)

    def build_decoder(self, n_filters, kernel_size):
        """Assemble the decoder, mirroring :meth:`build_encoder`.

        Args:
            n_filters: number of channels entering both layers.
            kernel_size: kernel width shared by both layers.

        Returns:
            ``nn.Sequential`` of a ``SeparableDeconv2D`` and an
            ``nn.ConvTranspose2d`` with a single output channel.
        """
        pad = (0, kernel_size // 2)
        layers = [
            SeparableDeconv2D(n_filters, n_filters, kernel_size=(64, kernel_size), padding=pad, stride=(1, 2)),
            nn.ConvTranspose2d(n_filters, 1, kernel_size=(1, kernel_size), padding=pad),
        ]
        return nn.Sequential(*layers)

In [4]:
# 64-filter model and its per-layer summary for a (1, 64, 500) input on CPU.
autoencoder = SepConvAutoencoder(n_filters=64, kernel_size=5)
summary(autoencoder, (1, 64, 500), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 64, 64, 500]             384
            Conv2d-2          [-1, 64, 64, 250]             320
            Conv2d-3           [-1, 64, 1, 250]           4,096
   SeparableConv2D-4           [-1, 64, 1, 250]               0
   ConvTranspose2d-5          [-1, 64, 64, 250]           4,160
   ConvTranspose2d-6          [-1, 64, 64, 500]          20,544
 SeparableDeconv2D-7          [-1, 64, 64, 500]               0
   ConvTranspose2d-8           [-1, 1, 64, 500]             321
Total params: 29,825
Trainable params: 29,825
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.12
Forward/backward pass size (MB): 62.99
Params size (MB): 0.11
Estimated Total Size (MB): 63.22
----------------------------------------------------------------


Input: (B, C, 1, T)

In [6]:
class mAutoencoder(Autoencoder):
    """Autoencoder: separable convolution, strided depthwise-separable convolution, transpose.

    The encoder's last step swaps dimensions 1 and 2 (``Transpose_1_2``); the
    decoder swaps them back and applies the transposed layers.
    ``Autoencoder`` and the ``*Conv2D`` / ``*Deconv2D`` layers come from
    ``Scripts.Components`` (not shown here).
    """

    def build_encoder(self, depth: int, kernel_size, n_filters: int):
        """Assemble the encoder.

        Args:
            depth: multiplier; the second stage outputs ``depth * n_filters``
                channels and also receives ``depth`` as a keyword argument.
            kernel_size: kernel width used by both convolution stages.
            n_filters: output channels of the first (separable) stage.

        Returns:
            ``nn.Sequential`` of the two convolution stages and ``Transpose_1_2``.
        """
        pad = (0, kernel_size // 2)

        stages = [
            SeparableConv2D(1, n_filters, kernel_size=(64, kernel_size), padding=pad),
            # High-level feature maps; stride 2 compresses the last dimension.
            DepthwiseSeparableConv2D(n_filters, depth * n_filters, depth=depth,
                                     kernel_size=(1, kernel_size), padding=pad, stride=(1, 2)),
            # "Picture" encoding: swap dimensions 1 and 2.
            Transpose_1_2(),
        ]
        return nn.Sequential(*stages)

    def build_decoder(self, depth: int, kernel_size: int, n_filters: int):
        """Assemble the decoder, mirroring :meth:`build_encoder`.

        Args:
            depth: multiplier; the first transposed stage takes ``depth * n_filters`` channels.
            kernel_size: kernel width used by both transposed stages.
            n_filters: channels between the two transposed stages.

        Returns:
            ``nn.Sequential`` of ``Transpose_1_2`` and the two transposed stages.
        """
        pad = (0, kernel_size // 2)

        stages = [
            # Undo the "picture" encoding.
            Transpose_1_2(),
            # Information reconstruction (the original author asks whether it is really necessary).
            DepthwiseSeparableDeconv2D(depth * n_filters, n_filters, kernel_size=(1, kernel_size),
                                       padding=pad, stride=(1, 2)),
            SeparableDeconv2D(n_filters, 1, kernel_size=(64, kernel_size), padding=pad),
        ]
        return nn.Sequential(*stages)
    
class Transpose_1_2(nn.Module):
    """Stateless layer returning its input with dimensions 1 and 2 swapped."""

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Swap dimensions 1 and 2 of ``X`` and return the result."""
        swapped = torch.transpose(X, 1, 2)
        return swapped

In [8]:
# depth = 1 keeps the channel count at n_filters; print the per-layer summary on CPU.
autoencoder = mAutoencoder(depth=1, kernel_size=5, n_filters=64)
summary(autoencoder, (1, 64, 500), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 64, 64, 500]             320
            Conv2d-2           [-1, 64, 1, 500]           4,096
   SeparableConv2D-3           [-1, 64, 1, 500]               0
            Conv2d-4           [-1, 64, 1, 250]             320
            Conv2d-5           [-1, 64, 1, 250]           4,096
DepthwiseSeparableConv2D-6           [-1, 64, 1, 250]               0
     Transpose_1_2-7           [-1, 1, 64, 250]               0
     Transpose_1_2-8           [-1, 64, 1, 250]               0
            Conv2d-9           [-1, 64, 1, 250]           4,096
  ConvTranspose2d-10           [-1, 64, 1, 500]             320
DepthwiseSeparableDeconv2D-11           [-1, 64, 1, 500]               0
  ConvTranspose2d-12          [-1, 64, 64, 500]           4,160
  ConvTranspose2d-13           [-1, 1, 64, 500]             321
SeparableDeconv2D-14    

In [47]:
class MiddleFlow(nn.Module):
    """Residual block of three ReLU -> DepthwiseSeparableConv2D stages.

    ``forward`` returns ``X + self.flow(X)``, so the stack has to leave the shape
    of ``X`` unchanged. The padding is therefore derived from ``kernel_size``
    (``k // 2`` per dimension) instead of being fixed to 1, which only kept the
    size for 3-wide kernels. For ``kernel_size = 3`` the result is the same
    padding of 1 as before.

    Args:
        num_maps: number of input and output channels of every convolution.
        kernel_size: int, or tuple/list of ints, forwarded to
            ``DepthwiseSeparableConv2D``. Every entry should be odd; with an even
            entry ``k // 2`` padding cannot preserve the size and the residual
            sum in ``forward`` fails.

    NOTE(review): assumes ``DepthwiseSeparableConv2D`` (from ``Scripts.Components``,
    not shown here) interprets ``kernel_size`` and ``padding`` like ``nn.Conv2d``.
    """

    def __init__(self, num_maps, kernel_size):
        super(MiddleFlow, self).__init__()

        # "Same" padding for odd kernels, per dimension when a tuple/list is given.
        if isinstance(kernel_size, (tuple, list)):
            same_padding = tuple(k // 2 for k in kernel_size)
        else:
            same_padding = kernel_size // 2

        self.flow = nn.Sequential(
            # Not in-place: X must stay intact for the residual sum in forward().
            nn.ReLU(),
            DepthwiseSeparableConv2D(num_maps, num_maps, kernel_size, padding = same_padding),
            nn.ReLU(),
            DepthwiseSeparableConv2D(num_maps, num_maps, kernel_size, padding = same_padding),
            nn.ReLU(),
            DepthwiseSeparableConv2D(num_maps, num_maps, kernel_size, padding = same_padding)
        )

    def forward(self, X):
        """Return the input plus the output of the three-stage flow."""
        return X + self.flow(X)
    
class ResidualDepthwiseSepBlock(nn.Module):
    """Downsampling residual block built from depthwise-separable convolutions.

    The main path applies two ReLU -> DepthwiseSeparableConv2D stages and a
    per-channel (``groups = out_channels``) convolution with stride 2 on the last
    dimension. The shortcut is a 1x1 convolution with the same stride; both
    results are summed.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels produced by both the main path and the shortcut.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        main_path = [
            # Not in-place, so X is still unmodified when the shortcut reads it.
            nn.ReLU(),
            DepthwiseSeparableConv2D(in_channels, out_channels, (1, 3), padding=(0, 1)),
            nn.ReLU(),
            DepthwiseSeparableConv2D(out_channels, out_channels, (1, 3), padding=(0, 1)),
            # Strided per-channel convolution performs the downsampling.
            nn.Conv2d(out_channels, out_channels, (1, 3), stride=(1, 2), padding=(0, 1),
                      groups=out_channels),
        ]
        self.flow = nn.Sequential(*main_path)
        self.residue = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=(1, 2))

    def forward(self, X):
        """Return ``flow(X) + residue(X)``."""
        transformed = self.flow(X)
        shortcut = self.residue(X)
        return transformed + shortcut
    
class ResidualDepthwiseSepDeconvBlock(nn.Module):
    """Upsampling residual block built from depthwise-separable transposed convolutions.

    The main path applies two ReLU -> DepthwiseSeparableDeconv2D stages and a
    per-channel (``groups = out_channels``) transposed convolution with stride 2
    on the last dimension. The shortcut is a 1x1 transposed convolution with the
    same stride and output padding; both results are summed.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels produced by both the main path and the shortcut.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        main_path = [
            # Not in-place, so X is still unmodified when the shortcut reads it.
            nn.ReLU(),
            DepthwiseSeparableDeconv2D(in_channels, out_channels, kernel_size=(1, 3), padding=(0, 1)),
            nn.ReLU(),
            DepthwiseSeparableDeconv2D(out_channels, out_channels, kernel_size=(1, 3), padding=(0, 1)),
            # Strided per-channel transposed convolution performs the upsampling.
            nn.ConvTranspose2d(out_channels, out_channels, (1, 3), stride=(1, 2), padding=(0, 1),
                               output_padding=(0, 1), groups=out_channels),
        ]
        self.flow = nn.Sequential(*main_path)
        self.residue = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=1, stride=(1, 2),
                                          output_padding=(0, 1))

    def forward(self, X):
        """Return ``flow(X) + residue(X)``."""
        transformed = self.flow(X)
        shortcut = self.residue(X)
        return transformed + shortcut
    
class DeepXAutoencoder(Autoencoder):
    """Autoencoder: temporal convolution, residual block, (64, 1) spatial filter, transpose.

    The decoder reverses the encoder step by step. ``Autoencoder`` comes from
    ``Scripts.Components`` (not shown here).
    """

    def build_encoder(self, depth: int, kernel_size, n_filters: int):
        """Assemble the encoder.

        Args:
            depth: multiplier; the residual block outputs ``depth * n_filters`` channels.
            kernel_size: kernel width of the first (temporal) convolution.
            n_filters: output channels of the first convolution.

        Returns:
            ``nn.Sequential`` of the temporal convolution, a
            ``ResidualDepthwiseSepBlock``, the spatial convolution and
            ``Transpose_1_2``.
        """
        wide = depth * n_filters

        stages = [
            # Temporal processing
            nn.Conv2d(1, n_filters, kernel_size=(1, kernel_size), padding=(0, kernel_size // 2)),
            ResidualDepthwiseSepBlock(n_filters, wide),
            # Spatial processing, spatial compression -> [n_filters, 1, T]
            nn.Conv2d(wide, wide, kernel_size=(64, 1), padding=(0, 0)),
            # Pic encoding
            Transpose_1_2(),
        ]
        return nn.Sequential(*stages)

    def build_decoder(self, depth: int, kernel_size, n_filters: int):
        """Assemble the decoder, mirroring :meth:`build_encoder`.

        Args:
            depth: multiplier; the first two stages work on ``depth * n_filters`` channels.
            kernel_size: kernel width of the last (temporal) transposed convolution.
            n_filters: channels entering the last transposed convolution.

        Returns:
            ``nn.Sequential`` of ``Transpose_1_2``, the spatial transposed
            convolution, a ``ResidualDepthwiseSepDeconvBlock`` and the temporal
            transposed convolution.
        """
        wide = depth * n_filters

        stages = [
            # Pic decoding
            Transpose_1_2(),
            # Spatial reconstruction
            nn.ConvTranspose2d(wide, wide, kernel_size=(64, 1), padding=(0, 0)),
            # Info reconstruction (the original author asks whether it is really necessary)
            ResidualDepthwiseSepDeconvBlock(wide, n_filters),
            # Temporal reconstruction
            nn.ConvTranspose2d(n_filters, 1, kernel_size=(1, kernel_size), padding=(0, kernel_size // 2)),
        ]
        return nn.Sequential(*stages)

In [48]:
# 16-filter model with depth = 1 and its per-layer summary for a (1, 64, 500) input on CPU.
autoencoder = DeepXAutoencoder(depth=1, kernel_size=5, n_filters=16)
summary(autoencoder, (1, 64, 500), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 16, 64, 500]              96
              ReLU-2          [-1, 16, 64, 500]               0
            Conv2d-3          [-1, 16, 64, 500]              48
            Conv2d-4          [-1, 16, 64, 500]             256
DepthwiseSeparableConv2D-5          [-1, 16, 64, 500]               0
              ReLU-6          [-1, 16, 64, 500]               0
            Conv2d-7          [-1, 16, 64, 500]              48
            Conv2d-8          [-1, 16, 64, 500]             256
DepthwiseSeparableConv2D-9          [-1, 16, 64, 500]               0
           Conv2d-10          [-1, 16, 64, 250]              64
           Conv2d-11          [-1, 16, 64, 250]             272
ResidualDepthwiseSepBlock-12          [-1, 16, 64, 250]               0
           Conv2d-13           [-1, 16, 1, 250]          16,400
    Transpose_1_2-1