In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

In [2]:
batch = torch.tensor(np.random.rand(8, 1, 40, 126), dtype=torch.float32)

In [3]:
batch.shape

torch.Size([8, 1, 40, 126])

In [29]:
conv1 = nn.Conv2d(1, 64, 4, padding=1, stride=(2, 2))
conv2 = nn.Conv2d(64, 128, 4, padding=1, stride=(2, 2))
conv3 = nn.Conv2d(128, 256, 4, padding=1, stride=(2, 2))
conv4 = nn.Conv2d(256, 512, 4, padding=1, stride=(2, 2))

deconv1 = nn.ConvTranspose2d(512, 256, 2, stride=2)
deconv2 = nn.ConvTranspose2d(256, 128, 2, stride=2)
deconv3 = nn.ConvTranspose2d(128, 64, 2, stride=2)
deconv4 = nn.ConvTranspose2d(64, 1, 2, stride=2)

In [30]:
print(batch.size())
x = conv1(batch)
print(x.size())
x = conv2(x)
print(x.size())
x = conv3(x)
print(x.size())
x = conv4(x)
print(x.size())
size = x.size()
x = x.view(x.size(0), -1)
print(x.size())
x = x.view(size)
print(x.size())
x = deconv1(x)
print(x.size())
x = deconv2(x)
print(x.size())
x = deconv3(x)
print(x.size())
x = deconv4(x)
print(x.size())

torch.Size([8, 1, 40, 126])
torch.Size([8, 64, 20, 63])
torch.Size([8, 128, 10, 31])
torch.Size([8, 256, 5, 15])
torch.Size([8, 512, 2, 7])
torch.Size([8, 7168])
torch.Size([8, 512, 2, 7])
torch.Size([8, 256, 4, 14])
torch.Size([8, 128, 8, 28])
torch.Size([8, 64, 16, 56])
torch.Size([8, 1, 32, 112])


In [None]:
class ConvAutoencoder(nn.Module):
    def __init__(self):
        super(ConvAutoencoder, self).__init__()
        ## encoder layers ##
        # conv layer (depth from 1 --> 16), 3x3 kernels
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)  
        # conv layer (depth from 16 --> 4), 3x3 kernels
        self.conv2 = nn.Conv2d(16, 4, 3, padding=1)
        # pooling layer to reduce x-y dims by two; kernel and stride of 2
        self.pool = nn.MaxPool2d(2, 2)
        
        ## decoder layers ##
        ## a kernel of 2 and a stride of 2 will increase the spatial dims by 2
        self.t_conv1 = nn.ConvTranspose2d(4, 16, 2, stride=2)
        self.t_conv2 = nn.ConvTranspose2d(16, 1, 2, stride=2)


    def forward(self, x):
        ## encode ##
        # add hidden layers with relu activation function
        # and maxpooling after
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        # add second hidden layer
        x = F.relu(self.conv2(x))
        x = self.pool(x)  # compressed representation
        
        ## decode ##
        # add transpose conv layers, with relu activation function
        x = F.relu(self.t_conv1(x))
        # output layer (with sigmoid for scaling from 0 to 1)
        x = F.sigmoid(self.t_conv2(x))
                
        return x

# initialize the NN
model = ConvAutoencoder()