In [1]:
%run init_notebook.py

import torch
import torch.nn as nn
import torchaudio

# Adjust these imports according to your project structure:
from src.models import AutoEncoder
from src.config import CONV_KERNEL_SIZE, CONV_STRIDE, CONV_PADDING, POOL_KERNEL_SIZE, POOL_STRIDE, POOL_PADDING
from src.utils.models import compute_output_size, compute_flattened_size

# Sanity check: build the autoencoder and confirm that a forward pass
# returns a tensor with the same shape as its input.

# Model hyperparameters.
input_height = 64
input_width = 128
latent_dim = 30
in_channels = 1
filters = [32, 64, 128]  # output channels of the successive encoder conv layers

# Instantiate the autoencoder model.
# AutoEncoder is called positionally here as
# (input_height, input_width, latent_dim, in_channels, filters).
model = AutoEncoder(input_height, input_width, latent_dim, in_channels, filters)
print("AutoEncoder model:")
print(model)

# Create a dummy input tensor: shape [batch_size, in_channels, input_height, input_width]
batch_size = 4
dummy_input = torch.randn(batch_size, in_channels, input_height, input_width)

# Pass the dummy input through the autoencoder. Only the output *shape* is
# inspected below, so autograd tracking is disabled to avoid building and
# retaining a computation graph for this throwaway forward pass.
# NOTE: model.eval() is intentionally not called, so the model's train/eval
# mode is left exactly as the constructor set it.
with torch.no_grad():
    output = model(dummy_input)

# Print input and output shapes
print("Input shape:", dummy_input.shape)
print("Output shape:", output.shape)

# An autoencoder must reconstruct its input, so the two shapes have to match.
if dummy_input.shape == output.shape:
    print("Success: Output shape matches input shape.")
else:
    print("Mismatch: Adjust output_padding in your decoder layers if necessary.")


Input channels: 1 and Output channels: 32
Input channels: 32 and Output channels: 64
Input channels: 64 and Output channels: 128
Final height: 8 and final width: 16
Height: 8
Width: 16
Num channels: 128
Multiplication is: 16384
Flattened size: 16384
AutoEncoder model:
AutoEncoder(
  (encoder): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (5): ReLU()
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=16384, out_features=30, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=30, out_features=16384, bias=True)
    (1): Unflatten(dim=1, unflattened_size=(128, 8, 16))
    (2): ConvTranspose2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
    (3): ReLU()
    (4): ConvTranspose2d(128, 64, kernel_