In [1]:
import variableImageSize as vis
import os
from glob import glob
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch

In [2]:
# --- Test split -------------------------------------------------------------
# vis.load_images_by_resolution yields one group per supported resolution;
# the groups are keyed below by image side length in pixels.
root_dir = "./data/MNIST/mnist-varres/test/"
tensors_32, tensors_48, tensors_64 = vis.load_images_by_resolution(root_dir)
test_tensors_by_resolution = dict(
    zip((32, 48, 64), (tensors_32, tensors_48, tensors_64))
)

# --- Train split ------------------------------------------------------------
# Same procedure; note that `root_dir` and the `tensors_*` names are rebound
# here, so after this cell they refer to the training data.
root_dir = "./data/MNIST/mnist-varres/train/"
tensors_32, tensors_48, tensors_64 = vis.load_images_by_resolution(root_dir)
train_tensors_by_resolution = dict(
    zip((32, 48, 64), (tensors_32, tensors_48, tensors_64))
)

# To inspect the size of each group, look at the entries of the two
# dictionaries above (the layout of each entry is defined by
# vis.load_images_by_resolution — check there before indexing into it).

In [3]:
# Build the network under test and show its layer layout.
model = vis.VariableInputNetwork(num_classes=10, N=81, pooling_type='max')
print(model)

# Smoke test: the same model must accept several spatial resolutions and
# always emit one logit vector of length 10 per image.
input_32 = torch.randn(8, 1, 32, 32)  # Batch of 8 images, resolution 32x32
input_48 = torch.randn(8, 1, 48, 48)  # Batch of 8 images, resolution 48x48
input_64 = torch.randn(8, 1, 64, 64)  # Batch of 8 images, resolution 64x64

# These forward passes are only a shape check, so skip autograd bookkeeping.
with torch.no_grad():
    output_32 = model(input_32)
    output_48 = model(input_48)
    output_64 = model(input_64)

for resolution, output in ((32, output_32), (48, output_48), (64, output_64)):
    # Enforce the expectation instead of leaving it in a comment.
    assert output.shape == (8, 10), (
        f"Expected output shape (8, 10) for {resolution}x{resolution} input, "
        f"got {tuple(output.shape)}"
    )
    print(f"Output shape for {resolution}x{resolution} input: {output.shape}")

VariableInputNetwork(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 81, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (global_pool): AdaptiveMaxPool2d(output_size=(1, 1))
  (fc): Linear(in_features=81, out_features=10, bias=True)
)
Output shape for 32x32 input: torch.Size([8, 10])
Output shape for 48x48 input: torch.Size([8, 10])
Output shape for 64x64 input: torch.Size([8, 10])


In [4]:
# Report the parameter count computed by vis.count_parameters for `model`.
param_total = vis.count_parameters(model)
print(param_total)

29029


In [5]:
# Run the pooling comparison for 5 epochs on the per-resolution train/test
# groups loaded earlier; progress is printed by the helper itself.
results = vis.train_and_compare_pooling(
    train_tensors_by_resolution,
    test_tensors_by_resolution,
    num_epochs=5,
)



Training with MAX pooling:
Epoch 1/5
Training on resolution 32x32:
  Train Loss: 0.7848, Train Acc: 76.13%
Training on resolution 48x48:
  Train Loss: 0.1877, Train Acc: 94.49%
Training on resolution 64x64:
  Train Loss: 0.1434, Train Acc: 95.53%
--------------------------------------------------
Epoch 2/5
Training on resolution 32x32:
  Train Loss: 0.1203, Train Acc: 96.21%
Training on resolution 48x48:
  Train Loss: 0.0979, Train Acc: 97.06%
Training on resolution 64x64:
  Train Loss: 0.0854, Train Acc: 97.45%
--------------------------------------------------
Epoch 3/5
Training on resolution 32x32:
  Train Loss: 0.0826, Train Acc: 97.54%
Training on resolution 48x48:
  Train Loss: 0.0710, Train Acc: 97.69%
Training on resolution 64x64:
  Train Loss: 0.0665, Train Acc: 97.99%
--------------------------------------------------
Epoch 4/5
Training on resolution 32x32:
  Train Loss: 0.0686, Train Acc: 97.94%
Training on resolution 48x48:
  Train Loss: 0.0592, Train Acc: 98.20%
Training 