# Kernel padding experiments

In [4]:
import torch
import torch.nn.functional as F

In [145]:
# Random kernel of shape (out_channels, in_channels, kernel_size[0], kernel_size[1]).
kernel_size = (3, 3)
in_channels = 1
out_channels = 1
weights = torch.rand(out_channels, in_channels, *kernel_size)
weights

tensor([[[[0.0911, 0.7267, 0.3667],
          [0.6877, 0.5025, 0.1365],
          [0.0259, 0.6638, 0.1500]]]])

In [146]:
# Pad the last two dimensions of the kernel by one element on every side
# (F.pad's default mode fills with zeros, as the output below shows).
padded = F.pad(weights, (1, 1, 1, 1))
padded

tensor([[[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0911, 0.7267, 0.3667, 0.0000],
          [0.0000, 0.6877, 0.5025, 0.1365, 0.0000],
          [0.0000, 0.0259, 0.6638, 0.1500, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]])

In [147]:
# Sum the four one-step-shifted slices of the padded kernel. Each output entry is the
# sum of a 2x2 window of `padded`, so a 5x5 padded kernel yields a 4x4 result.
padded_upscale = (
    padded[:, :, 1:, 1:]
    + padded[:, :, 1:, :-1]
    + padded[:, :, :-1, 1:]
    + padded[:, :, :-1, :-1]
)
# Display the result (the original `padded_upscale[]` was a syntax error).
padded_upscale

tensor([[[[0.0911, 0.8179, 1.0934, 0.3667],
          [0.7788, 2.0080, 1.7323, 0.5032],
          [0.7136, 1.8799, 1.4527, 0.2864],
          [0.0259, 0.6897, 0.8138, 0.1500]]]])

In [148]:
# Flatten everything after dim 0 so that each entry along dim 0 becomes one row.
w_mat = weights.view(weights.shape[0], -1)
w_mat

tensor([[0.0911, 0.7267, 0.3667, 0.6877, 0.5025, 0.1365, 0.0259, 0.6638, 0.1500]])

In [151]:
from utils import max_singular_value

# Start vector of shape (1, rows of w_mat), float32 on the CPU, filled from N(0, 1).
u = torch.empty(1, w_mat.size(0), dtype=torch.float32, device='cpu').normal_(0, 1)
# NOTE(review): max_singular_value lives in the project-local `utils` module; the
# trailing 1 is presumably an iteration count — confirm against its definition.
sigma, _u = max_singular_value(w_mat, u, 1)
sigma

tensor(1.3707)

In [152]:
# Rescale the upscaled kernel by the sigma returned from max_singular_value.
padded_upscale.div(sigma)

tensor([[[[0.0665, 0.5967, 0.7977, 0.2675],
          [0.5682, 1.4650, 1.2639, 0.3671],
          [0.5206, 1.3715, 1.0598, 0.2090],
          [0.0189, 0.5032, 0.5937, 0.1094]]]])

In [None]:
# NOTE(review): scratch fragment, not a runnable cell — it refers to `self` and uses
# `return` with no enclosing function in view; presumably pasted from a module method
# (TODO confirm where it belongs).
# Sums the four one-step-shifted slices of `weights` (each slice drops one row and one
# column, and no padding is applied here), then divides by the sigma returned by
# max_singular_value.
weights = weights[:, :, 1:, 1:] + weights[:, :, 1:, :-
                                            1] + weights[:, :, :-1, 1:] + weights[:, :, :-1, :-1]
w_mat = weights.view(weights.size(0), -1)  # one row per entry along dim 0
sigma, _u = max_singular_value(w_mat, self.u, 1)
self.u.copy_(_u)  # write the returned vector back into self.u in place
return weights / sigma

# _output_padding experiments

In [49]:
from typing import List

def _output_padding(input, output_padding, output_size, stride, padding, kernel_size, dilation=None):
        # type: (Tensor, Optional[List[int]], List[int], List[int], List[int], Optional[List[int]]) -> List[int]
        if output_size is None:
            ret = tuple(output_padding)  # converting to list if was not already
        else:
            k = input.dim() - 2
            if len(output_size) == k + 2:
                output_size = output_size[2:]
            if len(output_size) != k:
                raise ValueError(
                    "output_size must have {} or {} elements (got {})"
                    .format(k, k + 2, len(output_size)))

            min_sizes = torch.jit.annotate(List[int], [])
            max_sizes = torch.jit.annotate(List[int], [])
            for d in range(k):
                dim_size = ((input.size(d + 2) - 1) * stride[d] -
                            2 * padding[d] +
                            (dilation[d] if dilation is not None else 1) * (kernel_size[d] - 1) + 1)
                min_sizes.append(dim_size)
                max_sizes.append(min_sizes[d] + stride[d] - 1)

            for i in range(len(output_size)):
                size = output_size[i]
                min_size = min_sizes[i]
                max_size = max_sizes[i]
                if size < min_size or size > max_size:
                    raise ValueError((
                        "requested an output size of {}, but valid sizes range "
                        "from {} to {} (for an input of {})").format(
                            output_size, min_sizes, max_sizes, input.size()[2:]))

            res = torch.jit.annotate(List[int], [])
            for d in range(k):
                res.append(output_size[d] - min_sizes[d])

            ret = res
        return ret

In [107]:
input_x = 256
input_y = 256

inputs = torch.zeros(3, 10, input_x, input_y)

# Ask for an output twice the input size in both sized dimensions.
# The tensor built above is `inputs`; the bare name `input` is Python's builtin
# function and has no .dim()/.size(), so it must not be passed here.
output_padding = _output_padding(
    inputs,
    output_padding=[0, 0],
    output_size=[input_x * 2, input_y * 2],
    stride=[2, 2],
    padding=[1, 1],
    kernel_size=[3, 3],
    dilation=None,
)

output_padding

[1, 1]

In [134]:
from tqdm import tqdm

# For each (stride, padding, dilation, kernel_size) combination, check for which input
# sizes an output of exactly twice the input size falls inside the valid range
# [min_size, min_size + stride - 1] used by _output_padding.
for stride in range(2, 3):
    for padding in range(0, 4):
        for dilation in range(1, 2):
            for kernel_size in range(1, 6):
                sizes = []  # input sizes whose doubled output is out of range
                for input_size in range(0, 500):
                    output_size = input_size * 2
                    min_size = (input_size - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + 1
                    max_size = min_size + stride - 1
                    if output_size < min_size or output_size > max_size:
                        # Record the size being tested in this iteration, not the
                        # unrelated `input_x` global left over from another cell.
                        sizes.append(input_size)

                failure = bool(sizes)
                message = 'fail' if failure else 'success'
                print(f'{message} stride: {stride}, padding: {padding}, dilation: {dilation}, kernel_size: {kernel_size}, failed sizes: {len(sizes)}')
        
       

success stride: 2, padding: 0, dilation: 1, kernel_size: 1, failed sizes: 0
success stride: 2, padding: 0, dilation: 1, kernel_size: 2, failed sizes: 0
fail stride: 2, padding: 0, dilation: 1, kernel_size: 3, failed sizes: 500
fail stride: 2, padding: 0, dilation: 1, kernel_size: 4, failed sizes: 500
fail stride: 2, padding: 0, dilation: 1, kernel_size: 5, failed sizes: 500
fail stride: 2, padding: 1, dilation: 1, kernel_size: 1, failed sizes: 500
fail stride: 2, padding: 1, dilation: 1, kernel_size: 2, failed sizes: 500
success stride: 2, padding: 1, dilation: 1, kernel_size: 3, failed sizes: 0
success stride: 2, padding: 1, dilation: 1, kernel_size: 4, failed sizes: 0
fail stride: 2, padding: 1, dilation: 1, kernel_size: 5, failed sizes: 500
fail stride: 2, padding: 2, dilation: 1, kernel_size: 1, failed sizes: 500
fail stride: 2, padding: 2, dilation: 1, kernel_size: 2, failed sizes: 500
fail stride: 2, padding: 2, dilation: 1, kernel_size: 3, failed sizes: 500
fail stride: 2, paddi

Quoted from https://stats.stackexchange.com/questions/297678/how-to-calculate-optimal-zero-padding-for-convolutional-neural-networks:

The possible values for the padding size, 𝑃, depend on the input size 𝑊 (following the notation of the blog), the filter size 𝐹 and the stride 𝑆. We assume width and height are the same.

What you need to ensure is that the output size, (𝑊−𝐹+2𝑃)/𝑆+1, is an integer. When 𝑆=1, you get your first equation, 𝑃=(𝐹−1)/2, as a necessary condition. In general, however, you need to consider all three parameters, namely 𝑊, 𝐹 and 𝑆, in order to determine the valid values of 𝑃.


# Same padding experiments

In [115]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from typing import Union

def same_padding(conv_layer: Union[nn.Conv2d, nn.ConvTranspose2d], inputs: Tensor):
    """Pad ``inputs`` along dims 2 and 3 according to the layer's kernel and dilation.

    The total padding per dimension is ``dilation * (kernel_size - 1)``. The first
    side (top / left) receives the floor of half of it and the second side
    (bottom / right) receives the remainder. Sizes and padding amounts are printed
    as a debugging aid.

    Args:
        conv_layer: layer whose ``dilation`` and ``kernel_size`` pairs are read.
        inputs: tensor with at least four dimensions; dims 2 and 3 are padded.

    Returns:
        The result of ``F.pad(inputs, (left, right, top, bottom))``.
    """
    rows, cols = inputs.size(2), inputs.size(3)
    dil = conv_layer.dilation
    ksize = conv_layer.kernel_size

    # Total padding per dimension; the dilated filter extent is one more than this.
    total_h = dil[0] * (ksize[0] - 1)
    total_w = dil[1] * (ksize[1] - 1)

    print((rows, cols))
    print((total_h + 1, total_w + 1))

    top, left = total_h // 2, total_w // 2
    bottom, right = total_h - top, total_w - left

    print(f'height: {total_h}, width: {total_w}')
    print(f'bottom: {bottom}, top: {top}, left: {left}, right: {right}')

    # F.pad takes pads for the last dimension first: (left, right, top, bottom).
    return F.pad(inputs, (left, right, top, bottom))

In [117]:
# Pad a random batch with same_padding, run it through a 3x5 convolution, and print
# the three sizes so the input and output sizes can be compared.
inputs = torch.rand((64, 3, 122, 51))

conv_layer = nn.Conv2d(
    in_channels=3,
    out_channels=3,
    kernel_size=(3, 5),
    stride=1,
    dilation=1,
)

padded_inputs = same_padding(conv_layer, inputs)
output = conv_layer(padded_inputs)

print(inputs.size(), padded_inputs.size(), output.size(), sep='\n')

(122, 51)
(3, 5)
height: 2, width: 4
bottom: 1, top: 1, left: 2, right: 2
torch.Size([64, 3, 122, 51])
torch.Size([64, 3, 124, 55])
torch.Size([64, 3, 122, 51])
