In [2]:
import torch
import torch.nn.functional as F
from torch import Tensor
from torch.nn.functional import mse_loss

In [12]:
image = torch.arange(0, 1 * 2 * 5 * 5, dtype=torch.float32)
image = torch.reshape(image, (2, 1, 5, 5))
print(image)

tensor([[[[ 0.,  1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.,  9.],
          [10., 11., 12., 13., 14.],
          [15., 16., 17., 18., 19.],
          [20., 21., 22., 23., 24.]]],


        [[[25., 26., 27., 28., 29.],
          [30., 31., 32., 33., 34.],
          [35., 36., 37., 38., 39.],
          [40., 41., 42., 43., 44.],
          [45., 46., 47., 48., 49.]]]])


In [13]:
def get_sobel_kernel_3x3() -> Tensor:
    """Utility function that returns a sobel kernel of 3x3."""
    return torch.tensor([[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]])


def get_sobel_kernel2d() -> Tensor:
    kernel_x = get_sobel_kernel_3x3()
    kernel_y = kernel_x.transpose(0, 1)
    return torch.stack([kernel_x, kernel_y])


sobel_kernels = get_sobel_kernel2d()
print(sobel_kernels)
print(sobel_kernels.shape)
tmp_kernel = sobel_kernels[:, None, ...]
print(tmp_kernel.shape)

tensor([[[-1.,  0.,  1.],
         [-2.,  0.,  2.],
         [-1.,  0.,  1.]],

        [[-1., -2., -1.],
         [ 0.,  0.,  0.],
         [ 1.,  2.,  1.]]])
torch.Size([2, 3, 3])
torch.Size([2, 1, 3, 3])


In [14]:
def spatial_gradient(input: Tensor) -> Tensor:
    r"""
    Args:
        input: input image tensor with shape :math:`(B, C, H, W)`.
        mode: derivatives modality, can be: `sobel` or `diff`.
        order: the order of the derivatives.
        normalized: whether the output is normalized.

    Return:
        the derivatives of the input feature map. with shape :math:`(B, C, 2, H, W)`
    """

    kernel = get_sobel_kernel2d()

    # prepare kernel
    b, c, h, w = input.shape
    tmp_kernel = kernel[:, None, ...]
    # print(tmp_kernel)
    # Pad with "replicate for spatial dims, but with zeros for channel
    spatial_pad = [
        kernel.size(1) // 2,
        kernel.size(1) // 2,
        kernel.size(2) // 2,
        kernel.size(2) // 2,
    ]
    # print(spatial_pad)
    out_channels = 2
    # print(input.reshape(b * c, 1, h, w))
    padded_inp = F.pad(input.reshape(b * c, 1, h, w), spatial_pad, "replicate")
    # print(padded_inp)
    out = F.conv2d(padded_inp, tmp_kernel, groups=1, padding=0, stride=1)
    return out.reshape(b, c, out_channels, h, w)

In [15]:
spatial_gradient(image)

tensor([[[[[ 4.,  8.,  8.,  8.,  4.],
           [ 4.,  8.,  8.,  8.,  4.],
           [ 4.,  8.,  8.,  8.,  4.],
           [ 4.,  8.,  8.,  8.,  4.],
           [ 4.,  8.,  8.,  8.,  4.]],

          [[20., 20., 20., 20., 20.],
           [40., 40., 40., 40., 40.],
           [40., 40., 40., 40., 40.],
           [40., 40., 40., 40., 40.],
           [20., 20., 20., 20., 20.]]]],



        [[[[ 4.,  8.,  8.,  8.,  4.],
           [ 4.,  8.,  8.,  8.,  4.],
           [ 4.,  8.,  8.,  8.,  4.],
           [ 4.,  8.,  8.,  8.,  4.],
           [ 4.,  8.,  8.,  8.,  4.]],

          [[20., 20., 20., 20., 20.],
           [40., 40., 40., 40., 40.],
           [40., 40., 40., 40., 40.],
           [40., 40., 40., 40., 40.],
           [20., 20., 20., 20., 20.]]]]])

In [16]:
def sobel(input: Tensor, eps: float = 1e-6) -> Tensor:
    r"""

    Args:
        input: the input image with shape :math:`(B,C,H,W)`.
        normalized: if True, L1 norm of the kernel is set to 1.
        eps: regularization number to avoid NaN during backprop.

    Return:
        the sobel edge gradient magnitudes map with shape :math:`(B,C,H,W)`.
    """
    if not len(input.shape) == 4:
        raise ValueError(f"Invalid input shape, we expect BxCxHxW. Got: {input.shape}")

    # comput the x/y gradients
    edges = spatial_gradient(input)

    # unpack the edges
    gx = edges[:, :, 0]
    gy = edges[:, :, 1]

    # compute gradient maginitude
    magnitude = torch.sqrt(gx * gx + gy * gy + eps)

    return magnitude

In [None]:
sobel(image).shape

In [3]:
input = torch.randn((2, 1, 5, 5)).cuda()
output = torch.randn((2, 1, 5, 5)).cuda()
distance = mse_loss(input, output, reduction="mean")

print(input.shape)
print(output.shape)
print(distance.shape)
print(distance.item())
import numpy as np

d = (np.square(input.numpy() - output.numpy())).mean(axis=None)
print(d)
# s = sobel(output)

torch.Size([2, 1, 5, 5])
torch.Size([2, 1, 5, 5])
torch.Size([])
1.8520668745040894


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [19]:
s.shape

torch.Size([2, 1, 5, 5])

In [20]:
image

tensor([[[[ 0.,  1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.,  9.],
          [10., 11., 12., 13., 14.],
          [15., 16., 17., 18., 19.],
          [20., 21., 22., 23., 24.]]],


        [[[25., 26., 27., 28., 29.],
          [30., 31., 32., 33., 34.],
          [35., 36., 37., 38., 39.],
          [40., 41., 42., 43., 44.],
          [45., 46., 47., 48., 49.]]]])

In [29]:
torch.mean(image)

tensor([[[ 2.,  7., 12., 17., 22.]],

        [[27., 32., 37., 42., 47.]]])

In [32]:
a = [i for i in range(25, 50)]
b = [i for i in range(0, 25)]
c = ((sum(a) / len(a)) + (sum(b) / len(b))) / 2

In [33]:
c

24.5