In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_sample_images

from utils import batch_plot
from conv import load_sample_filters

In [None]:
sample_images = np.asarray(load_sample_images().images)
sample_images = sample_images / sample_images.max()

# for better plot
n = 420
sample_images = sample_images[:, :n, :n, :]

batch_plot(sample_images, with_border=False, imgsize=6)

# Generate sliding window views of the image

In [None]:
# example of using numpy sliding_window_view: stride=1
batch, height, width, channel, window, stride, filters = 4, 7, 7, 3, 3, 1, 1
x = np.arange(batch * height * width * channel).reshape((batch, height, width, channel))
y = np.lib.stride_tricks.sliding_window_view(x, window_shape=(filters, window, window, channel)).squeeze(axis=(3, 4))

# (height - window)/stride + 1 = chunk_height
chunk_height, chunk_width = (height - window) // stride + 1, (width - window) // stride + 1
assert y.shape == (batch, chunk_height, chunk_width, window, window, channel)

# # low level operation
stride = 1
stride_batch, stride_height, stride_width, stride_channel = x.strides
z = np.lib.stride_tricks.as_strided(
    x,
    shape=(batch, chunk_height, chunk_width, window, window, channel),
    strides=(
        stride_batch,
        stride * stride_height,
        stride * stride_width,
        stride_height,
        stride_width,
        stride_channel,
    ),
)
assert np.allclose(y, z)

In [None]:
# plot nxn views of the image
n = 4
batch, height, width, channel = sample_images.shape
num_stride_height, num_stride_width = height // n, width // n
chunk_height, chunk_width = n, n
stride_batch, stride_height, stride_width, stride_channel = sample_images.strides
# (height - filter_height)/stride + 1 = chunk_height
filter_height, filter_width = height - num_stride_height * (chunk_height - 1), width - num_stride_width * (
    chunk_width - 1
)
chunks = np.lib.stride_tricks.as_strided(
    sample_images,
    shape=(batch, chunk_height, chunk_width, filter_height, filter_width, channel),
    strides=(
        stride_batch,
        num_stride_height * stride_height,
        num_stride_width * stride_width,
        stride_height,
        stride_width,
        stride_channel,
    ),
)
assert chunks.shape == (
    batch,
    chunk_height,
    chunk_width,
    filter_height,
    filter_width,
    channel,
)

In [12]:
sliding_sample_images = chunks.reshape((batch, -1, filter_height, filter_width, channel))
assert sliding_sample_images.shape == (
    batch,
    chunk_height * chunk_width,
    filter_height,
    filter_width,
    channel,
)
for sliding_sample_image in sliding_sample_images:
    batch_plot(
        sliding_sample_image,
        with_border=False,
        cmap=plt.cm.gray,
        tight_layout=None,
        wspace=0.01,
        hspace=0.01,
        imgsize=2,
    )

Error in callback <function _draw_all_if_interactive at 0x11dd1d260> (for post_execute):



KeyboardInterrupt


KeyboardInterrupt



ValueError: Image size of 82800x82800 pixels is too large. It must be less than 2^16 in each direction.

<Figure size 82800x82800 with 2302 Axes>

# Apply filters to the sliding window chunks

In [None]:
filter_height = filter_width = 7
sample_filters = load_sample_filters(size=filter_height, sigma=1, channel=channel)

batch_plot(
    list(sample_filters.values()),
    list(sample_filters.keys()),
    with_border=True,
    tight_layout=None,
    wspace=0.1,
    hspace=0.1,
    imgsize=4,
)

In [None]:
# chunked with stride of 1
batch, height, width, channel = sample_images.shape
filters = np.asarray(list(sample_filters.values()))
chunks = np.lib.stride_tricks.sliding_window_view(
    sample_images, window_shape=(1, filter_height, filter_width, channel)
).squeeze(axis=(3, 4))
chunk_height, chunk_width = height - filter_height + 1, width - filter_width + 1
assert chunks.shape == (
    batch,
    chunk_height,
    chunk_width,
    filter_height,
    filter_width,
    channel,
)

# chunks:                                                      (batch, chunk_height, chunk_width, filter_height, filter_width, channel)
# filters:                                                     (num_filters, filter_height, filter_width, channel)

# 1. step by step
# np.expand_dims(chunks, 3):                                   (batch, chunk_height, chunk_width, 1, filter_height, filter_width, channel)
# np.expand_dims(chunks, 3) * filters:                         (batch, chunk_height, chunk_width, num_filters, filter_height, filter_width, channel)
# np.expand_dims(chunks, 3) * filters).sum(axis=(-3,-2,-1)):   (batch, chunk_height, chunk_width, num_filters, channel)
# filtered_sample_images = (np.expand_dims(chunks, 3) * filters).sum(axis=(-3,-2,-1)).transpose((0,3,1,2))
# 2. tensordot
# filtered_sample_image = np.tensordot(chunks, filters, axes=((3,4,5), (1,2,3))).transpose((0,3,1,2))
# 3. einsum
# filtered_sample_images = np.einsum('mijklc,nklc->mnij',chunks,filters)

# 4. img2col
# filters.reshape((-1, filter_height*filter_width*channel)).T                                 (filter_height*filter_width*channel, num_filters)
# chunks.reshape((batch*chunk_height*chunk_width, filter_height*filter_width*channel))        (batch*chunk_height*chunk_width*channel, filter_height*filter_width*channel)
filtered_sample_images = (
    chunks.reshape((batch * chunk_height * chunk_width, filter_height * filter_width * channel))
    @ filters.reshape((-1, filter_height * filter_width * channel)).T
)
filtered_sample_images = filtered_sample_images.reshape((batch, chunk_height, chunk_width, -1)).transpose((0, 3, 1, 2))

assert filtered_sample_images.shape == (batch, len(filters), chunk_height, chunk_width)

for filtered_sample_image in filtered_sample_images:
    batch_plot(
        filtered_sample_image,
        list(sample_filters.keys()),
        with_border=False,
        cmap=plt.cm.gray,
        tight_layout=None,
        wspace=0.1,
        hspace=0.1,
        imgsize=6,
    )

In [None]:
from scipy import signal


def conv2d(images, kernels):
    _, kernel_height, kernel_width, kernel_channel = kernels.shape
    padding_height, padding_width = (kernel_height - 1) // 2, (kernel_width - 1) // 2
    # same padding
    images = np.pad(
        images, pad_width=((0, 0), (padding_height, padding_width), (padding_height, padding_width), (0, 0))
    )
    chunks = np.lib.stride_tricks.sliding_window_view(
        images, window_shape=(1, kernel_height, kernel_width, kernel_channel)
    ).squeeze(axis=(3, 4))
    return np.einsum("mijklc,nklc->mnij", chunks, kernels)


def fft_conv2d(images, kernels):
    batch_size, image_height, image_width, channel = images.shape
    res = np.zeros((batch_size, len(filters), image_height, image_width))

    # (num_kernels, 1, kernel_height, kernel_width, channel)
    kernels = np.expand_dims(np.rot90(kernels, k=2, axes=(1, 2)), axis=1)

    for i, k in enumerate(kernels):
        for c in range(channel):
            res[:, i, :, :] += signal.fftconvolve(images[..., c], k[..., c], mode="same")
    return res

In [None]:
random_filters = np.random.uniform(size=filters.shape)

assert np.allclose(conv2d(sample_images, filters), fft_conv2d(sample_images, filters))
assert np.allclose(conv2d(sample_images, random_filters), fft_conv2d(sample_images, random_filters))

In [None]:
%timeit conv2d(sample_images, filters)
%timeit fft_conv2d(sample_images, np.rot90(filters, k=2, axes=(1,2)))

- [An Intuitive Explanation of Convolutional Neural Networks](https://ujjwalkarn.me/2016/08/11/intuitive-explanation-convnets/)