In [None]:
import numpy as np
from argparse import Namespace
import matplotlib.pyplot as plt

from bench import Convolution


%load_ext autoreload
%autoreload 2

In [None]:
# Benchmark settings read by every section of the notebook.
configs = Namespace(
    **{
        # padding mode handed to each Convolution
        "padding": "same",
        # input dimensions
        "batch_size": 8,
        "image_height": 64,
        "image_width": 64,
        "image_channels": 4,
        # kernel bank size and the two kernel sizes being compared
        "num_kernels": 16,
        "small_kernel_height": 3,
        "small_kernel_width": 3,
        "large_kernel_height": 19,
        "large_kernel_width": 19,
        # convolution implementations to verify and time, in this order
        "conv_methods": ["naive", "fft", "tensordot", "einsum", "img2col"],
    }
)


def test_and_bench(x, w, convs):
    c1 = convs[0](x, w)
    for c in convs[1:]:
        o = c(x, w)
        if c.padding == "same" and x.ndim == 2 and o.ndim == 2:
            assert x.shape == o.shape, f"same padding should maintain shape. but {x.shape} != {o.shape}"
        np.testing.assert_allclose(c1, o)

    times = {}
    for c in convs:
        print(f'{"="*20}  {c}')
        t = %timeit -o c(x, w)
        times[c.method] = t.average * 1e3  # millisecond
    return times


def plot_run_time(bench, width=0.15, kernels=None):
    """Plot average run times as grouped bars, one group per kernel size.

    Small-kernel timings are drawn against the left y-axis and large-kernel
    timings against a twin right y-axis, so each group keeps its own scale.
    The fastest method of each group is annotated with its name.

    Parameters
    ----------
    bench : dict
        Maps a method name to ``[small_kernel_time, large_kernel_time]``.
    width : float, optional
        Width of a single bar.
    kernels : list of str, optional
        Tick labels for the two groups; defaults to
        ``["small kernel", "large kernel"]``.
    """
    if kernels is None:
        kernels = ["small kernel", "large kernel"]

    methods = list(bench.keys())
    bench_values = np.asarray(list(bench.values()))
    # Column 0 holds the small-kernel timings, column 1 the large-kernel ones.
    fastest_small = methods[np.argmin(bench_values[:, 0])]
    fastest_large = methods[np.argmin(bench_values[:, 1])]

    locations = np.arange(len(kernels))
    fig, ax1 = plt.subplots(layout="constrained")
    ax2 = ax1.twinx()
    for multiplier, (method, measurement) in enumerate(bench.items()):
        offset = width * multiplier
        rec1 = ax1.bar(locations[0] + offset, measurement[0], width, label=method)
        rec2 = ax2.bar(locations[1] + offset, measurement[1], width, label=method)
        # Each container holds a single bar; only the fastest method gets a text label.
        ax1.bar_label(rec1, labels=[method if method == fastest_small else ""], padding=3)
        ax2.bar_label(rec2, labels=[method if method == fastest_large else ""], padding=3)

    ax1.set_ylabel("Small Kernel Avg Run Time (ms)")
    ax2.set_ylabel("Large Kernel Avg Run Time (ms)")
    # Bars start at offsets 0, width, ..., (n - 1) * width, so the middle of a
    # group is at (n - 1) * width / 2 for any number of methods.
    group_center = width * (len(methods) - 1) / 2
    ax1.set_xticks(locations + group_center, kernels)
    ax1.legend(loc="upper left", ncols=1)
    plt.show()


# Stop early unless every configured kernel dimension is odd.
assert all(
    size % 2 == 1
    for size in (
        configs.small_kernel_height,
        configs.small_kernel_width,
        configs.large_kernel_height,
        configs.large_kernel_width,
    )
), "filter size should be odd."

# 1. Grayscale (single channel)

## 1.1 single instance single kernel

### 1.1.1 small size kernel

In [None]:
# One Convolution per configured method, each created with channel=None.
convs = [
    Convolution(padding=configs.padding, method=name, channel=None)
    for name in configs.conv_methods
]

# One (height, width) image against one small (height, width) kernel.
x_shape = (configs.image_height, configs.image_width)
w_shape = (configs.small_kernel_height, configs.small_kernel_width)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

small_kernel_bench = test_and_bench(x, w, convs)

### 1.1.2 large size kernel

In [None]:
# One (height, width) image against one large (height, width) kernel.
x_shape = (configs.image_height, configs.image_width)
w_shape = (configs.large_kernel_height, configs.large_kernel_width)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

large_kernel_bench = test_and_bench(x, w, convs)

### 1.1.3 draw run time

In [None]:
# Pair each method's small- and large-kernel timings, then chart them.
bench = {
    method: [small_kernel_bench[method], large_kernel_bench[method]]
    for method in configs.conv_methods
}
plot_run_time(bench)

## 1.2 multi instance single kernel

### 1.2.1 small size kernel

In [None]:
# Batch of (height, width) images against one small kernel.
x_shape = (configs.batch_size, configs.image_height, configs.image_width)
w_shape = (configs.small_kernel_height, configs.small_kernel_width)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

small_kernel_bench = test_and_bench(x, w, convs)

### 1.2.2 large size kernel

In [None]:
# Batch of (height, width) images against one large kernel.
x_shape = (configs.batch_size, configs.image_height, configs.image_width)
w_shape = (configs.large_kernel_height, configs.large_kernel_width)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

large_kernel_bench = test_and_bench(x, w, convs)

### 1.2.3 draw run time

In [None]:
# Pair each method's small- and large-kernel timings, then chart them.
bench = {
    method: [small_kernel_bench[method], large_kernel_bench[method]]
    for method in configs.conv_methods
}
plot_run_time(bench)

## 1.3 multi instance multi kernels

### 1.3.1 small size kernel

In [None]:
# Batch of (height, width) images against a bank of num_kernels small kernels.
x_shape = (configs.batch_size, configs.image_height, configs.image_width)
w_shape = (configs.num_kernels, configs.small_kernel_height, configs.small_kernel_width)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

small_kernel_bench = test_and_bench(x, w, convs)

### 1.3.2 large size kernel

In [None]:
# Batch of (height, width) images against a bank of num_kernels large kernels.
x_shape = (configs.batch_size, configs.image_height, configs.image_width)
w_shape = (configs.num_kernels, configs.large_kernel_height, configs.large_kernel_width)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

large_kernel_bench = test_and_bench(x, w, convs)

### 1.3.3 draw run time

In [None]:
# Pair each method's small- and large-kernel timings, then chart them.
bench = {
    method: [small_kernel_bench[method], large_kernel_bench[method]]
    for method in configs.conv_methods
}
plot_run_time(bench)

# 2. RGB / multi-channel (channel first)

## 2.1 single instance single kernel

### 2.1.1 small size kernel

In [None]:
# One Convolution per configured method, each created with channel="first".
convs = [
    Convolution(padding=configs.padding, method=name, channel="first")
    for name in configs.conv_methods
]

# One (channels, height, width) image against one small kernel with the same channel count.
x_shape = (configs.image_channels, configs.image_height, configs.image_width)
w_shape = (configs.image_channels, configs.small_kernel_height, configs.small_kernel_width)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

small_kernel_bench = test_and_bench(x, w, convs)

### 2.1.2 large size kernel

In [None]:
# One (channels, height, width) image against one large kernel with the same channel count.
x_shape = (configs.image_channels, configs.image_height, configs.image_width)
w_shape = (configs.image_channels, configs.large_kernel_height, configs.large_kernel_width)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

large_kernel_bench = test_and_bench(x, w, convs)

### 2.1.3 draw run time

In [None]:
# Pair each method's small- and large-kernel timings, then chart them.
bench = {
    method: [small_kernel_bench[method], large_kernel_bench[method]]
    for method in configs.conv_methods
}
plot_run_time(bench)

## 2.2 multi instance single kernel

### 2.2.1 small size kernel

In [None]:
# Batch of (channels, height, width) images against one small kernel.
x_shape = (configs.batch_size, configs.image_channels, configs.image_height, configs.image_width)
w_shape = (configs.image_channels, configs.small_kernel_height, configs.small_kernel_width)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

small_kernel_bench = test_and_bench(x, w, convs)

### 2.2.2 large size kernel

In [None]:
# Batch of (channels, height, width) images against one large kernel.
x_shape = (configs.batch_size, configs.image_channels, configs.image_height, configs.image_width)
w_shape = (configs.image_channels, configs.large_kernel_height, configs.large_kernel_width)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

large_kernel_bench = test_and_bench(x, w, convs)

### 2.2.3 draw run time

In [None]:
# Pair each method's small- and large-kernel timings, then chart them.
bench = {
    method: [small_kernel_bench[method], large_kernel_bench[method]]
    for method in configs.conv_methods
}
plot_run_time(bench)

## 2.3 multi instance multi kernels

### 2.3.1 small size kernel

In [None]:
# Batch of (channels, height, width) images against a bank of num_kernels small kernels.
x_shape = (configs.batch_size, configs.image_channels, configs.image_height, configs.image_width)
w_shape = (
    configs.num_kernels,
    configs.image_channels,
    configs.small_kernel_height,
    configs.small_kernel_width,
)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

# Every entry of convs except the first is checked and timed here.
small_kernel_bench = test_and_bench(x, w, convs[1:])

### 2.3.2 large size kernel

In [None]:
# Batch of (channels, height, width) images against a bank of num_kernels large kernels.
x_shape = (configs.batch_size, configs.image_channels, configs.image_height, configs.image_width)
w_shape = (
    configs.num_kernels,
    configs.image_channels,
    configs.large_kernel_height,
    configs.large_kernel_width,
)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

# Every entry of convs except the first is checked and timed here.
large_kernel_bench = test_and_bench(x, w, convs[1:])

### 2.3.3 draw run time

In [None]:
# Pair the small- and large-kernel timings for every method except the first
# configured one, then chart them.
bench = {
    method: [small_kernel_bench[method], large_kernel_bench[method]]
    for method in configs.conv_methods[1:]
}
plot_run_time(bench)

# 3. RGB / multi-channel (channel last)

## 3.1 single instance single kernel

### 3.1.1 small size kernel

In [None]:
# One Convolution per configured method, each created with channel="last".
convs = [
    Convolution(padding=configs.padding, method=name, channel="last")
    for name in configs.conv_methods
]

# One (height, width, channels) image against one small kernel with the same channel count.
x_shape = (configs.image_height, configs.image_width, configs.image_channels)
w_shape = (configs.small_kernel_height, configs.small_kernel_width, configs.image_channels)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

small_kernel_bench = test_and_bench(x, w, convs)

### 3.1.2 large size kernel

In [None]:
# One (height, width, channels) image against one large kernel with the same channel count.
x_shape = (configs.image_height, configs.image_width, configs.image_channels)
w_shape = (configs.large_kernel_height, configs.large_kernel_width, configs.image_channels)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

large_kernel_bench = test_and_bench(x, w, convs)

### 3.1.3 draw run time

In [None]:
# Pair each method's small- and large-kernel timings, then chart them.
bench = {
    method: [small_kernel_bench[method], large_kernel_bench[method]]
    for method in configs.conv_methods
}
plot_run_time(bench)

## 3.2 multi instance single kernel

### 3.2.1 small size kernel

In [None]:
# Batch of (height, width, channels) images against one small kernel.
x_shape = (configs.batch_size, configs.image_height, configs.image_width, configs.image_channels)
w_shape = (configs.small_kernel_height, configs.small_kernel_width, configs.image_channels)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

small_kernel_bench = test_and_bench(x, w, convs)

### 3.2.2 large size kernel

In [None]:
# Batch of (height, width, channels) images against one large kernel.
x_shape = (configs.batch_size, configs.image_height, configs.image_width, configs.image_channels)
w_shape = (configs.large_kernel_height, configs.large_kernel_width, configs.image_channels)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

large_kernel_bench = test_and_bench(x, w, convs)

### 3.2.3 draw run time

In [None]:
# Pair each method's small- and large-kernel timings, then chart them.
bench = {
    method: [small_kernel_bench[method], large_kernel_bench[method]]
    for method in configs.conv_methods
}
plot_run_time(bench)

## 3.3 multi instance multi kernels

### 3.3.1 small size kernel

In [None]:
# Batch of (height, width, channels) images against a bank of num_kernels small kernels.
x_shape = (configs.batch_size, configs.image_height, configs.image_width, configs.image_channels)
w_shape = (
    configs.num_kernels,
    configs.small_kernel_height,
    configs.small_kernel_width,
    configs.image_channels,
)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

# Every entry of convs except the first is checked and timed here.
small_kernel_bench = test_and_bench(x, w, convs[1:])

### 3.3.2 large size kernel

In [None]:
# Batch of (height, width, channels) images against a bank of num_kernels large kernels.
x_shape = (configs.batch_size, configs.image_height, configs.image_width, configs.image_channels)
w_shape = (
    configs.num_kernels,
    configs.large_kernel_height,
    configs.large_kernel_width,
    configs.image_channels,
)
x = np.random.uniform(size=x_shape)
w = np.random.uniform(size=w_shape)

# Every entry of convs except the first is checked and timed here.
large_kernel_bench = test_and_bench(x, w, convs[1:])

### 3.3.3 draw run time

In [None]:
# Pair the small- and large-kernel timings for every method except the first
# configured one, then chart them.
bench = {
    method: [small_kernel_bench[method], large_kernel_bench[method]]
    for method in configs.conv_methods[1:]
}
plot_run_time(bench)