In [192]:
import numpy as np

In [193]:
img = np.arange(9).reshape((3, 3)) + 1
img

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [194]:
def pad_array(array: np.ndarray) -> np.ndarray:
    h_padded = np.vstack((
        np.zeros((array.shape[1],)),
        array
    ))
    hv_padded = np.hstack((
        np.zeros((h_padded.shape[0], 1)),
        h_padded
    ))

    return hv_padded


def box_sum(img: np.ndarray, k: int) -> tuple[np.ndarray, np.ndarray]:
    h, w = img.shape
    size_y = h - k + 1
    size_x = w - k + 1
    hv_padded = pad_array(img)
    sums = np.zeros((size_y, size_x), dtype=np.int32)

    for y in range(1, hv_padded.shape[0]):
        running_sum = 0

        for x in range(1, hv_padded.shape[1]):
            # subtract the diagonal element to prevent double-counting
            running_sum += hv_padded[y, x] + hv_padded[y - 1, x] - hv_padded[y - 1, x - 1]
            hv_padded[y, x] = running_sum

            if y >= k and x >= k:
                box_sum = hv_padded[y, x] - hv_padded[y, x - k] - hv_padded[y - k, x] + hv_padded[y - k, x - k]
                sums[y - k, x - k] = box_sum

    return hv_padded, sums

In [195]:
pad_array(img)

array([[0., 0., 0., 0.],
       [0., 1., 2., 3.],
       [0., 4., 5., 6.],
       [0., 7., 8., 9.]])

In [196]:
k = 2

ii, sum = box_sum(img, k)

In [199]:
ii

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  1.,  3.,  6.],
       [ 0.,  5., 12., 21.],
       [ 0., 12., 27., 45.]])

In [197]:
sum

array([[12, 16],
       [24, 28]], dtype=int32)

In [198]:
from numpy.lib.stride_tricks import sliding_window_view


def _ref_box_sum(img: np.ndarray, k: int) -> np.ndarray:
    w = sliding_window_view(img, (k, k))

    return w.astype(np.float64).sum(axis=(2, 3))

def run_box_sum_tests(box_sum_fn):
    rng = np.random.default_rng(42)
    cases = [
        (rng.integers(0, 10,  (3, 3),  dtype=np.int32),  [2]),
        (rng.integers(0, 255, (5, 7),  dtype=np.uint8),  [2, 3]),
        (rng.integers(-50, 50, (16, 10), dtype=np.int32), [3, 4]),
        (rng.integers(0, 1000,(12, 12), dtype=np.int64), [6]),
    ]

    for idx, (img, ks) in enumerate(cases, 1):
        for k in ks:
            ref = _ref_box_sum(img, k)
            _, out = box_sum_fn(img, k)
            np.testing.assert_allclose(
                np.asarray(out, dtype=np.float64),
                np.asarray(ref, dtype=np.float64),
                rtol=0, atol=0
            )
    print("All box_sum tests passed on 5 matrices and multiple window sizes.")


run_box_sum_tests(box_sum)

All box_sum tests passed on 5 matrices and multiple window sizes.
