In [1]:
import numpy as np
from conv_direct import convolve_direct
from conv_fft import convolve_fft
from scipy import signal

## small N
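For a small kernel (here 21×21), the direct matrix-multiplication method (`convolve_direct`) is faster than our FFT implementation (`convolve_fft`); `scipy.signal.convolve` is faster than both.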

In [2]:
# Draw the inputs from a seeded generator so that Restart & Run All
# reproduces exactly the same arrays (the global np.random state is unseeded).
rng = np.random.default_rng(0)
image = rng.standard_normal((823, 809))  # 2-D input, shape (823, 809)
kernel = rng.standard_normal((21, 21))   # small kernel, shape (21, 21)
mode = 'full'                            # forwarded as `mode=` to every convolution call in this section

In [3]:
%%time
result_fft=convolve_fft(image, kernel,mode=mode)

CPU times: user 2.19 s, sys: 80.8 ms, total: 2.27 s
Wall time: 1.87 s


In [4]:
%%time
result_direct=convolve_direct(image, kernel,mode=mode)[0,...]

CPU times: user 421 ms, sys: 270 µs, total: 421 ms
Wall time: 419 ms


In [5]:
%%time
result_true=signal.convolve(image,kernel,mode=mode)
print(f'scipy uses the "{signal.choose_conv_method(image,kernel)}" method to compute this convolution.\n')

scipy uses the "fft" method to compute this convolution.

CPU times: user 37.1 ms, sys: 0 ns, total: 37.1 ms
Wall time: 34.6 ms


In [6]:
# Compare each implementation's output against the scipy reference;
# prints one boolean per implementation (FFT first, then direct).
for candidate in (result_fft, result_direct):
    print(np.allclose(result_true, candidate))

True
True


## large N
The FFT method is faster here, because the cost of the direct matrix-multiplication method grows as O(n^2). Note that the size of a full convolution along each dimension is (image size + kernel size - 1), and the FFT method zero-pads this to the next power of 2. For example, a size of 1025 is padded to 2048, which is inefficient. A better approach is to divide the image into chunks such that (chunk size + kernel size - 1) equals a power of 2 (e.g. 256), convolve each chunk separately, and then overlap the partial results and add them together (the overlap-add method).

In [7]:
# Draw the inputs from a seeded generator so that Restart & Run All
# reproduces exactly the same arrays (the global np.random state is unseeded).
rng = np.random.default_rng(0)
image = rng.standard_normal((823, 809))   # 2-D input, shape (823, 809)
kernel = rng.standard_normal((123, 109))  # large kernel, shape (123, 109)
mode = 'full'                             # forwarded as `mode=` to every convolution call in this section

In [8]:
%%time
result_fft=convolve_fft(image, kernel,mode=mode)

CPU times: user 2.01 s, sys: 28.3 ms, total: 2.04 s
Wall time: 1.62 s


In [9]:
%%time
result_direct=convolve_direct(image, kernel,mode=mode)[0,...]

CPU times: user 16 s, sys: 0 ns, total: 16 s
Wall time: 16 s


In [10]:
%%time
result_true=signal.convolve(image,kernel,mode=mode)
print(f'scipy uses the "{signal.choose_conv_method(image,kernel)}" method to compute this convolution.\n')

scipy uses the "fft" method to compute this convolution.

CPU times: user 42.4 ms, sys: 3.58 ms, total: 45.9 ms
Wall time: 43.9 ms


In [11]:
# Compare each implementation's output against the scipy reference;
# prints one boolean per implementation (FFT first, then direct).
for candidate in (result_fft, result_direct):
    print(np.allclose(result_true, candidate))

True
True
