In [1]:
import numpy as np
from conv_direct import convolve_direct
from conv_fft import convolve_fft
from scipy import signal

## small N
For a small kernel, the direct matrix-multiplication method is faster than the FFT method (comparing the two custom implementations).

In [2]:
# Seeded generator so the benchmark inputs are identical on every run.
rng = np.random.default_rng(0)
# Small-kernel case: 823x809 image and 21x21 kernel, both standard normal.
image = rng.standard_normal((823, 809))
kernel = rng.standard_normal((21, 21))
# Convolution mode handed to every implementation compared below.
mode = 'full'

In [3]:
%%time
# Run the custom FFT-based implementation on the small-kernel inputs.
result_fft = convolve_fft(image, kernel, mode=mode)

CPU times: user 2.06 s, sys: 112 ms, total: 2.17 s
Wall time: 1.77 s


In [4]:
%%time
# Run the custom direct implementation on the small-kernel inputs.
# [0, ...] keeps only the first entry along the leading axis of the returned
# value (presumably a batch/channel axis added by convolve_direct; confirm).
result_direct = convolve_direct(image, kernel, mode=mode)[0, ...]

CPU times: user 399 ms, sys: 7.84 ms, total: 406 ms
Wall time: 405 ms


In [5]:
%%time
# Reference result from scipy, used as ground truth for the checks below.
result_true = signal.convolve(image, kernel, mode=mode)
# Pass the same `mode` so the reported method matches the convolution that was
# actually computed; without it choose_conv_method assumes its default 'full'.
print(f'scipy uses the "{signal.choose_conv_method(image, kernel, mode=mode)}" method to compute this convolution.\n')

scipy uses the "fft" method to compute this convolution.

CPU times: user 33 ms, sys: 80 µs, total: 33 ms
Wall time: 31.6 ms


In [6]:
# Each custom result should match the scipy reference within np.allclose's
# default tolerances: FFT implementation first, then the direct one.
print(np.allclose(result_true, result_fft))
print(np.allclose(result_true, result_direct))

True
True


## large N
For a large kernel, the FFT method is faster, because the cost of the direct matrix-multiplication method grows as O(n^2) with the kernel side length n. Note that the output of a full convolution has size (image size + kernel size - 1) along each axis. The FFT method zero-pads this size up to the next power of 2. For example, a size of 1025 is padded to 2048, which is inefficient. A better approach is to divide the image into chunks such that (chunk size + kernel size - 1) equals a power of 2 (e.g. 256), then overlap the partial results and add them together (the overlap-add method).

In [7]:
# Seeded generator so the benchmark inputs are identical on every run.
rng = np.random.default_rng(0)
# Large-kernel case: 823x809 image and 123x109 kernel, both standard normal.
image = rng.standard_normal((823, 809))
kernel = rng.standard_normal((123, 109))
# Convolution mode handed to every implementation compared below.
mode = 'full'

In [8]:
%%time
# Run the custom FFT-based implementation on the large-kernel inputs.
result_fft = convolve_fft(image, kernel, mode=mode)

CPU times: user 2.09 s, sys: 8.83 ms, total: 2.1 s
Wall time: 1.68 s


In [9]:
%%time
# Run the custom direct implementation on the large-kernel inputs.
# [0, ...] keeps only the first entry along the leading axis of the returned
# value (presumably a batch/channel axis added by convolve_direct; confirm).
result_direct = convolve_direct(image, kernel, mode=mode)[0, ...]

CPU times: user 16.1 s, sys: 650 µs, total: 16.1 s
Wall time: 16.1 s


In [10]:
%%time
# Reference result from scipy, used as ground truth for the checks below.
result_true = signal.convolve(image, kernel, mode=mode)
# Pass the same `mode` so the reported method matches the convolution that was
# actually computed; without it choose_conv_method assumes its default 'full'.
print(f'scipy uses the "{signal.choose_conv_method(image, kernel, mode=mode)}" method to compute this convolution.\n')

scipy uses the "fft" method to compute this convolution.

CPU times: user 29.8 ms, sys: 12 ms, total: 41.8 ms
Wall time: 40 ms


In [11]:
# Each custom result should match the scipy reference within np.allclose's
# default tolerances: FFT implementation first, then the direct one.
print(np.allclose(result_true, result_fft))
print(np.allclose(result_true, result_direct))

True
True
