## Benchmarking Scipy Signal vs cuSignal Time to Create Windows

In [1]:
import cusignal
from scipy import signal

#### General Parameters

In [2]:
# Num Points in Array - Reduce if getting out of memory errors
M = int(1e7)

#### Not Implemented
* Parzen
* Dolph-Chebyshev
* Slepian
* DPSS

Testing performed on 16GB NVIDIA GP100; Performance scales with data sizes, so presumably these scipy.signal vs cusignal benchmarks will increase with more GPU RAM and window sizes.

### General Cosine

In [3]:
%%timeit
HFT90D = [1, 1.942604, 1.340318, 0.440811, 0.043097]
cpu_window = signal.windows.general_cosine(M, HFT90D, sym=False)

1.07 s ± 2.27 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [4]:
%%timeit
HFT90D = [1, 1.942604, 1.340318, 0.440811, 0.043097]
gpu_window = cusignal.windows.general_cosine(M, HFT90D, sym=False)

6.98 ms ± 337 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Boxcar

In [5]:
%%timeit
cpu_window = signal.windows.boxcar(M)

22.6 ms ± 127 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
%%timeit
gpu_window = cusignal.windows.boxcar(M)

135 µs ± 6.67 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### Triangular

In [7]:
%%timeit
cpu_window = signal.windows.triang(M)

80.9 ms ± 60.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
%%timeit
gpu_window = cusignal.windows.triang(M)

820 µs ± 38.2 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Bohman

In [9]:
%%timeit
cpu_window = signal.windows.bohman(M)

521 ms ± 432 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%%timeit
gpu_window = cusignal.windows.bohman(M)

3.44 ms ± 4.43 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Blackman

In [11]:
%%timeit
cpu_window = signal.windows.blackman(M)

620 ms ± 6.06 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
%%timeit
gpu_window = cusignal.windows.blackman(M)

4.28 ms ± 741 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Nuttall

In [13]:
%%timeit
cpu_window = signal.windows.nuttall(M)

848 ms ± 5.99 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%timeit
gpu_window = cusignal.windows.nuttall(M)

5.63 ms ± 4.89 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Blackman-Harris

In [15]:
%%timeit
cpu_window = signal.windows.blackmanharris(M)

849 ms ± 7.69 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
%%timeit
gpu_window = cusignal.windows.blackmanharris(M)

5.64 ms ± 973 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Flat Top

In [17]:
%%timeit
cpu_window = signal.windows.flattop(M)

1.08 s ± 6.56 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
%%timeit
gpu_window = cusignal.windows.flattop(M)

6.98 ms ± 143 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Bartlett

In [19]:
%%timeit
cpu_window = signal.windows.bartlett(M)

185 ms ± 146 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [20]:
%%timeit
gpu_window = cusignal.windows.bartlett(M)

2.4 ms ± 6.94 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Hann

In [21]:
%%timeit
cpu_window = signal.windows.hann(M)

400 ms ± 1.02 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [22]:
%%timeit
gpu_window = cusignal.windows.hann(M)

2.95 ms ± 295 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Tukey

In [23]:
%%timeit
cpu_window = signal.windows.tukey(M, alpha=0.5, sym=True)

178 ms ± 405 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [24]:
%%timeit
gpu_window = cusignal.windows.tukey(M, alpha=0.5, sym=True)

1.78 ms ± 1.74 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Bartlett-Hann

In [25]:
%%timeit
cpu_window = signal.windows.barthann(M)

366 ms ± 255 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [26]:
%%timeit
gpu_window = cusignal.windows.barthann(M)

2.97 ms ± 362 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### General Hamming

In [27]:
%%timeit
cpu_window = signal.windows.general_hamming(M, alpha=0.5, sym=True)

400 ms ± 476 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [28]:
%%timeit
gpu_window = cusignal.windows.general_hamming(M, alpha=0.5, sym=True)

2.95 ms ± 239 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Hamming

In [29]:
%%timeit
cpu_window = signal.windows.hamming(M)

400 ms ± 305 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [30]:
%%timeit
gpu_window = cusignal.windows.hamming(M)

1.64 ms ± 46 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Kaiser

In [31]:
%%timeit
cpu_window = signal.windows.kaiser(M, beta=0.5)

858 ms ± 527 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [32]:
%%timeit
gpu_window = cusignal.windows.kaiser(M, beta=0.5)

2.81 ms ± 4.99 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Gaussian

In [33]:
%%timeit
cpu_window = signal.windows.gaussian(M, std=7)

229 ms ± 409 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [34]:
%%timeit
gpu_window = cusignal.windows.gaussian(M, std=7)

1.92 ms ± 42.4 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### General Gaussian

In [35]:
%%timeit
cpu_window = signal.windows.general_gaussian(M, p=1.5, sig=7)

700 ms ± 950 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [36]:
%%timeit
gpu_window = cusignal.windows.general_gaussian(M, p=1.5, sig=7)

2.2 ms ± 36.8 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Cosine

In [37]:
%%timeit
cpu_window = signal.windows.cosine(M)

205 ms ± 444 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [38]:
%%timeit
gpu_window = cusignal.windows.cosine(M)

1.04 ms ± 895 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Exponential

In [39]:
%%timeit
cpu_window = signal.windows.exponential(M, tau=3.0)

229 ms ± 426 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [40]:
%%timeit
gpu_window = cusignal.windows.exponential(M, tau=3.0)

1.63 ms ± 616 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
