In [None]:
import os
import sys
import math
import shutil
import datetime
import multiprocessing
import notebook
from distutils import ccompiler

In [None]:
# Identify the interpreter so the numbers below can be tied to an environment.
print(sys.executable)
print('Python', sys.version, end='\n\n')

# Packages whose versions are reported. Any package that fails to import is
# skipped (best-effort report, not a dependency check).
package_names = ('IPython notebook ipywidgets widgetsnbextension numpy '
                 'scipy matplotlib skimage numba cupy h5py Cython dask '
                 'tifffile'.split())
for name in package_names:
    try:
        __import__(name)
    except Exception:
        # Missing or broken package: leave it out of the report.
        pass
    else:
        loaded = sys.modules[name]
        # Fall back to 'Unknown' when the package exposes no __version__.
        print(name.lower(), getattr(loaded, '__version__', 'Unknown'))

# Compiler that distutils would select by default on this platform.
default_compiler = ccompiler.new_compiler()
print('\nCompiler type:', default_compiler.compiler_type, end='\n\n')
print(multiprocessing.cpu_count(), 'CPU cores')

# psutil is optional; without it the memory line is simply omitted.
try:
    import psutil
    # Element 0 of virtual_memory() is reported as main memory, scaled by
    # 2**30 to match the "GB" label.
    total_bytes = psutil.virtual_memory()[0]
    print('{:.0f} GB main memory\n'.format(total_bytes/2**30))
except ImportError:
    pass

In [None]:
from matplotlib import pyplot as plt

In [None]:
import time
import numpy
import pyfftw
import multiprocessing
# Benchmark: 2-D FFT of one large array with numpy and three pyFFTW entry
# points. Each timing is a single wall-clock measurement and, for the pyFFTW
# variants, includes planning.
nthread = multiprocessing.cpu_count()

# Single-precision complex input. Change the dtype to 'complex128' to benchmark
# double precision instead.
# NOTE (original author's observation, not re-measured here): single precision
# was about 4x faster and removed the difference between the "builders" and
# "FFTW" methods.
a = numpy.random.rand(2364, 2756).astype('complex64')

# time.perf_counter() is monotonic and high-resolution, unlike time.time(),
# which can jump when the system clock is adjusted.
start = time.perf_counter()
b1 = numpy.fft.fft2(a)
end1 = time.perf_counter() - start

start = time.perf_counter()
b2 = pyfftw.interfaces.scipy_fftpack.fft2(a, threads=nthread)
end2 = time.perf_counter() - start

# Drop accumulated wisdom so the next variant plans from scratch.
pyfftw.forget_wisdom()
start = time.perf_counter()
b3 = pyfftw.interfaces.numpy_fft.fft2(a, threads=nthread)
end3 = time.perf_counter() - start

# For large arrays avoiding the copy is very important; the original author
# reports a 2x speedup from doing so.
# overwrite_input=True permits FFTW to clobber `a`, so this variant must stay
# last: every other transform above has already consumed `a`.
pyfftw.forget_wisdom()
start = time.perf_counter()
fft = pyfftw.builders.fft2(a, overwrite_input=True,
                           planner_effort='FFTW_ESTIMATE', threads=nthread)
#fft = pyfftw.builders.fft2(a, s=None, axes=(-2, -1), overwrite_input=False, planner_effort='FFTW_MEASURE', threads=nthread, auto_align_input=False, auto_contiguous=False, avoid_copy=True)
b5 = fft()
end5 = time.perf_counter() - start

print('numpy.fft.fft2:                        %.3f secs.' % end1)
print('pyfftw.interfaces.scipy_fftpack.fft2:  %.3f secs.' % end2)
print('pyfftw.interfaces.numpy_fft.fft2:      %.3f secs.' % end3)
print('pyfftw.builders:                       %.3f secs.' % end5)

In [1]:
import numpy as np
import pyfftw
import multiprocessing

nthread = multiprocessing.cpu_count()

a = np.random.rand(1024*1024).astype('complex64')
fft = pyfftw.builders.fft(a, overwrite_input=True, planner_effort='FFTW_ESTIMATE', threads=multiprocessing.cpu_count())

%time np.fft.fft(a)
%time pyfftw.interfaces.scipy_fftpack.fft(a, threads=nthread)
%time pyfftw.interfaces.numpy_fft.fft(a, threads=nthread)
%time fft()

CPU times: user 36.2 ms, sys: 16.1 ms, total: 52.3 ms
Wall time: 52.8 ms
CPU times: user 56.4 ms, sys: 175 µs, total: 56.6 ms
Wall time: 24.8 ms
CPU times: user 53.1 ms, sys: 0 ns, total: 53.1 ms
Wall time: 19.1 ms
CPU times: user 48.2 ms, sys: 0 ns, total: 48.2 ms
Wall time: 14.8 ms


array([ 5.2467006e+05  +0.j      , -3.6651361e+02-286.94296j ,
        1.8795923e+02+128.50076j , ...,  9.7990852e+01 +11.404587j,
        1.8795924e+02-128.50069j , -3.6651358e+02+286.94296j ],
      dtype=complex64)

In [None]:
# import numpy as np
# import pyfftw
# import multiprocessing

# nthread = multiprocessing.cpu_count()


# K = 2**18
# Llx = 10.
# KT = 2*K
# dx = Llx/np.float64(K)
# X = np.arange(-Llx,Llx,dx)
# a = pyfftw.n_byte_align_empty(KT, 16, 'complex64')
# b = pyfftw.n_byte_align_empty(KT, 16, 'complex64')
# fft_object = pyfftw.FFTW(a,b)
# a[:] = 2.*np.cosh(X)**(-2)

In [None]:
# %timeit np.fft.fft(a)
# %timeit fft_object(a)