# Cython Testing

Testing the compilation and execution of a Cython module.

## Notes
- Copied from labeyrieClasses : 48s
- With all extra fluff and if statements removed: 47s
- Casting to np.float32 instead of float: 22.9s
- Casting entire 1000x512x512 fits cube to np.float32 takes 0.855s
    - Casting entire 1000x512x512 fits cube to float takes 18.5s
- No significant gain from casting the entire fits cube first (rather than image by image)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import fft2,ifft2,fftshift
from labeyrieClasses import target

%matplotlib inline
%load_ext cython

# Comparison of bare Python to Cython for preprocessing FITS data
# `target` is imported from labeyrieClasses above. The cells below reuse this
#  single instance: each one calls binary.fits.read() and then processes
#  binary.fits.data.
binary = target()
# Machine-specific absolute path; change it to point at a local FITS file
binary.fits.fileName = "/home/niels/Documents/FITS/KP330.fits"

In [None]:
%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Preprocess .FITS with Python, time it
# Command copied from labeyrieClasses

# Average power spectral density (PSD) of the FITS data, adapted from
#  labeyrieClasses. Handles both an image cube (3 dimensions) and a single
#  image (2 dimensions), then stores the normalized, shifted result in
#  binary.psd.data.
fitsData = binary.fits.data
numDims = len(fitsData.shape)

if numDims == 3:
    # Generate empty array the size of an image to be used to accumulate
    #  PSD values before averaging.
    psdSum = np.zeros(fitsData.shape[1:3])

    imgNum = fitsData.shape[0] # Number of images

    # How often to display a status message (about 20 per cube).
    # Integer division keeps the modulo test below exact in Python 3, and
    #  max() guards against a zero increment for cubes of fewer than 20 images.
    imgIncrement = max(1, imgNum // 20)

    # Looping through all images in cube
    for index,img in enumerate(fitsData):

        # Print current file being processed
        if (index+1) % imgIncrement == 0:
            print("Processed Image #: ",(index+1),"/",imgNum)

        # FFT function requires little-endian data, so casting it
        img = img.astype(float)

        # Calculate 2D power spectrum
        # This gives us only real values
        psdImg = np.abs(fft2(img))**2

        # Accumulate current PSD value
        psdSum = np.add(psdSum,psdImg)

    # Divide by # of images to calculate average
    psdAvg = np.divide(psdSum,imgNum)

# Otherwise if FITS data is only one image
elif numDims == 2:
    # FFT function requires little-endian data, so casting it.
    # The pixel array lives in binary.fits.data (as in the cube branch),
    #  not on binary.fits itself.
    img = fitsData.astype(float)

    # Calculate 2D power spectrum
    # This gives us only real values
    psdImg = np.abs(fft2(img))**2

    # A single image is its own average
    psdAvg = psdImg

else:
    # Fail loudly rather than leave a stale binary.psd.data behind
    raise ValueError("FITS data must have 2 or 3 dimensions")

# Normalizing FFT
psdAvg = np.divide(psdAvg, (psdAvg.size)**2)

# Store the shifted PSD for BOTH cases (previously only the single-image
#  branch stored its result)
binary.psd.data = fftshift(psdAvg)

In [None]:
%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Preprocess .FITS with Python, time it
# Command copied from labeyrieClasses
# Modified with all "if" statements removed

# Baseline for the timing comparison: the cube-averaging loop from the cell
#  above with the status message and dimension checks stripped out.
# Accumulator for the per-image PSDs, sized like a single image of the cube
psdSum = np.zeros(binary.fits.data.shape[1:3])

imgNum = np.shape(binary.fits.data)[0] # Number of images
# Leftover from the full version: nothing in this cell reads imgIncrement
#  now that the status message is gone
imgIncrement = imgNum/20 # How often to display a status message

# Looping through all images in cube
# (index is not used in this stripped-down loop)
for index,img in enumerate(binary.fits.data):

    # FFT function requires little-endian data, so casting it
    # This variant casts to Python float; the next cell differs only in
    #  casting to np.float32 instead
    img = img.astype(float)

    # Calculate 2D power spectrum
    # This gives us only real values
    psdImg = np.abs(fft2(img))**2

    # Accumulate current PSD value
    psdSum = np.add(psdSum,psdImg)

# Divide by # of images to calculate average
psdAvg = np.divide(psdSum,imgNum)

# Normalizing FFT
# Divides by the square of the number of pixels in one image
psdAvg = np.divide(psdAvg, (psdAvg.size)**2)

In [None]:
%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Preprocess .FITS with Python, time it
# Command copied from labeyrieClasses
# Modified with all "if" statements removed
# Casting to np.float32 instead

# Same loop as the previous cell with one change: each image is cast to
#  np.float32 instead of float. The notes at the top of the notebook record
#  this variant at 22.9s versus 47s for the float cast.
# Accumulator for the per-image PSDs, sized like a single image of the cube
psdSum = np.zeros(binary.fits.data.shape[1:3])

imgNum = np.shape(binary.fits.data)[0] # Number of images
# Leftover from the full version: nothing in this cell reads imgIncrement
imgIncrement = imgNum/20 # How often to display a status message

# Looping through all images in cube
# (index is not used in this stripped-down loop)
for index,img in enumerate(binary.fits.data):

    # FFT function requires little-endian data, so casting it
    # np.float32 is the single change under test in this cell
    img = img.astype(np.float32)

    # Calculate 2D power spectrum
    # This gives us only real values
    psdImg = np.abs(fft2(img))**2

    # Accumulate current PSD value
    psdSum = np.add(psdSum,psdImg)

# Divide by # of images to calculate average
psdAvg = np.divide(psdSum,imgNum)

# Normalizing FFT
# Divides by the square of the number of pixels in one image
psdAvg = np.divide(psdAvg, (psdAvg.size)**2)

In [None]:
# Checking difference in casting to different types
%timeit test = binary.fits.data.astype(np.float32)
%timeit test = binary.fits.data.astype(float)

In [None]:
%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Preprocess .FITS with Python, time it
# Command copied from labeyrieClasses
# Modified with all "if" statements removed
# Trying to pre-cast the entire cube before the loop

# Variant that casts the whole cube to np.float32 once, before the loop,
#  instead of casting image by image. The notes at the top of the notebook
#  record no significant gain from doing so.
# Accumulator for the per-image PSDs, sized like a single image of the cube
psdSum = np.zeros(binary.fits.data.shape[1:3])

imgNum = np.shape(binary.fits.data)[0] # Number of images
# Leftover from the full version: nothing in this cell reads imgIncrement
imgIncrement = imgNum/20 # How often to display a status message

# One-off cast of the entire cube; this creates a full float32 copy of the data
dataFloat32 = binary.fits.data.astype(np.float32)

# Looping through all images in cube
# (index is not used in this stripped-down loop)
for index,img in enumerate(dataFloat32):

    # Calculate 2D power spectrum
    # This gives us only real values
    psdImg = np.abs(fft2(img))**2

    # Accumulate current PSD value
    psdSum = np.add(psdSum,psdImg)

# Divide by # of images to calculate average
psdAvg = np.divide(psdSum,imgNum)

# Normalizing FFT
# Divides by the square of the number of pixels in one image
psdAvg = np.divide(psdAvg, (psdAvg.size)**2)

# Shifting FFT
psdAvg = np.fft.fftshift(psdAvg)

# Log-scaled display; the 1E-3 offset keeps log10 finite where the PSD is zero
plt.imshow(np.log10(psdAvg+1E-3))

In [None]:
%%cython --annotate
# A test cython module
cimport numpy as np
import numpy as np

def takePSD(np.ndarray image):
    """Return the normalized, centered 2D power spectrum of one image.

    The image is cast to float32, transformed with np.fft.fft2, squared in
    magnitude, divided by (number of pixels)**2 and passed through
    np.fft.fftshift.

    Parameters
    ----------
    image : np.ndarray
        A single 2D image of any size.

    Returns
    -------
    np.ndarray
        Real-valued power spectrum with the same shape as ``image``.
    """
    # Typed locals are declared without placeholder arrays: both names are
    # rebound below, so pre-allocating 512x512 buffers on every call only
    # cost time and wrongly suggested a fixed image size.
    cdef np.ndarray image_f32
    cdef np.ndarray psd

    # FFT function requires little-endian data, so casting it
    image_f32 = image.astype(np.float32)

    # Calculate 2D power spectrum
    # This gives us only real values
    psd = np.power(np.abs(np.fft.fft2(image_f32)),2)

    # Normalizing FFT
    psd = np.divide(psd, (psd.size)**2)

    # Shifting FFT
    psd = np.fft.fftshift(psd)

    return psd
    #return image_f32


In [None]:
#%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Run the test Cython function

%timeit -n 100 -r 1 psd = takePSD(binary.fits.data[0])

#plt.figure()
#plt.imshow(binary.fits.data[0])

#plt.figure()
#plt.imshow(np.log10(psd+1E-3))
#plt.colorbar()


In [None]:
%%cython --annotate
# Make a preprocess module in Cython
cimport numpy as np
import numpy as np

def preprocess(np.ndarray fits):
    """Return the normalized, centered average power spectrum of an image cube.

    Each image is cast to float32 and transformed with np.fft.fft2; the
    squared magnitudes are averaged over all images, divided by
    (pixels per image)**2 and passed through np.fft.fftshift.

    Parameters
    ----------
    fits : np.ndarray
        Image cube of shape (numImages, height, width).

    Returns
    -------
    np.ndarray
        Array of shape (height, width) holding the averaged power spectrum.

    Raises
    ------
    ValueError
        If ``fits`` is not 3-dimensional or contains no images.
    """
    # Running sum of the per-image PSDs
    cdef np.ndarray psdSum
    # Averaged PSD that is returned
    cdef np.ndarray psdAvg
    # Image currently being processed
    cdef np.ndarray img

    # np.ndim / np.shape are used instead of attribute access so that a None
    # argument is rejected cleanly.
    if np.ndim(fits) != 3:
        raise ValueError("fits must be a 3-dimensional image cube")

    cubeShape = np.shape(fits)
    numImages = cubeShape[0]
    if numImages == 0:
        raise ValueError("fits contains no images")

    # Accumulator sized from the data instead of a fixed 512x512
    psdSum = np.zeros(cubeShape[1:], dtype=np.float64)

    # Looping through all images in cube
    for img in fits:

        # Cast image by image: casting the whole cube up front gave no
        # measurable gain and needs a full-size float32 copy in memory.
        # Calculate 2D power spectrum; this gives us only real values
        psdSum += np.abs(np.fft.fft2(img.astype(np.float32)))**2

    # Divide by the actual number of images (previously a hard-coded 1000)
    psdAvg = np.divide(psdSum, numImages)

    # Normalizing FFT
    psdAvg = np.divide(psdAvg, (psdAvg.size)**2)

    # Shifting FFT
    psdAvg = np.fft.fftshift(psdAvg)

    return psdAvg


In [None]:
%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Run the test Cython function
psdAvg = preprocess(binary.fits.data)

plt.figure()
plt.imshow(np.log10(psdAvg+1E-3))
plt.colorbar()


In [None]:
%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Calculate the average PSD with the Cython takePSD function.
# This relies on the takePSD defined in the first %%cython cell above, which
#  already divides each PSD by size**2 and applies fftshift.
imgNum = np.shape(binary.fits.data)[0] # Number of images

# Accumulator sized from the data instead of a fixed 512x512
psdSum = np.zeros(binary.fits.data.shape[1:3])

# Looping through all images in cube
for img in binary.fits.data:

    # Normalized, shifted 2D power spectrum of this image
    psdImg = takePSD(img)

    # Accumulate current PSD value
    psdSum = np.add(psdSum,psdImg)

# Divide by the actual number of images (previously a hard-coded 1000)
psdAvg = np.divide(psdSum,imgNum)

# No further normalization or shift here: both are linear, so the average of
#  takePSD's outputs is already normalized and centered. Repeating them would
#  scale the result by 1/size**2 a second time and undo the shift.

plt.imshow(np.log10(psdAvg+1E-3))

In [None]:
# We see that it is actually faster to not use numpy
fits = binary.fits.data[0].astype(np.float32)
%timeit psd = np.abs(fft2(fits))**2

In [None]:
%%cython --annotate
# Trying to not use numpy for the power of two factor
cimport numpy as np
import numpy as np

def takePSD(np.ndarray image):
    # Returns the 2D power spectrum |FFT2(image)|**2 of one image.
    # Unlike the first takePSD in this notebook, this version applies no
    # float32 cast, no size**2 normalization and no fftshift.

    # Typed placeholder: psd is rebound to the result below, so this 512x512
    # array is never read
    cdef np.ndarray psd = np.zeros([512,512], dtype=np.float32)

    # Calculate 2D power spectrum
    # This gives us only real values
    # Squared with ** rather than np.power, which is the change being tried here
    psd = np.abs(np.fft.fft2(image))**2
    return psd



In [None]:
#%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Run the test Cython function
# This is still slower than non-cython

fits = binary.fits.data[0].astype(np.float32)

%timeit -n 100 -r 1 psd = takePSD(binary.fits.data[0])


In [None]:
# Inspect the dtype produced at each stage of the PSD computation when the
#  input image is float32. The final PSD turned out to be float64, not float32.
fits = binary.fits.data[0].astype(np.float32)

fits_F = np.fft.fft2(fits)      # complex spectrum
fits_Fabs = np.abs(fits_F)      # magnitude
fits_PSD = fits_Fabs**2         # power

# Report the dtypes in pipeline order: FFT, magnitude, PSD
for stage in (fits_F, fits_Fabs, fits_PSD):
    print(stage.dtype)

In [None]:
%%cython --annotate
# Creating intermediate variables with each correct type
cimport numpy as np
import numpy as np

def takePSD(np.ndarray image):
    """Return |FFT2(image)|**2, holding every intermediate in a typed local.

    No cast, normalization or fftshift is applied to the result.
    """
    # One typed local per stage, each initialised with a placeholder array of
    # the dtype that stage is expected to produce
    cdef np.ndarray spectrum = np.zeros((512, 512), dtype=np.complex128)
    cdef np.ndarray magnitude = np.zeros((512, 512), dtype=np.float64)
    cdef np.ndarray power = np.zeros((512, 512), dtype=np.float64)

    spectrum = np.fft.fft2(image)       # complex 2D FFT
    magnitude = np.absolute(spectrum)   # real-valued magnitude
    power = magnitude * magnitude       # squared magnitude = power spectrum

    return power

In [None]:
#%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Run the test Cython function
# This is still slower than non-cython

fits = binary.fits.data[0].astype(np.float32)

%timeit -n 100 -r 1 psd = takePSD(fits)

In [None]:
%%cython --annotate
# Using the _t suffix on datatypes
cimport numpy as np
import numpy as np

def takePSD(np.ndarray[np.float32_t, ndim=2] image):
    """Return the float64 power spectrum |FFT2(image)|**2 of a float32 image.

    Parameters
    ----------
    image : 2D float32 ndarray
        A single image of any size.

    Returns
    -------
    2D float64 ndarray
        Squared FFT magnitude; not normalized and not fftshifted.
    """
    # Buffer-typed locals, declared without placeholder arrays: each is
    # rebound below, so allocating 512x512 dummies per call was wasted work.
    cdef np.ndarray[np.complex128_t, ndim=2] image_F
    cdef np.ndarray[np.float64_t, ndim=2] image_Fabs
    cdef np.ndarray[np.float64_t, ndim=2] psd

    # np.fft.fft2 returns complex64 for float32 input on NumPy >= 2.0, which
    # would fail the complex128 buffer check. astype(copy=False) is a no-op
    # when the result is already complex128 (older NumPy).
    image_F = np.fft.fft2(image).astype(np.complex128, copy=False)
    image_Fabs = np.abs(image_F)
    psd = image_Fabs**2

    return psd

In [None]:
#%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Run the test Cython function
# This is still slower than non-cython

fits = binary.fits.data[0].astype(np.float32)

%timeit -n 100 -r 1 psd = takePSD(fits)

In [None]:
%%cython --annotate
# Trying cpdef
cimport numpy as np
import numpy as np

cpdef takePSD(np.ndarray[np.float32_t, ndim=2] image):
    """Return the float64 power spectrum |FFT2(image)|**2 of a float32 image.

    Same computation as the ``def`` version in the previous %%cython cell,
    declared with ``cpdef`` to compare call overhead.

    Parameters
    ----------
    image : 2D float32 ndarray
        A single image of any size.

    Returns
    -------
    2D float64 ndarray
        Squared FFT magnitude; not normalized and not fftshifted.
    """
    # Buffer-typed locals, declared without placeholder arrays: each is
    # rebound below, so allocating 512x512 dummies per call was wasted work.
    cdef np.ndarray[np.complex128_t, ndim=2] image_F
    cdef np.ndarray[np.float64_t, ndim=2] image_Fabs
    cdef np.ndarray[np.float64_t, ndim=2] psd

    # np.fft.fft2 returns complex64 for float32 input on NumPy >= 2.0, which
    # would fail the complex128 buffer check. astype(copy=False) is a no-op
    # when the result is already complex128 (older NumPy).
    image_F = np.fft.fft2(image).astype(np.complex128, copy=False)
    image_Fabs = np.abs(image_F)
    psd = image_Fabs**2

    return psd

In [None]:
#%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Run the test cpdef
# This is still slower than non-cython

fits = binary.fits.data[0].astype(np.float32)

%timeit -n 100 -r 1 psd = takePSD(fits)

In [None]:
from scipy.fftpack import fft2
import numpy as np

#

def psdAvgCalc(fits):
    """Return the normalized average power spectral density of an image cube.

    Each image is cast to float32 and transformed with ``fft2``; the squared
    magnitudes are averaged over all images and divided by
    (pixels per image)**2. The result is NOT fftshifted.

    Args:
        fits: Array-like image cube of shape (numImages, height, width).

    Returns:
        float64 ndarray of shape (height, width) with the averaged PSD.

    Raises:
        ValueError: If ``fits`` is not 3-dimensional or contains no images.
    """
    fits = np.asarray(fits)
    if fits.ndim != 3 or fits.shape[0] == 0:
        raise ValueError(
            "fits must be a non-empty cube of shape (numImages, height, width)"
        )

    numImages = fits.shape[0]

    # Accumulator sized from the data rather than a fixed 512x512, kept in
    # float64 so summing many float32 PSDs does not lose precision.
    psdSum = np.zeros(fits.shape[1:], dtype=np.float64)

    # Looping through all images in cube
    for img in fits:

        # FFT function requires little-endian data, so casting it
        img = img.astype(np.float32)

        # Calculate 2D power spectrum (real values only) and accumulate it
        psdSum += np.abs(fft2(img))**2

    # Divide by the actual number of images (previously a hard-coded 1000)
    psdAvg = psdSum / numImages

    # Normalizing FFT
    return psdAvg / psdAvg.size**2
    

In [None]:
%%timeit -n 1 -r 1
binary.fits.read(numDimensions=3)
# Experimenting with original preprocess 
#  code again
psdAvg = psdAvgCalc(binary.fits.data) 