Skip to content

Commit

Permalink
optimize float complex FFT
Browse files Browse the repository at this point in the history
1. To optimize FFT, the algorithm is changed. Bit reversal is removed and radix 8 is added.
2. In testing, the optimized FFT shows the best performance, so the old implementations are removed.

The performance result is as follows:

toolchain: gcc 4.8 at -O2
The omx FFT's execution time is the baseline (100%). The lower the ratio, the better the performance.

panda board A9:
|     |16    |32    |64    |128   |256   |512   |1024  |2048  |4096  |
|Ne10 |84.27%|89.57%|85.63%|85.79%|87.89%|87.91%|83.51%|97.08%|92.68%|
|omx  |100%  |100%  |100%  |100%  |100%  |100%  |100%  |100%  |100%  |

nexus10 A15:
|     |16    |32    |64    |128   |256   |512   |1024  |2048  |4096  |
|Ne10 |84.88%|98.43%|89.46%|101.0%|99.24%|103.2%|93.80%|105.1%|97.44%|
|omx  |100%  |100%  |100%  |100%  |100%  |100%  |100%  |100%  |100%  |

Change-Id: I363ee1602f08532e566d3a5a4f3d7a99972a1283
  • Loading branch information
yangzhang committed Jun 4, 2014
1 parent 30a6c3f commit c3bbc61
Show file tree
Hide file tree
Showing 20 changed files with 1,390 additions and 7,016 deletions.
51 changes: 0 additions & 51 deletions inc/NE10_dsp.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,6 @@ extern "C" {
/* fft functions*/

/* function pointers*/
extern void (*ne10_radix4_butterfly_float) (ne10_float32_t *pDst,
ne10_float32_t *pSrc,
ne10_uint16_t N,
ne10_float32_t *pCoef);

extern void (*ne10_radix4_butterfly_inverse_float) (ne10_float32_t *pDst,
ne10_float32_t *pSrc,
ne10_uint16_t N,
ne10_float32_t *pCoef,
ne10_float32_t onebyN);

extern void (*ne10_rfft_float) (const ne10_rfft_instance_f32_t * S,
ne10_float32_t * pSrc,
ne10_float32_t * pDst,
ne10_float32_t * pTemp);

extern void (*ne10_fft_c2c_1d_float32) (ne10_fft_cpx_float32_t *fout,
ne10_fft_cpx_float32_t *fin,
ne10_fft_cpx_float32_t *twiddles,
Expand Down Expand Up @@ -150,22 +134,6 @@ extern "C" {
extern ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16 (ne10_int32_t nfft);

/* C version*/
extern void ne10_radix4_butterfly_float_c (ne10_float32_t *pDst,
ne10_float32_t *pSrc,
ne10_uint16_t N,
ne10_float32_t *pCoef);

extern void ne10_radix4_butterfly_inverse_float_c (ne10_float32_t *pDst,
ne10_float32_t *pSrc,
ne10_uint16_t N,
ne10_float32_t *pCoef,
ne10_float32_t onebyN);

extern void ne10_rfft_float_c (const ne10_rfft_instance_f32_t * S,
ne10_float32_t * pSrc,
ne10_float32_t * pDst,
ne10_float32_t * pTemp);

extern void ne10_fft_c2c_1d_float32_c (ne10_fft_cpx_float32_t *fout,
ne10_fft_cpx_float32_t *fin,
ne10_fft_cpx_float32_t *twiddles,
Expand Down Expand Up @@ -237,25 +205,6 @@ extern "C" {


/* NEON version*/
/**
* @addtogroup CFFT_CIFFT
* @{
*/
extern void ne10_radix4_butterfly_float_neon (ne10_float32_t *pDst,
ne10_float32_t *pSrc,
ne10_uint16_t N,
ne10_float32_t *pCoef)
asm ("ne10_radix4_butterfly_float_neon");

extern void ne10_radix4_butterfly_inverse_float_neon (ne10_float32_t *pDst,
ne10_float32_t *pSrc,
ne10_uint16_t N,
ne10_float32_t *pCoef,
ne10_float32_t onebyN)
asm ("ne10_radix4_butterfly_inverse_float_neon");
/** @} */ //end of CFFT_CIFFT group


extern void ne10_rfft_float_neon (const ne10_rfft_instance_f32_t * S,
ne10_float32_t * pSrc,
ne10_float32_t * pDst,
Expand Down
6 changes: 0 additions & 6 deletions modules/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,9 @@ if(NE10_ENABLE_DSP)
# Add dsp C files.
set(NE10_DSP_C_SRCS
${PROJECT_SOURCE_DIR}/common/NE10_mask_table.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_cfft.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_cfft_init.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_rfft.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_float32.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int32.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int16.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_rfft_init.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fir.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fir_init.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_iir.c
Expand All @@ -188,7 +184,6 @@ if(NE10_ENABLE_DSP)

# Add dsp intrinsic NEON files.
set(NE10_DSP_INTRINSIC_SRCS
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_rfft.neon.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_float32.neon.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int32.neon.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int16.neon.c
Expand All @@ -201,7 +196,6 @@ if(NE10_ENABLE_DSP)

# Add dsp NEON files.
set(NE10_DSP_NEON_SRCS
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_cfft.neon.s
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_float32.neon.s
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int32.neon.s
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int16.neon.s
Expand Down
Loading

0 comments on commit c3bbc61

Please sign in to comment.