Skip to content

Commit

Permalink
FFTDecoder: performance optimizations, now zero-pad vs truncating for…
Browse files Browse the repository at this point in the history
… improved RBW
  • Loading branch information
azonenberg committed Aug 15, 2020
1 parent 07d4bbd commit 79af9f3
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 41 deletions.
25 changes: 23 additions & 2 deletions scopehal/AlignedAllocator.h
Expand Up @@ -36,6 +36,10 @@
#ifndef AlignedAllocator_h
#define AlignedAllocator_h

#ifdef _WIN32
#include <windows.h>
#endif

/**
@brief Aligned memory allocator for STL containers
Expand Down Expand Up @@ -112,7 +116,11 @@ class AlignedAllocator
throw std::length_error("AlignedAllocator<T>::allocate(): requested size is too large, integer overflow?");

//Do the actual allocation
T* ret = static_cast<T*>(aligned_alloc(alignment, n*sizeof(T)));
#ifdef _WIN32
T* ret = static_cast<T*>(_aligned_malloc(n*sizeof(T), alignment));
#else
T* ret = static_cast<T*>(aligned_alloc(alignment, n*sizeof(T)));
#endif

//Error check
if(ret == NULL)
Expand All @@ -122,7 +130,17 @@ class AlignedAllocator
}

void deallocate(T* const p, const size_t /*unused*/) const
{ free(p); }
{
#ifdef _WIN32
_aligned_free(p);
#else
free(p);
#endif
}

//Convenience overload: frees p without the caller having to supply an element count.
//Forwards to the two-argument deallocate(), which ignores its size argument.
void deallocate(T* const p) const
{
	deallocate(p, 1);
}

//Not quite sure what this is for but apparently we need it?
template<typename U>
Expand All @@ -133,4 +151,7 @@ class AlignedAllocator
AlignedAllocator& operator=(const AlignedAllocator&) = delete;
};

//Global allocator for AVX helpers
//Shared AlignedAllocator<float, 32> instance; this is only the declaration,
//the single definition lives in scopehal.cpp.
//NOTE(review): the second template argument is presumably the byte alignment - confirm against the class template.
extern AlignedAllocator<float, 32> g_floatVectorAllocator;

#endif
2 changes: 2 additions & 0 deletions scopehal/scopehal.cpp
Expand Up @@ -59,6 +59,8 @@ bool g_hasAvx512DQ = false;
bool g_hasAvx512VL = false;
bool g_hasAvx2 = false;

//Definition of the global float allocator declared extern in AlignedAllocator.h
AlignedAllocator<float, 32> g_floatVectorAllocator;

/**
@brief Static initialization for SCPI transports
*/
Expand Down
79 changes: 40 additions & 39 deletions scopeprotocols/FFTDecoder.cpp
Expand Up @@ -28,12 +28,8 @@
***********************************************************************************************************************/

#include "../scopehal/scopehal.h"
#include "../scopehal/AlignedAllocator.h"
#include "FFTDecoder.h"
#include <ffts.h>

#ifdef _WIN32
#include <windows.h>
#endif

using namespace std;

Expand All @@ -49,6 +45,21 @@ FFTDecoder::FFTDecoder(string color)
//Set up channels
m_signalNames.push_back("din");
m_channels.push_back(NULL);

m_cachedNumPoints = 0;
m_rdin = NULL;
m_rdout = NULL;
m_plan = NULL;
}

/**
	@brief Releases the cached FFT input/output buffers and the FFTS plan, if they exist
 */
FFTDecoder::~FFTDecoder()
{
	//The constructor sets all three members to NULL, so only free what was actually created

	//Input sample buffer
	if(m_rdin != NULL)
	{
		g_floatVectorAllocator.deallocate(m_rdin);
	}

	//Output buffer
	if(m_rdout != NULL)
	{
		g_floatVectorAllocator.deallocate(m_rdout);
	}

	//FFTS plan
	if(m_plan != NULL)
	{
		ffts_free(m_plan);
	}
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -125,37 +136,36 @@ void FFTDecoder::Refresh()
return;
}

//Truncate to next power of 2 down
const size_t npoints = pow(2,floor(log2(npoints_raw)));
//Round up to next power of two
const size_t npoints = pow(2, ceil(log2(npoints_raw)));
LogTrace("FFTDecoder: processing %zu raw points\n", npoints_raw);
LogTrace("Rounded to %zu\n", npoints);

//Format the input data as raw samples for the FFT
//TODO: handle non-uniform sample rates
float* rdin;
//Reallocate buffers if needed
size_t insize = npoints * sizeof(float);

#ifdef _WIN32
rdin = (float*)_aligned_malloc(insize, 32);
#else
posix_memalign((void**)&rdin, 32, insize);
#endif

memcpy(rdin, &din->m_samples[0], insize);

float* rdout;
const size_t nouts = npoints/2 + 1;
if(m_cachedNumPoints != npoints_raw)
{
m_cachedNumPoints = npoints_raw;

if(m_rdin)
g_floatVectorAllocator.deallocate(m_rdin);
if(m_rdout)
g_floatVectorAllocator.deallocate(m_rdout);
if(m_plan)
ffts_free(m_plan);

m_rdin = g_floatVectorAllocator.allocate(npoints);
m_rdout = g_floatVectorAllocator.allocate(2*nouts);
m_plan = ffts_init_1d_real(npoints, FFTS_FORWARD);
}

#ifdef _WIN32
rdout = (float*)_aligned_malloc(2 * nouts * sizeof(float), 32);
#else
posix_memalign((void**)&rdout, 32, 2 * nouts * sizeof(float));
#endif
//Copy the input, then zero pad the rest
memcpy(m_rdin, &din->m_samples[0], npoints_raw * sizeof(float));
memset(m_rdin + npoints_raw, 0, (npoints - npoints_raw) * sizeof(float));

//Calculate the FFT
auto plan = ffts_init_1d_real(npoints, FFTS_FORWARD);
ffts_execute(plan, &rdin[0], &rdout[0]);
ffts_free(plan);
ffts_execute(m_plan, m_rdin, m_rdout);

//Set up output and copy timestamps
auto cap = new AnalogWaveform;
Expand All @@ -174,8 +184,8 @@ void FFTDecoder::Refresh()
for(size_t i=1; i<nouts; i++) //don't print (DC offset?) term 0
//real fft has symmetric output, ignore the redundant image of the data
{
float a = rdout[i*2];
float b = rdout[i*2 + 1];
float a = m_rdout[i*2];
float b = m_rdout[i*2 + 1];
float mag = sqrtf(a*a + b*b);
//float freq = (0.5f * i * sample_ghz * 1000) / nouts;

Expand All @@ -196,13 +206,4 @@ void FFTDecoder::Refresh()
}

SetData(cap);

//Clean up
#ifdef _WIN32
_aligned_free(rdin);
_aligned_free(rdout);
#else
free(rdin);
free(rdout);
#endif
}
6 changes: 6 additions & 0 deletions scopeprotocols/FFTDecoder.h
Expand Up @@ -36,11 +36,13 @@
#define FFTDecoder_h

#include "../scopehal/ProtocolDecoder.h"
#include <ffts/ffts.h>

class FFTDecoder : public ProtocolDecoder
{
public:
FFTDecoder(std::string color);
virtual ~FFTDecoder();

virtual void Refresh();

Expand All @@ -57,6 +59,10 @@ class FFTDecoder : public ProtocolDecoder
PROTOCOL_DECODER_INITPROC(FFTDecoder)

protected:
size_t m_cachedNumPoints;
float* m_rdin;
float* m_rdout;
ffts_plan_t* m_plan;
};

#endif

0 comments on commit 79af9f3

Please sign in to comment.