FFTDecoder: performance optimizations, now zero-pad vs truncating for improved RBW
ngscopeclient · Aug 15, 2020 · 79af9f3 · 79af9f3
1 parent 07d4bbd
commit 79af9f3
Show file tree

Hide file tree

Showing 4 changed files with 71 additions and 41 deletions.
diff --git a/scopehal/AlignedAllocator.h b/scopehal/AlignedAllocator.h
@@ -36,6 +36,10 @@
 #ifndef AlignedAllocator_h
 #define AlignedAllocator_h
 
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
 /**
 	@brief Aligned memory allocator for STL containers
 
@@ -112,7 +116,11 @@ class AlignedAllocator
 			throw std::length_error("AlignedAllocator<T>::allocate(): requested size is too large, integer overflow?");
 
 		//Do the actual allocation
-		T* ret = static_cast<T*>(aligned_alloc(alignment, n*sizeof(T)));
+		#ifdef _WIN32
+			T* ret = static_cast<T*>(_aligned_malloc(n*sizeof(T), alignment));
+		#else
+			T* ret = static_cast<T*>(aligned_alloc(alignment, n*sizeof(T)));
+		#endif
 
 		//Error check
 		if(ret == NULL)
@@ -122,7 +130,17 @@ class AlignedAllocator
 	}
 
 	void deallocate(T* const p, const size_t /*unused*/) const
-	{ free(p); }
+	{
+		#ifdef _WIN32
+			_aligned_free(p);
+		#else
+			free(p);
+		#endif
+	}
+
+	//convenience wrapper
+	void deallocate(T* const p) const
+	{ deallocate(p, 1); }
 
 	//Not quite sure what this is for but apparently we need it?
 	template<typename U>
@@ -133,4 +151,7 @@ class AlignedAllocator
 	AlignedAllocator& operator=(const AlignedAllocator&) = delete;
 };
 
+//Global allocator for AVX helpers
+extern AlignedAllocator<float, 32> g_floatVectorAllocator;
+
 #endif
diff --git a/scopehal/scopehal.cpp b/scopehal/scopehal.cpp
@@ -59,6 +59,8 @@ bool g_hasAvx512DQ = false;
 bool g_hasAvx512VL = false;
 bool g_hasAvx2 = false;
 
+AlignedAllocator<float, 32> g_floatVectorAllocator;
+
 /**
 	@brief Static initialization for SCPI transports
  */

diff --git a/scopeprotocols/FFTDecoder.cpp b/scopeprotocols/FFTDecoder.cpp
@@ -28,12 +28,8 @@
 ***********************************************************************************************************************/
 
 #include "../scopehal/scopehal.h"
+#include "../scopehal/AlignedAllocator.h"
 #include "FFTDecoder.h"
-#include <ffts.h>
-
-#ifdef _WIN32
-#include <windows.h>
-#endif
 
 using namespace std;
 
@@ -49,6 +45,21 @@ FFTDecoder::FFTDecoder(string color)
 	//Set up channels
 	m_signalNames.push_back("din");
 	m_channels.push_back(NULL);
+
+	m_cachedNumPoints = 0;
+	m_rdin = NULL;
+	m_rdout = NULL;
+	m_plan = NULL;
+}
+
+FFTDecoder::~FFTDecoder()
+{
+	if(m_rdin)
+		g_floatVectorAllocator.deallocate(m_rdin);
+	if(m_rdout)
+		g_floatVectorAllocator.deallocate(m_rdout);
+	if(m_plan)
+		ffts_free(m_plan);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -125,37 +136,36 @@ void FFTDecoder::Refresh()
 		return;
 	}
 
-	//Truncate to next power of 2 down
-	const size_t npoints = pow(2,floor(log2(npoints_raw)));
+	//Round up to next power of two
+	const size_t npoints = pow(2, ceil(log2(npoints_raw)));
 	LogTrace("FFTDecoder: processing %zu raw points\n", npoints_raw);
 	LogTrace("Rounded to %zu\n", npoints);
 
-	//Format the input data as raw samples for the FFT
-	//TODO: handle non-uniform sample rates
-	float* rdin;
+	//Reallocate buffers if needed
 	size_t insize = npoints * sizeof(float);
-
-#ifdef _WIN32
-	rdin = (float*)_aligned_malloc(insize, 32);
-#else
-	posix_memalign((void**)&rdin, 32, insize);
-#endif
-
-	memcpy(rdin, &din->m_samples[0], insize);
-
-	float* rdout;
 	const size_t nouts = npoints/2 + 1;
+	if(m_cachedNumPoints != npoints_raw)
+	{
+		m_cachedNumPoints = npoints_raw;
+
+		if(m_rdin)
+			g_floatVectorAllocator.deallocate(m_rdin);
+		if(m_rdout)
+			g_floatVectorAllocator.deallocate(m_rdout);
+		if(m_plan)
+			ffts_free(m_plan);
+
+		m_rdin = g_floatVectorAllocator.allocate(npoints);
+		m_rdout = g_floatVectorAllocator.allocate(2*nouts);
+		m_plan = ffts_init_1d_real(npoints, FFTS_FORWARD);
+	}
 
-#ifdef _WIN32
-	rdout = (float*)_aligned_malloc(2 * nouts * sizeof(float), 32);
-#else
-	posix_memalign((void**)&rdout, 32, 2 * nouts * sizeof(float));
-#endif
+	//Copy the input, then zero pad the rest
+	memcpy(m_rdin, &din->m_samples[0], npoints_raw * sizeof(float));
+	memset(m_rdin + npoints_raw, 0, (npoints - npoints_raw) * sizeof(float));
 
 	//Calculate the FFT
-	auto plan = ffts_init_1d_real(npoints, FFTS_FORWARD);
-	ffts_execute(plan, &rdin[0], &rdout[0]);
-	ffts_free(plan);
+	ffts_execute(m_plan, m_rdin, m_rdout);
 
 	//Set up output and copy timestamps
 	auto cap = new AnalogWaveform;
@@ -174,8 +184,8 @@ void FFTDecoder::Refresh()
 	for(size_t i=1; i<nouts; i++)	//don't print (DC offset?) term 0
 									//real fft has symmetric output, ignore the redundant image of the data
 	{
-		float a = rdout[i*2];
-		float b = rdout[i*2 + 1];
+		float a = m_rdout[i*2];
+		float b = m_rdout[i*2 + 1];
 		float mag = sqrtf(a*a + b*b);
 		//float freq = (0.5f * i * sample_ghz * 1000) / nouts;
 
@@ -196,13 +206,4 @@ void FFTDecoder::Refresh()
 	}
 
 	SetData(cap);
-
-	//Clean up
-#ifdef _WIN32
-	_aligned_free(rdin);
-	_aligned_free(rdout);
-#else
-	free(rdin);
-	free(rdout);
-#endif
 }
diff --git a/scopeprotocols/FFTDecoder.h b/scopeprotocols/FFTDecoder.h
@@ -36,11 +36,13 @@
 #define FFTDecoder_h
 
 #include "../scopehal/ProtocolDecoder.h"
+#include <ffts/ffts.h>
 
 class FFTDecoder : public ProtocolDecoder
 {
 public:
 	FFTDecoder(std::string color);
+	virtual ~FFTDecoder();
 
 	virtual void Refresh();
 
@@ -57,6 +59,10 @@ class FFTDecoder : public ProtocolDecoder
 	PROTOCOL_DECODER_INITPROC(FFTDecoder)
 
 protected:
+	size_t m_cachedNumPoints;
+	float* m_rdin;
+	float* m_rdout;
+	ffts_plan_t* m_plan;
 };
 
 #endif