projectM-visualizer · revmischa · May 23, 2018 · May 16, 2018 · May 16, 2018 · May 16, 2018
diff --git a/src/libprojectM/MilkdropPresetFactory/PresetFrameIO.cpp b/src/libprojectM/MilkdropPresetFactory/PresetFrameIO.cpp
@@ -37,17 +37,17 @@ float **alloc_mesh(size_t gx, size_t gy)
 	// round gy up to multiple 4 (for possible SSE optimization) 
 	gy = (gy+3) & ~(size_t)3;
 
-	float **mesh = (float **)wipemalloc(gx * sizeof(float *));
-	float *m = (float *)wipemalloc(gx * gy * sizeof(float));
+	float **mesh = (float **)wipe_aligned_alloc(gx * sizeof(float *));
+	float *m = (float *)wipe_aligned_alloc(gx * gy * sizeof(float));
 	for ( int x = 0; x < gx; x++ )
 		mesh[x] = m + (gy * x);
 	return mesh;
 }
 
 float **free_mesh(float **mesh)
 {
-	free(mesh[0]);
-	free(mesh);
+	wipe_aligned_free(mesh[0]);	// mesh[0] is the start of the one float block that alloc_mesh() shares between all rows
+	wipe_aligned_free(mesh);	// then release the row-pointer table itself
 	return NULL;
 }
 
@@ -168,11 +168,8 @@ void PresetOutputs::Render(const BeatDetect &music, const PipelineContext &conte
 
 
 // N.B. The more optimization that can be done on this method, the better! This is called a lot and can probably be improved.
-// NOTE : Keep PerPixelMath_sse and PerPixelMath_c in sync
-
 void PresetOutputs::PerPixelMath_c(const PipelineContext &context)
 {
-
 	for (int x = 0; x < gx; x++)
 	{
 		for (int y = 0; y < gy; y++)
@@ -283,28 +280,6 @@ inline __m128 _mm_cosf(__m128 x)
 }
 
 
-/**
- * SSE instructions let us do the math on 4 floats in parallel.  You an see the main loop uses y += 4.  Each time through the loop,
- * we read operands in group of 4.  This looks like a mess, but just think of it as rewriting the infix expressions as a prefix expression
- * 
- * e.g.
- *   this->orig_x[x][y] * 0.5f * fZoom2Inv + 0.5f
- * becomes
- *			__m128 x_mesh = 
- *				_mm_add_ps(
- *					_mm_mul_ps(
- *						_mm_load_ps(&this->orig_x[x][y]), 
- *						_mm_mul_ps(fZoomInv,_mm_set_ps1(0.5f))),		// CONSIDER: common sub-expression
- *					_mm_set_ps1(0.5f));
- *
- * _mm_load_ps loads an SSE register from memory (4 floats at a time)
- * _mm_set_ps1 takes a constant 0.5 and loads it (replicated 4 times)
- *  * The other expressions are what they sound like:
- *    a + b --> _mm_add_ps(a, b)
- *    a * b --> _mm_mul_ps(a, b)
- */
-// NOTE : Keep PerPixelMath_sse and PerPixelMath_c in sync
-// NOTE : Even better would be to rewrite this as a compute shader
 void PresetOutputs::PerPixelMath_sse(const PipelineContext &context)
 {
 	for (int x = 0; x < gx; x++)

diff --git a/src/libprojectM/wipemalloc.cpp b/src/libprojectM/wipemalloc.cpp
@@ -25,21 +25,61 @@
  */
 
 #include "wipemalloc.h"
+#include <assert.h>
 
- void *wipemalloc( size_t count ) {
-    count = (count + 15) & ~(size_t)15;
-    void *mem = aligned_alloc( 16, count );
+ void *wipemalloc( size_t count )
+ {
+    void *mem = malloc( count );  // plain malloc(): zero-filled below, alignment is whatever malloc() gives; wipe_aligned_alloc() takes an explicit alignment
     if ( mem != NULL ) {
         memset( mem, 0, count );
       } else {
         printf( "wipemalloc() failed to allocate %d bytes\n", (int)count );
       }
     return mem;
-  }
+ }
 
 /** Safe memory deallocator */
  void wipefree( void *ptr ) {
     if ( ptr != NULL ) {
         free( ptr );
       }
   }
+
+void *wipe_aligned_alloc( size_t align, size_t size )
+{
+    // Returns size zero-filled bytes aligned to align (a power of 2), or NULL; free with wipe_aligned_free().
+    assert( align != 0 && (align & (align-1)) == 0 );
+#if TARGET_OS_MAC
+    // Over-allocate, align by hand, and keep the real allocation pointer in the slot just below the result.
+    void *allocated = calloc( 1, size + align - 1 + sizeof(void*) );
+    if (allocated == NULL) {
+        printf( "wipe_aligned_alloc() failed to allocate %lu bytes\n", (unsigned long)size );
+        return NULL;
+    }
+    void *ret = (void*) (((size_t)allocated + sizeof(void*) + align -1) & ~(align-1));
+    *((void**)((size_t)ret - sizeof(void*))) = allocated;
+    return ret;
+#else
+    size = (size + align - 1) & ~(align - 1);  // aligned_alloc() wants size to be a multiple of align
+    void *mem = aligned_alloc( align, size );
+    if ( mem != NULL )
+        memset( mem, 0, size );
+    else
+        printf( "wipe_aligned_alloc() failed to allocate %lu bytes\n", (unsigned long)size );
+    return mem;
+#endif
+}
+
+void wipe_aligned_free( void *p )
+{
+    // Counterpart of wipe_aligned_alloc(); a NULL pointer is ignored.
+    if (p == NULL)
+        return;
+#if TARGET_OS_MAC
+    // The pointer to hand back to free() sits in the slot just below p.
+    void **slot = (void**)((size_t)p - sizeof(void*));
+    free(*slot);
+#else
+    free(p);
+#endif
+}
diff --git a/src/libprojectM/wipemalloc.h b/src/libprojectM/wipemalloc.h
@@ -57,4 +57,8 @@
  void *wipemalloc( size_t count );
  void wipefree( void *ptr );
 
+/** Memory from wipe_aligned_alloc() must be released with wipe_aligned_free(), not free() or wipefree() */
+ void *wipe_aligned_alloc( size_t align, size_t count);
+ inline void *wipe_aligned_alloc( size_t count ) { return wipe_aligned_alloc(16,count); } /* 16-byte alignment */
+ void wipe_aligned_free( void *ptr );
 #endif /** !_WIPEMALLOC_H */